eric7/WebBrowser/AdBlock/AdBlockSubscription.py

branch
eric7
changeset 8312
800c432b34c8
parent 8260
2161475d9639
child 8318
962bce857696
equal deleted inserted replaced
8311:4e8b98454baa 8312:800c432b34c8
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock subscription class.
8 """
9
10 import os
11 import re
12 import hashlib
13 import base64
14
15 from PyQt5.QtCore import (
16 pyqtSignal, Qt, QObject, QByteArray, QDateTime, QUrl, QUrlQuery,
17 QCryptographicHash, QFile, QIODevice, QTextStream, QDate, QTime
18 )
19 from PyQt5.QtNetwork import QNetworkReply, QNetworkRequest
20
21 from E5Gui import E5MessageBox
22
23 import Utilities
24 import Preferences
25
26
27 class AdBlockSubscription(QObject):
28 """
29 Class implementing the AdBlock subscription.
30
31 @signal changed() emitted after the subscription has changed
32 @signal rulesChanged() emitted after the subscription's rules have changed
33 @signal enabledChanged(bool) emitted after the enabled state was changed
34 @signal rulesEnabledChanged() emitted after a rule enabled state was
35 changed
36 """
37 changed = pyqtSignal()
38 rulesChanged = pyqtSignal()
39 enabledChanged = pyqtSignal(bool)
40 rulesEnabledChanged = pyqtSignal()
41
def __init__(self, url, custom, parent=None, default=False):
    """
    Constructor

    Initializes all attributes to their empty defaults, compiles the
    regular expressions used to inspect rule files and finally parses
    the given AdBlock URL.

    @param url AdBlock URL for the subscription
    @type QUrl
    @param custom flag indicating a custom subscription
    @type bool
    @param parent reference to the parent object
    @type QObject
    @param default flag indicating a default subscription
    @type bool
    """
    super().__init__(parent)

    # identity and state of the subscription
    self.__custom = custom
    self.__defaultSubscription = default
    self.__url = url.toEncoded()
    self.__enabled = False
    self.__downloading = None

    # values taken from the subscription URL by __parseUrl()
    self.__title = ""
    self.__location = QByteArray()
    self.__lastUpdate = QDateTime()
    self.__requiresLocation = ""
    self.__requiresTitle = ""

    # values taken from the rules file by __loadRules()
    self.__updatePeriod = 0     # update period in hours, 0 = use default
    self.__remoteModified = QDateTime()
    self.__rules = []           # list containing all AdBlock rules

    # patterns for the meta data comment lines of a rules file
    self.__checksumRe = re.compile(
        r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
        re.IGNORECASE | re.MULTILINE)
    self.__expiresRe = re.compile(
        r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
        re.IGNORECASE)
    self.__remoteModifiedRe = re.compile(
        r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
        r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
        r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
        re.IGNORECASE)

    # maps the abbreviated month names to their number (Jan = 1)
    monthNames = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    self.__monthNameToNumber = {
        name: number for number, name in enumerate(monthNames, start=1)
    }

    self.__parseUrl(url)
98
def __parseUrl(self, url):
    """
    Private method to parse the AdBlock URL for the subscription.

    Only URLs with scheme 'abp' and path 'subscribe' are processed; any
    other URL leaves the subscription untouched. After extracting the
    query items the rules of the subscription are loaded.

    @param url AdBlock URL for the subscription
    @type QUrl
    """
    if url.scheme() != "abp" or url.path() != "subscribe":
        return

    query = QUrlQuery(url)

    def decodedItem(key):
        # query item values are percent encoded
        rawValue = query.queryItemValue(key).encode()
        return QUrl.fromPercentEncoding(QByteArray(rawValue))

    self.__title = decodedItem("title")
    # everything but an explicit 'false' enables the subscription
    self.__enabled = query.queryItemValue("enabled") != "false"
    self.__location = QByteArray(decodedItem("location").encode("utf-8"))

    # Check for required subscription
    self.__requiresLocation = decodedItem("requiresLocation")
    self.__requiresTitle = decodedItem("requiresTitle")
    if self.__requiresLocation and self.__requiresTitle:
        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        adBlockManager = WebBrowserWindow.adBlockManager()
        adBlockManager.loadRequiredSubscription(
            self.__requiresLocation, self.__requiresTitle)

    self.__lastUpdate = QDateTime.fromString(
        query.queryItemValue("lastUpdate"), Qt.DateFormat.ISODate)

    self.__loadRules()
136
def url(self):
    """
    Public method to generate the URL for this subscription.

    The URL has the scheme 'abp' and the path 'subscribe'; the state of
    the subscription is stored in its query items.

    @return AdBlock URL for the subscription
    @rtype QUrl
    """
    items = [
        ("location", bytes(self.__location).decode()),
        ("title", self.__title),
    ]
    if self.__requiresLocation and self.__requiresTitle:
        items += [
            ("requiresLocation", self.__requiresLocation),
            ("requiresTitle", self.__requiresTitle),
        ]
    if not self.__enabled:
        # the enabled state is only recorded for disabled subscriptions
        items.append(("enabled", "false"))
    if self.__lastUpdate.isValid():
        items.append(
            ("lastUpdate",
             self.__lastUpdate.toString(Qt.DateFormat.ISODate)))

    query = QUrlQuery()
    query.setQueryItems(items)

    subscriptionUrl = QUrl()
    subscriptionUrl.setScheme("abp")
    subscriptionUrl.setPath("subscribe")
    subscriptionUrl.setQuery(query)
    return subscriptionUrl
166
def isEnabled(self):
    """
    Public method to query the enabled state of the subscription.

    @return flag indicating an enabled subscription
    @rtype bool
    """
    enabled = self.__enabled
    return enabled
175
def setEnabled(self, enabled):
    """
    Public method to change the enabled state of the subscription.

    The enabledChanged signal is emitted only if the state really
    changes.

    @param enabled flag indicating the new enabled state
    @type bool
    """
    if self.__enabled != enabled:
        self.__enabled = enabled
        self.enabledChanged.emit(enabled)
188
def title(self):
    """
    Public method to query the title of the subscription.

    @return title of the subscription
    @rtype str
    """
    subscriptionTitle = self.__title
    return subscriptionTitle
197
def setTitle(self, title):
    """
    Public method to change the title of the subscription.

    The changed signal is emitted only if the title really changes.

    @param title new title of the subscription
    @type str
    """
    if self.__title != title:
        self.__title = title
        self.changed.emit()
210
def location(self):
    """
    Public method to query the location of the subscription.

    @return URL built from the stored, encoded subscription location
    @rtype QUrl
    """
    encodedLocation = self.__location
    return QUrl.fromEncoded(encodedLocation)
219
def setLocation(self, url):
    """
    Public method to change the location of the subscription.

    A changed location invalidates the time stamp of the last update
    and emits the changed signal.

    @param url URL of the subscription location
    @type QUrl
    """
    if url != self.location():
        self.__location = url.toEncoded()
        # rules of the old location are outdated for the new one
        self.__lastUpdate = QDateTime()
        self.changed.emit()
233
def requiresLocation(self):
    """
    Public method to query the location of a required subscription.

    @return location of the required subscription (empty, if the
        subscription URL did not name one)
    @rtype str
    """
    requiredLocation = self.__requiresLocation
    return requiredLocation
242
def lastUpdate(self):
    """
    Public method to query the time stamp of the last update.

    @return date and time of the last update
    @rtype QDateTime
    """
    timestamp = self.__lastUpdate
    return timestamp
251
def rulesFileName(self):
    """
    Public method to determine the name of the rules file.

    Subscriptions with a 'file' location use that local file. All
    others are stored below the 'web_browser/subscriptions' directory
    of the configuration directory under a name derived from the SHA1
    hash of the location; the directory is created if it is missing.

    @return name of the rules file (empty, if no location is set)
    @rtype str
    """
    location = self.location()
    if location.scheme() == "file":
        return location.toLocalFile()

    if self.__location.isEmpty():
        return ""

    digest = QCryptographicHash.hash(
        self.__location, QCryptographicHash.Algorithm.Sha1)
    hexDigest = bytes(digest.toHex()).decode()

    subscriptionsDir = os.path.join(
        Utilities.getConfigDir(), "web_browser", "subscriptions")
    if not os.path.exists(subscriptionsDir):
        os.makedirs(subscriptionsDir)

    return os.path.join(
        subscriptionsDir, "adblock_subscription_{0}".format(hexDigest))
275
def __loadRules(self):
    """
    Private method to load the rules of the subscription.

    The rules file must start with an '[Adblock' header line; a file
    without it is deleted and the last update time stamp is reset.
    While reading the rules, the 'expires' and 'Last modified' /
    'Updated' comment lines are evaluated to determine the update
    period and the remote modification time stamp. An update check is
    performed afterwards in any case.
    """
    # NOTE(review): for 'file' locations updateNow() calls back into
    # this method; confirm that a stale 'Last modified' header cannot
    # cause repeated reloads via checkForUpdate().
    fileName = self.rulesFileName()
    f = QFile(fileName)
    if f.exists():
        if not f.open(QIODevice.OpenModeFlag.ReadOnly):
            E5MessageBox.warning(
                None,
                self.tr("Load subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for reading.""")
                .format(fileName))
        else:
            textStream = QTextStream(f)
            header = textStream.readLine(1024)
            if not header.startswith("[Adblock"):
                E5MessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr("""AdBlock file '{0}' does not start"""
                            """ with [Adblock.""")
                    .format(fileName))
                f.close()
                f.remove()
                self.__lastUpdate = QDateTime()
            else:
                from .AdBlockRule import AdBlockRule

                self.__updatePeriod = 0
                self.__remoteModified = QDateTime()
                self.__rules = []
                self.__rules.append(AdBlockRule(header, self))
                while not textStream.atEnd():
                    line = textStream.readLine()
                    self.__rules.append(AdBlockRule(line, self))

                    expires = self.__expiresRe.search(line)
                    if expires:
                        period, kind = expires.groups()
                        if kind:
                            # hours
                            self.__updatePeriod = int(period)
                        else:
                            # days
                            self.__updatePeriod = int(period) * 24

                    remoteModified = self.__remoteModifiedRe.search(line)
                    if remoteModified:
                        day, month, year, time, hour, minute = (
                            remoteModified.groups()
                        )
                        # The pattern matches case insensitively but the
                        # lookup table is keyed by 'Jan', 'Feb', ...
                        monthNumber = self.__monthNameToNumber[
                            month.capitalize()]
                        self.__remoteModified.setDate(
                            QDate(int(year), monthNumber, int(day))
                        )
                        if time:
                            self.__remoteModified.setTime(
                                QTime(int(hour), int(minute)))
                        else:
                            # no time given, set it to 23:59
                            self.__remoteModified.setTime(QTime(23, 59))

                # release the file handle before notifying listeners
                f.close()
                self.changed.emit()
    elif not fileName.endswith("_custom"):
        # no rules file (yet); force a download for regular subscriptions
        self.__lastUpdate = QDateTime()

    self.checkForUpdate()
343
def checkForUpdate(self):
    """
    Public method to check, if the subscription needs to be updated.

    An update is started, if the subscription was never updated, if
    the remote modification time stamp is older than the update period
    or if the last update is older than the update period. The period
    read from the rules file is used; if there is none, the configured
    'AdBlockUpdatePeriod' (multiplied by 24) is used instead.
    """
    periodHours = (
        self.__updatePeriod or
        Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
    )
    periodSeconds = periodHours * 3600

    outdated = not self.__lastUpdate.isValid()
    if not outdated and self.__remoteModified.isValid():
        outdated = (
            self.__remoteModified.addSecs(periodSeconds) <
            QDateTime.currentDateTime()
        )
    if not outdated:
        outdated = (
            self.__lastUpdate.addSecs(periodSeconds) <
            QDateTime.currentDateTime()
        )

    if outdated:
        self.updateNow()
362
def updateNow(self):
    """
    Public method to update the subscription immediately.

    Nothing is done while a download is in progress or if the location
    is invalid. Rules of a 'file' location are simply reloaded; all
    others are requested via the network manager of the web browser.
    """
    if self.__downloading is not None:
        return

    location = self.location()
    if not location.isValid():
        return

    if location.scheme() == "file":
        self.__lastUpdate = QDateTime.currentDateTime()
        self.__loadRules()
        return

    from WebBrowser.WebBrowserWindow import WebBrowserWindow
    request = QNetworkRequest(location)
    reply = WebBrowserWindow.networkManager().get(request)
    reply.finished.connect(
        lambda: self.__rulesDownloaded(reply))
    # remember the reply to block concurrent update requests
    self.__downloading = reply
384
def __rulesDownloaded(self, reply):
    """
    Private slot to deal with the downloaded rules.

    The downloaded data is written to the rules file and loaded, if it
    passes the checksum validation. If the limited EasyList is in use
    for the default subscription, the section with the third-party
    advertisers rules is left out and the checksum is not validated.
    The reply is scheduled for deletion on every exit path.

    @param reply reference to the network reply
    @type QNetworkReply
    """
    response = reply.readAll()
    reply.close()
    self.__downloading = None

    try:
        if reply.error() != QNetworkReply.NetworkError.NoError:
            if not self.__defaultSubscription:
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """<p>Subscription rules could not be"""
                        """ downloaded.</p><p>Error: {0}</p>""")
                    .format(reply.errorString()))
            else:
                # don't show an error for the default subscription but
                # reset the flag after this first download attempt
                self.__defaultSubscription = False
            return

        if response.isEmpty():
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Got empty subscription rules."""))
            return

        fileName = self.rulesFileName()
        QFile.remove(fileName)
        f = QFile(fileName)
        if not f.open(QIODevice.OpenModeFlag.ReadWrite):
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        if (
            WebBrowserWindow.adBlockManager().useLimitedEasyList() and
            self.url().toString().startswith(
                WebBrowserWindow.adBlockManager()
                .getDefaultSubscriptionUrl())
        ):
            limited = True
            # ignore Third-party advertisers rules for performance
            # whitelist rules at the end will be used
            index = response.indexOf(
                "!---------------------------"
                "Third-party advertisers"
                "---------------------------!")
            part1 = response.left(index)
            index = response.indexOf(
                "!-----------------------"
                "Whitelists to fix broken sites"
                "------------------------!")
            part2 = response.mid(index)
            f.write(part1)
            f.write(part2)
        else:
            limited = False
            f.write(response)
        f.close()

        self.__lastUpdate = QDateTime.currentDateTime()
        if limited or self.__validateCheckSum(fileName):
            # Loading the rules may start a new download through
            # checkForUpdate(); the download marker must therefore not
            # be reset again after this call.
            self.__loadRules()
        else:
            QFile.remove(fileName)
    finally:
        # the reply is owned by us, release it on every exit path
        reply.deleteLater()
462
def __validateCheckSum(self, fileName):
    """
    Private method to check the subscription file's checksum.

    The checksum is the base64 encoded MD5 digest (without padding) of
    the normalized file content, i.e. the content without carriage
    returns, empty lines and the checksum line itself.

    @param fileName name of the file containing the subscription
    @type str
    @return flag indicating a valid file. A file is considered
        valid, if the checksum is OK, the file does not contain a
        checksum (i.e. cannot be checked) or the user decided to use
        it despite a wrong checksum. A file that cannot be read or is
        not UTF-8 encoded is considered invalid.
    @rtype bool
    """
    try:
        with open(fileName, "r", encoding="utf-8") as f:
            data = f.read()
    except (OSError, UnicodeError):
        # unreadable or wrongly encoded data cannot be a valid list
        return False

    match = self.__checksumRe.search(data)
    if match is None:
        # no checksum given; consider it as valid
        return True
    expectedChecksum = match.group(1)

    # normalize the data
    data = data.replace("\r", "")               # normalize eol
    data = re.sub(r"\n+", "\n", data)           # remove empty lines
    data = self.__checksumRe.sub("", data)      # remove checksum line

    # calculate checksum
    md5 = hashlib.md5()     # secok
    md5.update(data.encode("utf-8"))
    calculatedChecksum = (
        base64.b64encode(md5.digest()).decode().rstrip("=")
    )
    if calculatedChecksum == expectedChecksum:
        return True

    # let the user decide what to do with a mismatching file
    return E5MessageBox.yesNo(
        None,
        self.tr("Downloading subscription rules"),
        self.tr(
            """<p>AdBlock subscription <b>{0}</b> has a wrong"""
            """ checksum.<br/>"""
            """Found: {1}<br/>"""
            """Calculated: {2}<br/>"""
            """Use it anyway?</p>""")
        .format(self.__title, expectedChecksum,
                calculatedChecksum))
514
def saveRules(self):
    """
    Public method to save the subscription rules.

    Nothing is saved, if the subscription has no rules file name. An
    '[Adblock Plus 1.1.1]' header line is written first, if the first
    rule is not a header already. The stream is flushed and the file
    is closed explicitly once writing is done.
    """
    fileName = self.rulesFileName()
    if not fileName:
        return

    f = QFile(fileName)
    if not f.open(QIODevice.OpenModeFlag.ReadWrite |
                  QIODevice.OpenModeFlag.Truncate):
        E5MessageBox.warning(
            None,
            self.tr("Saving subscription rules"),
            self.tr(
                """Unable to open AdBlock file '{0}' for writing.""")
            .format(fileName))
        return

    textStream = QTextStream(f)
    try:
        if not self.__rules or not self.__rules[0].isHeader():
            textStream << "[Adblock Plus 1.1.1]\n"
        for rule in self.__rules:
            textStream << rule.filter() << "\n"
    finally:
        # don't rely on garbage collection to get the data written
        textStream.flush()
        f.close()
539
def rule(self, offset):
    """
    Public method to get the rule at a given offset.

    @param offset offset of the rule
    @type int
    @return requested rule or None, if the offset is beyond the end of
        the rules list
    @rtype AdBlockRule
    """
    return self.__rules[offset] if offset < len(self.__rules) else None
553
def allRules(self):
    """
    Public method to get all rules of the subscription.

    @return copy of the list of rules
    @rtype list of AdBlockRule
    """
    return list(self.__rules)
562
def addRule(self, rule):
    """
    Public method to append a rule to the subscription.

    The rulesChanged signal is emitted after the rule was added.

    @param rule reference to the rule to add
    @type AdBlockRule
    @return offset of the added rule
    @rtype int
    """
    self.__rules.append(rule)
    self.rulesChanged.emit()

    lastOffset = len(self.__rules) - 1
    return lastOffset
576
def removeRule(self, offset):
    """
    Public method to remove a rule given the offset.

    Offsets outside the range of the rules list (negative or beyond
    the last rule) are ignored. The rulesChanged signal is emitted
    after a rule was removed.

    @param offset offset of the rule to remove
    @type int
    """
    # the last valid offset is len - 1; len itself is already invalid
    if not 0 <= offset < len(self.__rules):
        return

    del self.__rules[offset]
    self.rulesChanged.emit()
589
def replaceRule(self, rule, offset):
    """
    Public method to replace the rule at a given offset.

    The rulesChanged signal is emitted after the rule was replaced.

    @param rule reference to the rule to set
    @type AdBlockRule
    @param offset offset of the rule to replace
    @type int
    @return rule stored at the offset or None, if the offset is beyond
        the end of the rules list
    @rtype AdBlockRule
    """
    if offset < len(self.__rules):
        self.__rules[offset] = rule
        self.rulesChanged.emit()
        return self.__rules[offset]

    return None
608
def canEditRules(self):
    """
    Public method to check, if the rules of the subscription may be
    edited.

    @return flag indicating editable rules (i.e. the value of the
        custom flag given at construction time)
    @rtype bool
    """
    isCustom = self.__custom
    return isCustom
617
def canBeRemoved(self):
    """
    Public method to check, if the subscription may be removed.

    @return flag indicating removal is allowed, which is the case for
        subscriptions being neither custom nor default ones
    @rtype bool
    """
    return not (self.__custom or self.__defaultSubscription)
626
def setRuleEnabled(self, offset, enabled):
    """
    Public method to enable or disable a specific rule.

    The rulesEnabledChanged signal is emitted after the state was set.
    For CSS rules the user style sheet of the web browser main window
    is reloaded as well.

    @param offset offset of the rule
    @type int
    @param enabled new enabled state
    @type bool
    @return reference to the changed rule or None, if the offset is
        beyond the end of the rules list
    @rtype AdBlockRule
    """
    if offset < len(self.__rules):
        changedRule = self.__rules[offset]
        changedRule.setEnabled(enabled)
        self.rulesEnabledChanged.emit()

        if changedRule.isCSSRule():
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.mainWindow().reloadUserStyleSheet()

        return changedRule

    return None

eric ide

mercurial