|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2009 - 2022 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock subscription class. |
|
8 """ |
|
9 |
|
10 import os |
|
11 import re |
|
12 import hashlib |
|
13 import base64 |
|
14 |
|
15 from PyQt6.QtCore import ( |
|
16 pyqtSignal, Qt, QObject, QByteArray, QDateTime, QUrl, QUrlQuery, |
|
17 QCryptographicHash, QDate, QTime |
|
18 ) |
|
19 from PyQt6.QtNetwork import QNetworkReply, QNetworkRequest |
|
20 |
|
21 from EricWidgets import EricMessageBox |
|
22 |
|
23 import Utilities |
|
24 import Preferences |
|
25 |
|
26 |
|
class AdBlockSubscription(QObject):
    """
    Class implementing the AdBlock subscription.

    A subscription is described by an 'abp:subscribe?...' URL carrying its
    location, title, enabled state and last update time. The rules are kept
    in a local file (see rulesFileName()) which is refreshed from the
    subscription location when it is considered outdated.

    @signal changed() emitted after the subscription has changed
    @signal rulesChanged() emitted after the subscription's rules have changed
    @signal enabledChanged(bool) emitted after the enabled state was changed
    @signal rulesEnabledChanged() emitted after a rule enabled state was
        changed
    """
    changed = pyqtSignal()
    rulesChanged = pyqtSignal()
    enabledChanged = pyqtSignal(bool)
    rulesEnabledChanged = pyqtSignal()

    def __init__(self, url, custom, parent=None, default=False):
        """
        Constructor

        @param url AdBlock URL for the subscription
        @type QUrl
        @param custom flag indicating a custom subscription
        @type bool
        @param parent reference to the parent object
        @type QObject
        @param default flag indicating a default subscription
        @type bool
        """
        super().__init__(parent)

        self.__custom = custom
        self.__url = url.toEncoded()
        self.__enabled = False
        self.__downloading = None       # reply of a download in progress
        self.__defaultSubscription = default

        self.__title = ""
        self.__location = QByteArray()
        self.__lastUpdate = QDateTime()
        self.__requiresLocation = ""
        self.__requiresTitle = ""

        self.__updatePeriod = 0     # update period in hours, 0 = use default
        self.__remoteModified = QDateTime()

        self.__rules = []   # list containing all AdBlock rules

        self.__checksumRe = re.compile(
            r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
            re.IGNORECASE | re.MULTILINE)
        self.__expiresRe = re.compile(
            r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
            re.IGNORECASE)
        self.__remoteModifiedRe = re.compile(
            r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
            r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
            r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
            re.IGNORECASE)

        # keys are capitalized month abbreviations; normalize before lookup
        self.__monthNameToNumber = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12,
        }

        self.__parseUrl(url)

    def __parseUrl(self, url):
        """
        Private method to parse the AdBlock URL for the subscription.

        URLs not of the form 'abp:subscribe?...' are ignored. Otherwise the
        subscription attributes are taken from the query items and the
        rules are loaded.

        @param url AdBlock URL for the subscription
        @type QUrl
        """
        if url.scheme() != "abp":
            return

        if url.path() != "subscribe":
            return

        urlQuery = QUrlQuery(url)

        def decodedItem(key):
            """
            Get the percent decoded value of a query item.

            @param key name of the query item
            @type str
            @return decoded value (empty, if the item is missing)
            @rtype str
            """
            return QUrl.fromPercentEncoding(
                QByteArray(urlQuery.queryItemValue(key).encode()))

        self.__title = decodedItem("title")
        # everything but an explicit 'false' enables the subscription
        self.__enabled = urlQuery.queryItemValue("enabled") != "false"
        self.__location = QByteArray(decodedItem("location").encode("utf-8"))

        # Check for required subscription
        self.__requiresLocation = decodedItem("requiresLocation")
        self.__requiresTitle = decodedItem("requiresTitle")
        if self.__requiresLocation and self.__requiresTitle:
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.adBlockManager().loadRequiredSubscription(
                self.__requiresLocation, self.__requiresTitle)

        lastUpdateString = urlQuery.queryItemValue("lastUpdate")
        self.__lastUpdate = QDateTime.fromString(lastUpdateString,
                                                 Qt.DateFormat.ISODate)

        self.__loadRules()

    def url(self):
        """
        Public method to generate the URL for this subscription.

        @return AdBlock URL for the subscription
        @rtype QUrl
        """
        url = QUrl()
        url.setScheme("abp")
        url.setPath("subscribe")

        queryItems = [
            ("location", bytes(self.__location).decode()),
            ("title", self.__title),
        ]
        if self.__requiresLocation and self.__requiresTitle:
            queryItems.append(("requiresLocation", self.__requiresLocation))
            queryItems.append(("requiresTitle", self.__requiresTitle))
        if not self.__enabled:
            # 'enabled' is only recorded when it deviates from the default
            queryItems.append(("enabled", "false"))
        if self.__lastUpdate.isValid():
            queryItems.append(
                ("lastUpdate",
                 self.__lastUpdate.toString(Qt.DateFormat.ISODate))
            )

        query = QUrlQuery()
        query.setQueryItems(queryItems)
        url.setQuery(query)
        return url

    def isEnabled(self):
        """
        Public method to check, if the subscription is enabled.

        @return flag indicating the enabled status
        @rtype bool
        """
        return self.__enabled

    def setEnabled(self, enabled):
        """
        Public method to set the enabled status.

        The enabledChanged signal is emitted only if the status changes.

        @param enabled flag indicating the enabled status
        @type bool
        """
        if self.__enabled == enabled:
            return

        self.__enabled = enabled
        self.enabledChanged.emit(enabled)

    def title(self):
        """
        Public method to get the subscription title.

        @return subscription title
        @rtype str
        """
        return self.__title

    def setTitle(self, title):
        """
        Public method to set the subscription title.

        The changed signal is emitted only if the title changes.

        @param title subscription title
        @type str
        """
        if self.__title == title:
            return

        self.__title = title
        self.changed.emit()

    def location(self):
        """
        Public method to get the subscription location.

        @return URL of the subscription location
        @rtype QUrl
        """
        return QUrl.fromEncoded(self.__location)

    def setLocation(self, url):
        """
        Public method to set the subscription location.

        Changing the location invalidates the last update time.

        @param url URL of the subscription location
        @type QUrl
        """
        if url == self.location():
            return

        self.__location = url.toEncoded()
        self.__lastUpdate = QDateTime()
        self.changed.emit()

    def requiresLocation(self):
        """
        Public method to get the location of a required subscription.

        @return location of a required subscription
        @rtype str
        """
        return self.__requiresLocation

    def lastUpdate(self):
        """
        Public method to get the date and time of the last update.

        @return date and time of the last update
        @rtype QDateTime
        """
        return self.__lastUpdate

    def rulesFileName(self):
        """
        Public method to get the name of the rules file.

        For a 'file' location this is the local file itself. For any other
        location it is a file inside the 'web_browser/subscriptions'
        configuration directory named after the SHA1 hash of the location.
        That directory is created, if it does not exist.

        @return name of the rules file (empty, if no location is set)
        @rtype str
        """
        if self.location().scheme() == "file":
            return self.location().toLocalFile()

        if self.__location.isEmpty():
            return ""

        sha1 = bytes(QCryptographicHash.hash(
            self.__location, QCryptographicHash.Algorithm.Sha1).toHex()
        ).decode()
        dataDir = os.path.join(
            Utilities.getConfigDir(), "web_browser", "subscriptions")
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(dataDir, exist_ok=True)
        return os.path.join(
            dataDir, "adblock_subscription_{0}".format(sha1))

    def __loadRules(self):
        """
        Private method to load the rules of the subscription.

        A rules file not starting with an '[Adblock' header is reported and
        deleted. After loading, a check for an update is performed.
        """
        fileName = self.rulesFileName()
        if os.path.exists(fileName):
            try:
                with open(fileName, "r", encoding="utf-8") as f:
                    header = f.readline().strip()
                    validHeader = header.startswith("[Adblock")
                    if validHeader:
                        self.__parseRules(header, f)

                if not validHeader:
                    # the file is closed at this point, so it can be
                    # removed on all platforms
                    EricMessageBox.warning(
                        None,
                        self.tr("Load subscription rules"),
                        self.tr("""AdBlock file '{0}' does not start"""
                                """ with [Adblock.""")
                        .format(fileName))
                    os.unlink(fileName)
                    self.__lastUpdate = QDateTime()
            except (OSError, UnicodeError) as err:
                # UnicodeError: the file content is not valid UTF-8
                EricMessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr(
                        """Unable to read AdBlock file '{0}'.\nReason: {1}""")
                    .format(fileName, str(err))
                )

        elif (
            self.location().scheme() != "file" and
            not fileName.endswith("_custom")
        ):
            # The cached copy of a downloaded subscription is missing; force
            # a new download. Local files are exempted because there is
            # nothing to download and resetting the time stamp would make
            # checkForUpdate() -> updateNow() -> __loadRules() recurse
            # endlessly for a missing file.
            self.__lastUpdate = QDateTime()

        self.checkForUpdate()

    def __parseRules(self, header, lines):
        """
        Private method to build the rules list from a rules file.

        Besides creating the rules, the update period ('Expires') and the
        remote modification time ('Last modified' / 'Updated') are
        extracted. The changed signal is emitted when done.

        @param header header line of the rules file
        @type str
        @param lines iterable delivering the remaining lines of the file
        @type iterable of str
        """
        from .AdBlockRule import AdBlockRule

        self.__updatePeriod = 0
        self.__remoteModified = QDateTime()
        self.__rules = [AdBlockRule(header, self)]
        for line in lines:
            line = line.strip()
            if not line:
                continue

            self.__rules.append(AdBlockRule(line, self))

            expires = self.__expiresRe.search(line)
            if expires:
                period, kind = expires.groups()
                if kind:
                    # hours
                    self.__updatePeriod = int(period)
                else:
                    # days
                    self.__updatePeriod = int(period) * 24

            remoteModified = self.__remoteModifiedRe.search(line)
            if remoteModified:
                day, month, year, time, hour, minute = (
                    remoteModified.groups()
                )
                # the pattern matches case insensitively but the lookup
                # table contains capitalized names only
                self.__remoteModified.setDate(
                    QDate(int(year),
                          self.__monthNameToNumber[month.capitalize()],
                          int(day))
                )
                if time:
                    self.__remoteModified.setTime(
                        QTime(int(hour), int(minute)))
                else:
                    # no time given, set it to 23:59
                    self.__remoteModified.setTime(
                        QTime(23, 59))

        self.changed.emit()

    def checkForUpdate(self):
        """
        Public method to check for an update.

        An update is started, if there was no valid update so far or if the
        update period has passed since the remote modification time or
        since the last update. The period is taken from the rules file or,
        if that gives none, from the 'AdBlockUpdatePeriod' preference
        (multiplied by 24 to get hours).
        """
        updatePeriod = (
            self.__updatePeriod
            if self.__updatePeriod else
            Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
        )
        periodSecs = updatePeriod * 3600
        now = QDateTime.currentDateTime()
        if (
            not self.__lastUpdate.isValid() or
            (self.__remoteModified.isValid() and
             self.__remoteModified.addSecs(periodSecs) < now) or
            self.__lastUpdate.addSecs(periodSecs) < now
        ):
            self.updateNow()

    def updateNow(self):
        """
        Public method to update the subscription immediately.

        Nothing is done while a download is in progress or if the location
        is invalid. Local files are just reloaded, everything else is
        downloaded via the web browser's network manager.
        """
        if self.__downloading is not None:
            return

        if not self.location().isValid():
            return

        if self.location().scheme() == "file":
            self.__lastUpdate = QDateTime.currentDateTime()
            self.__loadRules()
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        reply = WebBrowserWindow.networkManager().get(
            QNetworkRequest(self.location()))
        reply.finished.connect(
            lambda: self.__rulesDownloaded(reply))
        self.__downloading = reply

    @staticmethod
    def __limitedEasyList(response):
        """
        Private method to cut the third-party advertisers rules out of an
        EasyList.

        @param response downloaded rules
        @type bytes
        @return rules without the section starting at the 'Third-party
            advertisers' marker and ending before the 'Whitelists to fix
            broken sites' marker or None, if the markers were not found in
            this order
        @rtype bytes or None
        """
        start = response.find(
            b"!---------------------------"
            b"Third-party advertisers"
            b"---------------------------!")
        end = response.find(
            b"!-----------------------"
            b"Whitelists to fix broken sites"
            b"------------------------!")
        # find() returns -1 for a missing marker; slicing with that value
        # would silently mutilate the list
        if start < 0 or end < 0 or end < start:
            return None

        return response[:start] + response[end:]

    def __rulesDownloaded(self, reply):
        """
        Private slot to deal with the downloaded rules.

        The rules are written to the rules file and loaded, if the
        checksum is acceptable. The reply is scheduled for deletion in all
        cases.

        @param reply reference to the network reply
        @type QNetworkReply
        """
        response = bytes(reply.readAll())
        reply.close()
        # Reset before processing, so that an update triggered while
        # loading the new rules can register its own reply.
        self.__downloading = None

        try:
            if reply.error() != QNetworkReply.NetworkError.NoError:
                if not self.__defaultSubscription:
                    # don't show error if we try to load the default
                    EricMessageBox.warning(
                        None,
                        self.tr("Downloading subscription rules"),
                        self.tr(
                            """<p>Subscription rules could not be"""
                            """ downloaded.</p><p>Error: {0}</p>""")
                        .format(reply.errorString()))
                else:
                    # reset after first download attempt
                    self.__defaultSubscription = False
                return

            if not response:
                EricMessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr("""Got empty subscription rules."""))
                return

            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            adBlockManager = WebBrowserWindow.adBlockManager()

            data = response
            limited = False
            if (
                adBlockManager.useLimitedEasyList() and
                self.url().toString().startswith(
                    adBlockManager.getDefaultSubscriptionUrl())
            ):
                # ignore Third-party advertisers rules for performance
                # whitelist rules at the end will be used
                limitedData = self.__limitedEasyList(response)
                if limitedData is not None:
                    data = limitedData
                    limited = True

            fileName = self.rulesFileName()
            try:
                with open(fileName, "wb") as f:
                    f.write(data)
                self.__lastUpdate = QDateTime.currentDateTime()

                # a modified list cannot match the published checksum
                if limited or self.__validateCheckSum(fileName):
                    self.__loadRules()
                else:
                    os.unlink(fileName)
            except OSError:
                EricMessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr("""Unable to write to AdBlock file '{0}'.""")
                    .format(fileName))
        finally:
            # the reply is not deleted automatically; release it on every
            # path including the early returns
            reply.deleteLater()

    def __validateCheckSum(self, fileName):
        """
        Private method to check the subscription file's checksum.

        The checksum is the base64 encoded MD5 digest (without padding) of
        the file content with carriage returns, empty lines and the
        checksum line removed. If it differs from the one recorded in the
        file, the user is asked whether to use the file anyway.

        @param fileName name of the file containing the subscription
        @type str
        @return flag indicating a valid file. A file is considered
            valid, if the checksum is OK, the file does not contain a
            checksum (i.e. cannot be checked) or the user accepted the
            mismatch. A file that cannot be read is invalid.
        @rtype bool
        """
        try:
            with open(fileName, "r", encoding="utf-8") as f:
                data = f.read()
        except (OSError, UnicodeError):
            return False

        match = self.__checksumRe.search(data)
        if match is None:
            # consider it as valid
            return True

        expectedChecksum = match.group(1)

        # normalize the data
        data = data.replace("\r", "")           # normalize eol
        data = re.sub(r"\n+", "\n", data)       # remove empty lines
        data = self.__checksumRe.sub("", data)  # remove checksum line

        # calculate checksum
        md5 = hashlib.md5()     # secok
        md5.update(data.encode("utf-8"))
        calculatedChecksum = (
            base64.b64encode(md5.digest()).decode().rstrip("=")
        )
        if calculatedChecksum == expectedChecksum:
            return True

        return EricMessageBox.yesNo(
            None,
            self.tr("Downloading subscription rules"),
            self.tr(
                """<p>AdBlock subscription <b>{0}</b> has a wrong"""
                """ checksum.<br/>"""
                """Found: {1}<br/>"""
                """Calculated: {2}<br/>"""
                """Use it anyway?</p>""")
            .format(self.__title, expectedChecksum,
                    calculatedChecksum))

    def saveRules(self):
        """
        Public method to save the subscription rules.

        A default header line is written first, if the rules do not start
        with a header.
        """
        fileName = self.rulesFileName()
        if not fileName:
            return

        try:
            with open(fileName, "w", encoding="utf-8") as f:
                if not self.__rules or not self.__rules[0].isHeader():
                    f.write("[Adblock Plus 2.0]\n")
                for rule in self.__rules:
                    f.write(rule.filter() + "\n")
        except OSError:
            EricMessageBox.warning(
                None,
                self.tr("Saving subscription rules"),
                self.tr("""Unable to write to AdBlock file '{0}'.""")
                .format(fileName))

    def rule(self, offset):
        """
        Public method to get a specific rule.

        @param offset offset of the rule
        @type int
        @return requested rule (None for an offset out of range)
        @rtype AdBlockRule
        """
        if not 0 <= offset < len(self.__rules):
            return None

        return self.__rules[offset]

    def allRules(self):
        """
        Public method to get the list of rules.

        @return copy of the list of rules
        @rtype list of AdBlockRule
        """
        return self.__rules[:]

    def addRule(self, rule):
        """
        Public method to add a rule.

        @param rule reference to the rule to add
        @type AdBlockRule
        @return offset of the rule
        @rtype int
        """
        self.__rules.append(rule)
        self.rulesChanged.emit()

        return len(self.__rules) - 1

    def removeRule(self, offset):
        """
        Public method to remove a rule given the offset.

        An offset out of range is ignored.

        @param offset offset of the rule to remove
        @type int
        """
        # valid offsets are 0 .. len - 1
        if not 0 <= offset < len(self.__rules):
            return

        del self.__rules[offset]
        self.rulesChanged.emit()

    def replaceRule(self, rule, offset):
        """
        Public method to replace a rule given the offset.

        @param rule reference to the rule to set
        @type AdBlockRule
        @param offset offset of the rule to replace
        @type int
        @return rule stored at the offset (None for an offset out of range)
        @rtype AdBlockRule
        """
        if not 0 <= offset < len(self.__rules):
            return None

        self.__rules[offset] = rule
        self.rulesChanged.emit()

        return self.__rules[offset]

    def canEditRules(self):
        """
        Public method to check, if rules can be edited.

        @return flag indicating rules may be edited
        @rtype bool
        """
        return self.__custom

    def canBeRemoved(self):
        """
        Public method to check, if the subscription can be removed.

        @return flag indicating removal is allowed
        @rtype bool
        """
        return not self.__custom and not self.__defaultSubscription

    def setRuleEnabled(self, offset, enabled):
        """
        Public method to enable a specific rule.

        If the rule is a CSS rule, the user style sheet of the web browser
        main window is reloaded.

        @param offset offset of the rule
        @type int
        @param enabled new enabled state
        @type bool
        @return reference to the changed rule (None for an offset out of
            range)
        @rtype AdBlockRule
        """
        if not 0 <= offset < len(self.__rules):
            return None

        rule = self.__rules[offset]
        rule.setEnabled(enabled)
        self.rulesEnabledChanged.emit()

        if rule.isCSSRule():
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.mainWindow().reloadUserStyleSheet()

        return rule