|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2009 - 2021 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock subscription class. |
|
8 """ |
|
9 |
|
10 import os |
|
11 import re |
|
12 import hashlib |
|
13 import base64 |
|
14 |
|
15 from PyQt5.QtCore import ( |
|
16 pyqtSignal, Qt, QObject, QByteArray, QDateTime, QUrl, QUrlQuery, |
|
17 QCryptographicHash, QFile, QIODevice, QTextStream, QDate, QTime |
|
18 ) |
|
19 from PyQt5.QtNetwork import QNetworkReply, QNetworkRequest |
|
20 |
|
21 from E5Gui import E5MessageBox |
|
22 |
|
23 import Utilities |
|
24 import Preferences |
|
25 |
|
26 |
|
class AdBlockSubscription(QObject):
    """
    Class implementing the AdBlock subscription.

    A subscription is described by an 'abp:subscribe' URL and keeps the
    list of AdBlock rules read from its rules file.

    @signal changed() emitted after the subscription has changed
    @signal rulesChanged() emitted after the subscription's rules have changed
    @signal enabledChanged(bool) emitted after the enabled state was changed
    @signal rulesEnabledChanged() emitted after a rule enabled state was
        changed
    """
    # emitted by setTitle(), setLocation() and after the rules were loaded
    changed = pyqtSignal()
    # emitted by addRule(), removeRule() and replaceRule()
    rulesChanged = pyqtSignal()
    # emitted by setEnabled(); carries the new enabled state
    enabledChanged = pyqtSignal(bool)
    # emitted by setRuleEnabled()
    rulesEnabledChanged = pyqtSignal()
|
41 |
|
def __init__(self, url, custom, parent=None, default=False):
    """
    Constructor

    @param url AdBlock URL for the subscription
    @type QUrl
    @param custom flag indicating a custom subscription
    @type bool
    @param parent reference to the parent object
    @type QObject
    @param default flag indicating a default subscription
    @type bool
    """
    super().__init__(parent)

    # lookup table for the month abbreviations found in
    # 'Last modified' / 'Updated' comments (calendar order)
    monthNames = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )
    self.__monthNameToNumber = {
        name: number for number, name in enumerate(monthNames, start=1)
    }

    # patterns used to evaluate the special comments of a rules file
    self.__checksumRe = re.compile(
        r"^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n",
        re.IGNORECASE | re.MULTILINE)
    self.__expiresRe = re.compile(
        r"(?:expires:|expires after)\s*(\d+)\s*(hour|h)?",
        re.IGNORECASE)
    self.__remoteModifiedRe = re.compile(
        r"!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"
        r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"
        r"(\d{2,4})\s*((\d{1,2}):(\d{2}))?",
        re.IGNORECASE)

    # general subscription state
    self.__custom = custom
    self.__defaultSubscription = default
    self.__url = url.toEncoded()
    self.__enabled = False
    self.__downloading = None       # network reply of a running download

    # data extracted from the subscription URL
    self.__title = ""
    self.__location = QByteArray()
    self.__lastUpdate = QDateTime()
    self.__requiresLocation = ""
    self.__requiresTitle = ""

    # data extracted from the rules file
    self.__updatePeriod = 0         # update period in hours, 0 = use default
    self.__remoteModified = QDateTime()
    self.__rules = []               # list containing all AdBlock rules

    # must come last because it relies on the attributes set above
    self.__parseUrl(url)
|
98 |
|
def __parseUrl(self, url):
    """
    Private method to extract the subscription data from an AdBlock URL.

    URLs not of the form 'abp:subscribe?...' are ignored. For accepted
    URLs the title, enabled state, location, required subscription and
    last update time are taken from the query items and the rules are
    loaded afterwards.

    @param url AdBlock URL for the subscription
    @type QUrl
    """
    if url.scheme() != "abp" or url.path() != "subscribe":
        return

    query = QUrlQuery(url)

    def decodedItem(key):
        # percent-decode the value of the given query item
        rawValue = query.queryItemValue(key)
        return QUrl.fromPercentEncoding(QByteArray(rawValue.encode()))

    self.__title = decodedItem("title")
    # everything but an explicit 'false' counts as enabled
    self.__enabled = query.queryItemValue("enabled") != "false"
    self.__location = QByteArray(decodedItem("location").encode("utf-8"))

    # Check for required subscription
    self.__requiresLocation = decodedItem("requiresLocation")
    self.__requiresTitle = decodedItem("requiresTitle")
    if self.__requiresLocation and self.__requiresTitle:
        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        WebBrowserWindow.adBlockManager().loadRequiredSubscription(
            self.__requiresLocation, self.__requiresTitle)

    self.__lastUpdate = QDateTime.fromString(
        query.queryItemValue("lastUpdate"), Qt.DateFormat.ISODate)

    self.__loadRules()
|
136 |
|
def url(self):
    """
    Public method to generate the URL for this subscription.

    The result is an 'abp:subscribe' URL whose query items carry the
    location and title and, if applicable, the required subscription,
    the disabled state and the time of the last update.

    @return AdBlock URL for the subscription
    @rtype QUrl
    """
    items = [
        ("location", bytes(self.__location).decode()),
        ("title", self.__title),
    ]
    if self.__requiresLocation and self.__requiresTitle:
        items.extend([
            ("requiresLocation", self.__requiresLocation),
            ("requiresTitle", self.__requiresTitle),
        ])
    if not self.__enabled:
        # the enabled state is only recorded when it is off
        items.append(("enabled", "false"))
    if self.__lastUpdate.isValid():
        timeStamp = self.__lastUpdate.toString(Qt.DateFormat.ISODate)
        items.append(("lastUpdate", timeStamp))

    urlQuery = QUrlQuery()
    urlQuery.setQueryItems(items)

    subscriptionUrl = QUrl()
    subscriptionUrl.setScheme("abp")
    subscriptionUrl.setPath("subscribe")
    subscriptionUrl.setQuery(urlQuery)
    return subscriptionUrl
|
166 |
|
def isEnabled(self):
    """
    Public method to report the enabled state of the subscription.

    @return flag indicating an enabled subscription
    @rtype bool
    """
    enabledState = self.__enabled
    return enabledState
|
175 |
|
def setEnabled(self, enabled):
    """
    Public method to change the enabled state of the subscription.

    The enabledChanged signal is emitted only if the state really
    changes.

    @param enabled flag indicating the new enabled state
    @type bool
    """
    if self.__enabled != enabled:
        self.__enabled = enabled
        self.enabledChanged.emit(enabled)
|
188 |
|
def title(self):
    """
    Public method to retrieve the title of the subscription.

    @return subscription title
    @rtype str
    """
    subscriptionTitle = self.__title
    return subscriptionTitle
|
197 |
|
def setTitle(self, title):
    """
    Public method to change the title of the subscription.

    The changed signal is emitted only if the title really changes.

    @param title new subscription title
    @type str
    """
    if self.__title != title:
        self.__title = title
        self.changed.emit()
|
210 |
|
def location(self):
    """
    Public method to retrieve the location of the subscription.

    @return URL created from the stored, encoded subscription location
    @rtype QUrl
    """
    encodedLocation = self.__location
    return QUrl.fromEncoded(encodedLocation)
|
219 |
|
def setLocation(self, url):
    """
    Public method to change the location of the subscription.

    Setting a different location invalidates the time of the last update
    and emits the changed signal; setting the current location again is
    a no-op.

    @param url URL of the subscription location
    @type QUrl
    """
    currentLocation = self.location()
    if url == currentLocation:
        return

    self.__location = url.toEncoded()
    # rules of the old location don't count as an update of the new one
    self.__lastUpdate = QDateTime()
    self.changed.emit()
|
233 |
|
def requiresLocation(self):
    """
    Public method to retrieve the location of the subscription this one
    depends on.

    @return location of the required subscription (empty, if none was
        given in the subscription URL)
    @rtype str
    """
    requiredLocation = self.__requiresLocation
    return requiredLocation
|
242 |
|
def lastUpdate(self):
    """
    Public method to retrieve the time stamp of the last update.

    @return date and time of the last update
    @rtype QDateTime
    """
    updateTimeStamp = self.__lastUpdate
    return updateTimeStamp
|
251 |
|
def rulesFileName(self):
    """
    Public method to get the name of the rules file.

    For 'file' locations this is the local file itself. For all other
    locations it is a file inside the 'web_browser/subscriptions' folder
    of the configuration directory, named after the SHA1 hash of the
    encoded location. That folder is created, if it does not exist yet.

    @return name of the rules file (empty string, if no location is set)
    @rtype str
    """
    location = self.location()
    if location.scheme() == "file":
        return location.toLocalFile()

    if self.__location.isEmpty():
        return ""

    sha1 = bytes(QCryptographicHash.hash(
        self.__location, QCryptographicHash.Algorithm.Sha1).toHex()
    ).decode()
    dataDir = os.path.join(
        Utilities.getConfigDir(), "web_browser", "subscriptions")
    # exist_ok avoids the race between checking for and creating the
    # directory
    os.makedirs(dataDir, exist_ok=True)
    return os.path.join(
        dataDir, "adblock_subscription_{0}".format(sha1))
|
275 |
|
def __loadRules(self):
    """
    Private method to load the rules of the subscription.

    Each line of the rules file becomes an AdBlockRule. While reading,
    'Expires' and 'Last modified' / 'Updated' comments are evaluated to
    determine the update period (in hours) and the remote modification
    time. A file not starting with '[Adblock' is deleted. An update check
    is performed at the end in any case.
    """
    fileName = self.rulesFileName()
    f = QFile(fileName)
    if f.exists():
        if not f.open(QIODevice.OpenModeFlag.ReadOnly):
            E5MessageBox.warning(
                None,
                self.tr("Load subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for reading.""")
                .format(fileName))
        else:
            textStream = QTextStream(f)
            header = textStream.readLine(1024)
            if not header.startswith("[Adblock"):
                E5MessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr("""AdBlock file '{0}' does not start"""
                            """ with [Adblock.""")
                    .format(fileName))
                f.close()
                f.remove()
                self.__lastUpdate = QDateTime()
            else:
                from .AdBlockRule import AdBlockRule

                self.__updatePeriod = 0
                self.__remoteModified = QDateTime()
                self.__rules = []
                self.__rules.append(AdBlockRule(header, self))
                while not textStream.atEnd():
                    line = textStream.readLine()
                    self.__rules.append(AdBlockRule(line, self))
                    expires = self.__expiresRe.search(line)
                    if expires:
                        period, kind = expires.groups()
                        if kind:
                            # hours
                            self.__updatePeriod = int(period)
                        else:
                            # days
                            self.__updatePeriod = int(period) * 24
                    remoteModified = self.__remoteModifiedRe.search(line)
                    if remoteModified:
                        day, month, year, time, hour, minute = (
                            remoteModified.groups()
                        )
                        # The pattern matches case-insensitively but the
                        # lookup table uses capitalized names only.
                        self.__remoteModified.setDate(
                            QDate(int(year),
                                  self.__monthNameToNumber[
                                      month.capitalize()],
                                  int(day))
                        )
                        if time:
                            self.__remoteModified.setTime(
                                QTime(int(hour), int(minute)))
                        else:
                            # no time given, set it to 23:59
                            self.__remoteModified.setTime(QTime(23, 59))
                # everything was read; don't keep the file open
                f.close()
                self.changed.emit()
    elif not fileName.endswith("_custom"):
        self.__lastUpdate = QDateTime()

    # NOTE(review): for a 'file' location with an old 'Last modified'
    # entry this appears to recurse via checkForUpdate() -> updateNow()
    # -> __loadRules(); confirm and guard if necessary.
    self.checkForUpdate()
|
343 |
|
def checkForUpdate(self):
    """
    Public method to update the subscription, if it is outdated.

    The update period is the one given by the rules file or, if there is
    none, the configured 'AdBlockUpdatePeriod' (in days). An update is
    started, if there was no update so far or if the remote modification
    time or the last update are older than the update period.
    """
    periodHours = self.__updatePeriod
    if not periodHours:
        periodHours = Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
    periodSeconds = periodHours * 3600

    if not self.__lastUpdate.isValid():
        updateNeeded = True
    elif (
        self.__remoteModified.isValid() and
        self.__remoteModified.addSecs(periodSeconds) <
        QDateTime.currentDateTime()
    ):
        updateNeeded = True
    else:
        updateNeeded = (
            self.__lastUpdate.addSecs(periodSeconds) <
            QDateTime.currentDateTime()
        )

    if updateNeeded:
        self.updateNow()
|
362 |
|
def updateNow(self):
    """
    Public method to update the subscription immediately.

    Nothing happens while a download is in progress or if the location
    is invalid. Rules of a 'file' location are simply re-read; all other
    locations are downloaded via the network manager of the web browser.
    """
    if self.__downloading is not None:
        # there is an unfinished download already
        return

    location = self.location()
    if not location.isValid():
        return

    if location.scheme() == "file":
        self.__lastUpdate = QDateTime.currentDateTime()
        self.__loadRules()
        return

    from WebBrowser.WebBrowserWindow import WebBrowserWindow
    networkManager = WebBrowserWindow.networkManager()
    reply = networkManager.get(QNetworkRequest(location))

    def downloadFinished():
        self.__rulesDownloaded(reply)

    reply.finished.connect(downloadFinished)
    self.__downloading = reply
|
384 |
|
def __rulesDownloaded(self, reply):
    """
    Private slot to deal with the downloaded rules.

    The received data is written to the rules file. It is loaded as the
    new rule set, if the limited EasyList is in use or the checksum
    validation succeeds; otherwise the file is removed again. The reply
    is scheduled for deletion on every path.

    @param reply reference to the network reply
    @type QNetworkReply
    """
    response = reply.readAll()
    reply.close()
    self.__downloading = None

    try:
        if reply.error() != QNetworkReply.NetworkError.NoError:
            if not self.__defaultSubscription:
                # don't show error if we try to load the default
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """<p>Subscription rules could not be"""
                        """ downloaded.</p><p>Error: {0}</p>""")
                    .format(reply.errorString()))
            else:
                # reset after first download attempt
                self.__defaultSubscription = False
            return

        if response.isEmpty():
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Got empty subscription rules."""))
            return

        fileName = self.rulesFileName()
        QFile.remove(fileName)
        f = QFile(fileName)
        if not f.open(QIODevice.OpenModeFlag.ReadWrite):
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        adBlockManager = WebBrowserWindow.adBlockManager()
        if (
            adBlockManager.useLimitedEasyList() and
            self.url().toString().startswith(
                adBlockManager.getDefaultSubscriptionUrl())
        ):
            limited = True
            # ignore Third-party advertisers rules for performance
            # whitelist rules at the end will be used
            index = response.indexOf(
                "!---------------------------"
                "Third-party advertisers"
                "---------------------------!")
            part1 = response.left(index)
            index = response.indexOf(
                "!-----------------------"
                "Whitelists to fix broken sites"
                "------------------------!")
            part2 = response.mid(index)
            f.write(part1)
            f.write(part2)
        else:
            limited = False
            f.write(response)
        f.close()

        self.__lastUpdate = QDateTime.currentDateTime()
        if limited or self.__validateCheckSum(fileName):
            # The download marker is not reset after this call because
            # loading the rules may have started the next download.
            self.__loadRules()
        else:
            QFile.remove(fileName)
    finally:
        # the reply has to be disposed of by its user, whatever happened
        reply.deleteLater()
|
462 |
|
def __validateCheckSum(self, fileName):
    """
    Private method to check the subscription file's checksum.

    The checksum is the unpadded Base64 encoded MD5 digest of the file
    contents with normalized line ends, without empty lines and without
    the checksum line itself. If it does not match, the user is asked
    whether to use the file anyway.

    @param fileName name of the file containing the subscription
    @type str
    @return flag indicating a valid file. A file is considered
        valid, if the checksum is OK, the file does not contain a
        checksum (i.e. cannot be checked) or the user accepted it
        despite a wrong checksum. A file that cannot be read or is not
        valid UTF-8 is invalid.
    @rtype bool
    """
    try:
        with open(fileName, "r", encoding="utf-8") as f:
            data = f.read()
    except (OSError, UnicodeDecodeError):
        # the file contains whatever the server sent; broken data must
        # not raise out of the download slot
        return False

    match = self.__checksumRe.search(data)
    if match is None:
        # consider it as valid
        return True
    expectedChecksum = match.group(1)

    # normalize the data
    data = data.replace("\r", "")                   # normalize eol
    data = re.sub(r"\n+", "\n", data)               # remove empty lines
    data = self.__checksumRe.sub("", data)          # remove checksum line

    # calculate checksum
    md5 = hashlib.md5()     # secok
    md5.update(data.encode("utf-8"))
    calculatedChecksum = (
        base64.b64encode(md5.digest()).decode().rstrip("=")
    )
    # compare without Base64 padding on either side
    if calculatedChecksum == expectedChecksum.rstrip("="):
        return True

    return E5MessageBox.yesNo(
        None,
        self.tr("Downloading subscription rules"),
        self.tr(
            """<p>AdBlock subscription <b>{0}</b> has a wrong"""
            """ checksum.<br/>"""
            """Found: {1}<br/>"""
            """Calculated: {2}<br/>"""
            """Use it anyway?</p>""")
        .format(self.__title, expectedChecksum,
                calculatedChecksum))
|
514 |
|
def saveRules(self):
    """
    Public method to save the subscription rules.

    The rules are written to the rules file one filter per line. A
    '[Adblock Plus 1.1.1]' header line is written first, if the rule
    list is empty or does not start with a header rule. Nothing is
    saved, if there is no rules file name.
    """
    fileName = self.rulesFileName()
    if not fileName:
        return

    f = QFile(fileName)
    if not f.open(QIODevice.OpenModeFlag.ReadWrite |
                  QIODevice.OpenModeFlag.Truncate):
        E5MessageBox.warning(
            None,
            self.tr("Saving subscription rules"),
            self.tr(
                """Unable to open AdBlock file '{0}' for writing.""")
            .format(fileName))
        return

    textStream = QTextStream(f)
    if not self.__rules or not self.__rules[0].isHeader():
        textStream << "[Adblock Plus 1.1.1]\n"
    for rule in self.__rules:
        textStream << rule.filter() << "\n"

    # don't rely on object destruction order to get the data to disk
    textStream.flush()
    f.close()
|
539 |
|
def rule(self, offset):
    """
    Public method to get a specific rule.

    @param offset offset of the rule
    @type int
    @return requested rule or None, if the offset is out of range
        (negative offsets are out of range as well)
    @rtype AdBlockRule
    """
    # reject negative offsets explicitly; Python would silently index
    # from the end of the list
    if not 0 <= offset < len(self.__rules):
        return None

    return self.__rules[offset]
|
553 |
|
def allRules(self):
    """
    Public method to retrieve all rules of the subscription.

    @return shallow copy of the list of rules
    @rtype list of AdBlockRule
    """
    rulesCopy = list(self.__rules)
    return rulesCopy
|
562 |
|
def addRule(self, rule):
    """
    Public method to append a rule to the subscription.

    The rulesChanged signal is emitted after the rule was added.

    @param rule reference to the rule to add
    @type AdBlockRule
    @return offset of the added rule
    @rtype int
    """
    rules = self.__rules
    rules.append(rule)
    self.rulesChanged.emit()

    newOffset = len(self.__rules) - 1
    return newOffset
|
576 |
|
def removeRule(self, offset):
    """
    Public method to remove a rule given the offset.

    Offsets outside of the rule list are ignored. The rulesChanged
    signal is emitted after a rule was removed.

    @param offset offset of the rule to remove
    @type int
    """
    # valid offsets are 0 .. len - 1; len itself is already out of range
    if offset < 0 or offset >= len(self.__rules):
        return

    del self.__rules[offset]
    self.rulesChanged.emit()
|
589 |
|
def replaceRule(self, rule, offset):
    """
    Public method to replace a rule given the offset.

    The rulesChanged signal is emitted after the rule was replaced.

    @param rule reference to the rule to set
    @type AdBlockRule
    @param offset offset of the rule to be replaced
    @type int
    @return rule stored at the offset after the replacement or None, if
        the offset is out of range (negative offsets are out of range
        as well)
    @rtype AdBlockRule
    """
    # reject negative offsets explicitly; Python would silently replace
    # a rule counted from the end of the list
    if not 0 <= offset < len(self.__rules):
        return None

    self.__rules[offset] = rule
    self.rulesChanged.emit()

    return self.__rules[offset]
|
608 |
|
def canEditRules(self):
    """
    Public method to report, whether the rules may be edited.

    @return flag indicating editable rules, i.e. the custom flag given
        at construction
    @rtype bool
    """
    isCustom = self.__custom
    return isCustom
|
617 |
|
def canBeRemoved(self):
    """
    Public method to report, whether the subscription may be removed.

    @return flag indicating removal is allowed, i.e. the subscription is
        neither a custom nor a default one
    @rtype bool
    """
    isProtected = self.__custom or self.__defaultSubscription
    return not isProtected
|
626 |
|
def setRuleEnabled(self, offset, enabled):
    """
    Public method to enable or disable a specific rule.

    The rulesEnabledChanged signal is emitted after the state was set.
    For CSS rules the user style sheet of the web browser main window is
    reloaded in addition.

    @param offset offset of the rule
    @type int
    @param enabled new enabled state
    @type bool
    @return reference to the changed rule or None, if the offset is out
        of range (negative offsets are out of range as well)
    @rtype AdBlockRule
    """
    # reject negative offsets explicitly; Python would silently change
    # a rule counted from the end of the list
    if not 0 <= offset < len(self.__rules):
        return None

    rule = self.__rules[offset]
    rule.setEnabled(enabled)
    self.rulesEnabledChanged.emit()

    if rule.isCSSRule():
        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        WebBrowserWindow.mainWindow().reloadUserStyleSheet()

    return rule