|
# -*- coding: utf-8 -*-

# Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing the AdBlock subscription class.
"""
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 import os |
|
13 import re |
|
14 import hashlib |
|
15 import base64 |
|
16 |
|
17 from PyQt5.QtCore import pyqtSignal, Qt, QObject, QByteArray, QDateTime, \ |
|
18 QUrl, QUrlQuery, QCryptographicHash, QFile, QIODevice, QTextStream, \ |
|
19 QDate, QTime |
|
20 from PyQt5.QtNetwork import QNetworkReply, QNetworkRequest |
|
21 |
|
22 from E5Gui import E5MessageBox |
|
23 |
|
24 import Utilities |
|
25 import Preferences |
|
26 |
|
27 |
|
class AdBlockSubscription(QObject):
    """
    Class implementing the AdBlock subscription.

    A subscription is described by an 'abp:subscribe' URL carrying the
    title, the location of the rules, the enabled state and the time of
    the last update. The rules themselves are kept in a local rules file
    (see rulesFileName()).

    @signal changed() emitted after the subscription has changed
    @signal rulesChanged() emitted after the subscription's rules have changed
    @signal enabledChanged(bool) emitted after the enabled state was changed
    @signal rulesEnabledChanged() emitted after a rule enabled state was
        changed
    """
    changed = pyqtSignal()
    rulesChanged = pyqtSignal()
    enabledChanged = pyqtSignal(bool)
    rulesEnabledChanged = pyqtSignal()

    def __init__(self, url, custom, parent=None, default=False):
        """
        Constructor

        @param url AdBlock URL for the subscription
        @type QUrl
        @param custom flag indicating a custom subscription
        @type bool
        @param parent reference to the parent object
        @type QObject
        @param default flag indicating a default subscription
        @type bool
        """
        super(AdBlockSubscription, self).__init__(parent)

        self.__custom = custom
        self.__url = url.toEncoded()
        self.__enabled = False
        self.__downloading = None   # pending network reply, None if idle
        self.__defaultSubscription = default

        self.__title = ""
        self.__location = QByteArray()
        self.__lastUpdate = QDateTime()
        self.__requiresLocation = ""
        self.__requiresTitle = ""

        self.__updatePeriod = 0     # update period in hours, 0 = use default
        self.__remoteModified = QDateTime()

        self.__rules = []   # list containing all AdBlock rules

        self.__checksumRe = re.compile(
            r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
            re.IGNORECASE | re.MULTILINE)
        self.__expiresRe = re.compile(
            r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
            re.IGNORECASE)
        self.__remoteModifiedRe = re.compile(
            r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
            r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
            r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
            re.IGNORECASE)

        self.__monthNameToNumber = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12
        }

        self.__parseUrl(url)

    def __parseUrl(self, url):
        """
        Private method to parse the AdBlock URL for the subscription.

        URLs with a scheme other than 'abp' or a path other than
        'subscribe' are ignored. Otherwise the subscription attributes are
        taken from the query items and the rules are loaded.

        @param url AdBlock URL for the subscription
        @type QUrl
        """
        if url.scheme() != "abp":
            return

        if url.path() != "subscribe":
            return

        urlQuery = QUrlQuery(url)
        self.__title = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("title").encode()))
        # a subscription is enabled unless explicitly marked as 'false'
        self.__enabled = urlQuery.queryItemValue("enabled") != "false"
        self.__location = QByteArray(QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("location").encode()))
            .encode("utf-8"))

        # Check for required subscription
        self.__requiresLocation = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue(
                "requiresLocation").encode()))
        self.__requiresTitle = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("requiresTitle").encode()))
        if self.__requiresLocation and self.__requiresTitle:
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.adBlockManager().loadRequiredSubscription(
                self.__requiresLocation, self.__requiresTitle)

        lastUpdateString = urlQuery.queryItemValue("lastUpdate")
        self.__lastUpdate = QDateTime.fromString(lastUpdateString,
                                                 Qt.ISODate)

        self.__loadRules()

    def url(self):
        """
        Public method to generate the URL for this subscription.

        @return AdBlock URL for the subscription
        @rtype QUrl
        """
        url = QUrl()
        url.setScheme("abp")
        url.setPath("subscribe")

        queryItems = []
        queryItems.append(("location", bytes(self.__location).decode()))
        queryItems.append(("title", self.__title))
        if self.__requiresLocation and self.__requiresTitle:
            queryItems.append(("requiresLocation", self.__requiresLocation))
            queryItems.append(("requiresTitle", self.__requiresTitle))
        if not self.__enabled:
            # 'enabled' is only stored for disabled subscriptions
            queryItems.append(("enabled", "false"))
        if self.__lastUpdate.isValid():
            queryItems.append(("lastUpdate",
                               self.__lastUpdate.toString(Qt.ISODate)))

        query = QUrlQuery()
        query.setQueryItems(queryItems)
        url.setQuery(query)
        return url

    def isEnabled(self):
        """
        Public method to check, if the subscription is enabled.

        @return flag indicating the enabled status
        @rtype bool
        """
        return self.__enabled

    def setEnabled(self, enabled):
        """
        Public method to set the enabled status.

        The enabledChanged signal is emitted only if the status changes.

        @param enabled flag indicating the enabled status
        @type bool
        """
        if self.__enabled == enabled:
            return

        self.__enabled = enabled
        self.enabledChanged.emit(enabled)

    def title(self):
        """
        Public method to get the subscription title.

        @return subscription title
        @rtype str
        """
        return self.__title

    def setTitle(self, title):
        """
        Public method to set the subscription title.

        The changed signal is emitted only if the title changes.

        @param title subscription title
        @type str
        """
        if self.__title == title:
            return

        self.__title = title
        self.changed.emit()

    def location(self):
        """
        Public method to get the subscription location.

        @return URL of the subscription location
        @rtype QUrl
        """
        return QUrl.fromEncoded(self.__location)

    def setLocation(self, url):
        """
        Public method to set the subscription location.

        Changing the location invalidates the time of the last update and
        emits the changed signal.

        @param url URL of the subscription location
        @type QUrl
        """
        if url == self.location():
            return

        self.__location = url.toEncoded()
        self.__lastUpdate = QDateTime()
        self.changed.emit()

    def requiresLocation(self):
        """
        Public method to get the location of a required subscription.

        @return location of a required subscription
        @rtype str
        """
        return self.__requiresLocation

    def lastUpdate(self):
        """
        Public method to get the date and time of the last update.

        @return date and time of the last update
        @rtype QDateTime
        """
        return self.__lastUpdate

    def rulesFileName(self):
        """
        Public method to get the name of the rules file.

        For a 'file' location the local file itself is used. Otherwise the
        name is derived from the SHA1 hash of the location and placed in
        the 'web_browser/subscriptions' folder of the configuration
        directory, which is created if it does not exist.

        @return name of the rules file (empty for an empty location)
        @rtype str
        """
        if self.location().scheme() == "file":
            return self.location().toLocalFile()

        if self.__location.isEmpty():
            return ""

        sha1 = bytes(QCryptographicHash.hash(
            self.__location, QCryptographicHash.Sha1).toHex()).decode()
        dataDir = os.path.join(
            Utilities.getConfigDir(), "web_browser", "subscriptions")
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)
        fileName = os.path.join(
            dataDir, "adblock_subscription_{0}".format(sha1))
        return fileName

    def __loadRules(self, checkUpdate=True):
        """
        Private method to load the rules of the subscription.

        @param checkUpdate flag indicating to check for an update after
            loading. Callers that just refreshed the rules pass False;
            otherwise a stale 'Last modified' header or a missing file
            would trigger the next refresh immediately (endless recursion
            for 'file' locations, endless downloads for remote ones).
        @type bool
        """
        fileName = self.rulesFileName()
        f = QFile(fileName)
        if f.exists():
            if not f.open(QIODevice.ReadOnly):
                E5MessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr(
                        """Unable to open AdBlock file '{0}' for reading.""")
                    .format(fileName))
            else:
                textStream = QTextStream(f)
                header = textStream.readLine(1024)
                if not header.startswith("[Adblock"):
                    E5MessageBox.warning(
                        None,
                        self.tr("Load subscription rules"),
                        self.tr("""AdBlock file '{0}' does not start"""
                                """ with [Adblock.""")
                        .format(fileName))
                    f.close()
                    f.remove()
                    self.__lastUpdate = QDateTime()
                else:
                    from .AdBlockRule import AdBlockRule

                    self.__updatePeriod = 0
                    self.__remoteModified = QDateTime()
                    self.__rules = []
                    self.__rules.append(AdBlockRule(header, self))
                    while not textStream.atEnd():
                        line = textStream.readLine()
                        self.__rules.append(AdBlockRule(line, self))
                        expires = self.__expiresRe.search(line)
                        if expires:
                            period, kind = expires.groups()
                            if kind:
                                # hours
                                self.__updatePeriod = int(period)
                            else:
                                # days
                                self.__updatePeriod = int(period) * 24
                        remoteModified = self.__remoteModifiedRe.search(line)
                        if remoteModified:
                            day, month, year, time, hour, minute = \
                                remoteModified.groups()
                            # the pattern matches case insensitively, the
                            # lookup table uses capitalized month names
                            self.__remoteModified.setDate(
                                QDate(int(year),
                                      self.__monthNameToNumber[
                                          month.capitalize()],
                                      int(day))
                            )
                            if time:
                                self.__remoteModified.setTime(
                                    QTime(int(hour), int(minute)))
                            else:
                                # no time given, set it to 23:59
                                self.__remoteModified.setTime(QTime(23, 59))
                    f.close()
                    self.changed.emit()
        elif not fileName.endswith("_custom"):
            self.__lastUpdate = QDateTime()

        if checkUpdate:
            self.checkForUpdate()

    def checkForUpdate(self):
        """
        Public method to check for an update.

        An update is started, if the subscription was never updated, if
        the remote modification time found in the rules file is older than
        the update period or if the last update is older than the update
        period. The period given by the rules file takes precedence over
        the configured one.
        """
        if self.__updatePeriod:
            updatePeriod = self.__updatePeriod
        else:
            # the configured value is multiplied by 24 to get hours
            updatePeriod = \
                Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
        if not self.__lastUpdate.isValid() or \
            (self.__remoteModified.isValid() and
             self.__remoteModified.addSecs(updatePeriod * 3600) <
                QDateTime.currentDateTime()) or \
            self.__lastUpdate.addSecs(updatePeriod * 3600) < \
                QDateTime.currentDateTime():
            self.updateNow()

    def updateNow(self):
        """
        Public method to update the subscription immediately.

        Nothing is done while a download is pending or if the location is
        invalid. Rules of a 'file' location are simply reloaded; all other
        locations are downloaded via the network manager.
        """
        if self.__downloading is not None:
            return

        if not self.location().isValid():
            return

        if self.location().scheme() == "file":
            self.__lastUpdate = QDateTime.currentDateTime()
            # this already is the update; don't check for another one
            self.__loadRules(checkUpdate=False)
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        reply = WebBrowserWindow.networkManager().get(
            QNetworkRequest(self.location()))
        reply.finished.connect(
            lambda: self.__rulesDownloaded(reply))
        self.__downloading = reply

    def __rulesDownloaded(self, reply):
        """
        Private slot to deal with the downloaded rules.

        @param reply reference to the network reply
        @type QNetworkReply
        """
        response = reply.readAll()
        errorCode = reply.error()
        errorString = reply.errorString()
        reply.close()
        # the reply is fully consumed; release it on every exit path
        reply.deleteLater()
        self.__downloading = None

        if errorCode != QNetworkReply.NoError:
            if not self.__defaultSubscription:
                # don't show error if we try to load the default
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """<p>Subscription rules could not be"""
                        """ downloaded.</p><p>Error: {0}</p>""")
                    .format(errorString))
            else:
                # reset after first download attempt
                self.__defaultSubscription = False
            return

        if response.isEmpty():
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Got empty subscription rules."""))
            return

        fileName = self.rulesFileName()
        QFile.remove(fileName)
        f = QFile(fileName)
        if not f.open(QIODevice.ReadWrite):
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        limited = False
        if WebBrowserWindow.adBlockManager().useLimitedEasyList() and \
            self.url().toString().startswith(
                WebBrowserWindow.adBlockManager().getDefaultSubscriptionUrl()):
            # ignore Third-party advertisers rules for performance
            # whitelist rules at the end will be used
            thirdPartyIndex = response.indexOf(
                "!---------------------------"
                "Third-party advertisers"
                "---------------------------!")
            whitelistIndex = response.indexOf(
                "!-----------------------"
                "Whitelists to fix broken sites"
                "------------------------!")
            # only cut the list, if both section markers were found in the
            # expected order; otherwise keep the complete list
            limited = 0 <= thirdPartyIndex < whitelistIndex
        if limited:
            f.write(response.left(thirdPartyIndex))
            f.write(response.mid(whitelistIndex))
        else:
            f.write(response)
        f.close()
        self.__lastUpdate = QDateTime.currentDateTime()
        # a shortened list cannot match the original checksum
        if limited or self.__validateCheckSum(fileName):
            # the rules were just refreshed; don't check for another update
            self.__loadRules(checkUpdate=False)
        else:
            QFile.remove(fileName)

    def __validateCheckSum(self, fileName):
        """
        Private method to check the subscription file's checksum.

        The checksum is the base64 encoded MD5 digest (without padding) of
        the normalized file content with the checksum line removed. In
        case of a mismatch the user is asked whether to use the file
        anyway.

        @param fileName name of the file containing the subscription
        @type str
        @return flag indicating a valid file. A file is considered
            valid, if the checksum is OK, the file does not contain a
            checksum (i.e. cannot be checked) or we are using the limited
            EasyList (because we fiddled with the original).
        @rtype bool
        """
        try:
            with open(fileName, "r", encoding="utf-8") as f:
                data = f.read()
        except (IOError, OSError):
            return False

        match = self.__checksumRe.search(data)
        if match:
            expectedChecksum = match.group(1)
        else:
            # consider it as valid
            return True

        # normalize the data
        data = re.sub(r"\r", "", data)              # normalize eol
        data = re.sub(r"\n+", "\n", data)           # remove empty lines
        data = self.__checksumRe.sub("", data)      # remove checksum line

        # calculate checksum
        md5 = hashlib.md5()
        md5.update(data.encode("utf-8"))
        calculatedChecksum = base64.b64encode(md5.digest()).decode()\
            .rstrip("=")
        if calculatedChecksum == expectedChecksum:
            return True
        else:
            res = E5MessageBox.yesNo(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """<p>AdBlock subscription <b>{0}</b> has a wrong"""
                    """ checksum.<br/>"""
                    """Found: {1}<br/>"""
                    """Calculated: {2}<br/>"""
                    """Use it anyway?</p>""")
                .format(self.__title, expectedChecksum,
                        calculatedChecksum))
            return res

    def saveRules(self):
        """
        Public method to save the subscription rules.

        The rules are written to the rules file one filter per line. A
        default header line is written first, if the first rule is not a
        header.
        """
        fileName = self.rulesFileName()
        if not fileName:
            return

        f = QFile(fileName)
        if not f.open(QIODevice.ReadWrite | QIODevice.Truncate):
            E5MessageBox.warning(
                None,
                self.tr("Saving subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        textStream = QTextStream(f)
        if not self.__rules or not self.__rules[0].isHeader():
            textStream << "[Adblock Plus 1.1.1]\n"
        for rule in self.__rules:
            textStream << rule.filter() << "\n"
        # don't rely on garbage collection to get the data onto disk
        textStream.flush()
        f.close()

    def rule(self, offset):
        """
        Public method to get a specific rule.

        @param offset offset of the rule
        @type int
        @return requested rule (None, if the offset is beyond the end of
            the rules list)
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        return self.__rules[offset]

    def allRules(self):
        """
        Public method to get the list of rules.

        @return list of rules (a copy of the internal list)
        @rtype list of AdBlockRule
        """
        return self.__rules[:]

    def addRule(self, rule):
        """
        Public method to add a rule.

        The rule is appended to the list of rules and the rulesChanged
        signal is emitted.

        @param rule reference to the rule to add
        @type AdBlockRule
        @return offset of the rule
        @rtype int
        """
        self.__rules.append(rule)
        self.rulesChanged.emit()

        return len(self.__rules) - 1

    def removeRule(self, offset):
        """
        Public method to remove a rule given the offset.

        Offsets outside of the rules list are ignored.

        @param offset offset of the rule to remove
        @type int
        """
        # valid offsets are 0 .. len - 1
        if offset < 0 or offset >= len(self.__rules):
            return

        del self.__rules[offset]
        self.rulesChanged.emit()

    def replaceRule(self, rule, offset):
        """
        Public method to replace a rule given the offset.

        @param rule reference to the rule to set
        @type AdBlockRule
        @param offset offset of the rule to replace
        @type int
        @return rule stored at the given offset (None, if the offset is
            beyond the end of the rules list)
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        self.__rules[offset] = rule
        self.rulesChanged.emit()

        return self.__rules[offset]

    def canEditRules(self):
        """
        Public method to check, if rules can be edited.

        @return flag indicating rules may be edited
        @rtype bool
        """
        return self.__custom

    def canBeRemoved(self):
        """
        Public method to check, if the subscription can be removed.

        @return flag indicating removal is allowed
        @rtype bool
        """
        return not self.__custom and not self.__defaultSubscription

    def setRuleEnabled(self, offset, enabled):
        """
        Public method to enable a specific rule.

        The rulesEnabledChanged signal is emitted and, for a CSS rule, the
        user style sheet of the main window is reloaded.

        @param offset offset of the rule
        @type int
        @param enabled new enabled state
        @type bool
        @return reference to the changed rule (None, if the offset is
            beyond the end of the rules list)
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        rule = self.__rules[offset]
        rule.setEnabled(enabled)
        self.rulesEnabledChanged.emit()

        if rule.isCSSRule():
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.mainWindow().reloadUserStyleSheet()

        return rule