WebBrowser/AdBlock/AdBlockSubscription.py

branch
QtWebEngine
changeset 4847
a1a8eac81b54
parent 4631
5c1a96925da4
child 4858
19dff9c9cf26
equal deleted inserted replaced
4846:960e5e18894b 4847:a1a8eac81b54
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2016 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock subscription class.
8 """
9
10 from __future__ import unicode_literals
11
12 import os
13 import re
14 import hashlib
15 import base64
16
17 from PyQt5.QtCore import pyqtSignal, Qt, QObject, QByteArray, QDateTime, \
18 QUrl, QUrlQuery, QCryptographicHash, QFile, QIODevice, QTextStream, \
19 QDate, QTime, qVersion
20 from PyQt5.QtNetwork import QNetworkReply
21
22 from E5Gui import E5MessageBox
23
24 import Utilities
25 import Preferences
26
27
class AdBlockSubscription(QObject):
    """
    Class implementing the AdBlock subscription.

    A subscription is described by an 'abp:subscribe' URL whose query items
    carry the title, the location of the rules, the enabled state and the
    time of the last update. The rules themselves are kept in a local rules
    file and are sorted into several caches used for matching.

    @signal changed() emitted after the subscription has changed
    @signal rulesChanged() emitted after the subscription's rules have changed
    @signal enabledChanged(bool) emitted after the enabled state was changed
    """
    changed = pyqtSignal()
    rulesChanged = pyqtSignal()
    enabledChanged = pyqtSignal(bool)

    def __init__(self, url, custom, parent=None, default=False):
        """
        Constructor

        @param url AdBlock URL for the subscription (QUrl)
        @param custom flag indicating a custom subscription (boolean)
        @param parent reference to the parent object (QObject)
        @param default flag indicating a default subscription (boolean)
        """
        super(AdBlockSubscription, self).__init__(parent)

        self.__custom = custom
        self.__url = url.toEncoded()
        self.__enabled = False
        self.__downloading = None       # pending download reply or None
        self.__defaultSubscription = default

        self.__title = ""
        self.__location = QByteArray()  # encoded URL of the rules
        self.__lastUpdate = QDateTime()
        self.__requiresLocation = ""
        self.__requiresTitle = ""

        self.__updatePeriod = 0     # update period in hours, 0 = use default
        self.__remoteModified = QDateTime()

        self.__rules = []   # list containing all AdBlock rules

        # caches filled by __populateCache()
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__elementHidingRules = ""
        self.__documentRules = []
        self.__elemhideRules = []

        self.__checksumRe = re.compile(
            r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
            re.IGNORECASE | re.MULTILINE)
        self.__expiresRe = re.compile(
            r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
            re.IGNORECASE)
        self.__remoteModifiedRe = re.compile(
            r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
            r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
            r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
            re.IGNORECASE)

        self.__monthNameToNumber = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12
        }

        self.__parseUrl(url)

    def __parseUrl(self, url):
        """
        Private method to parse the AdBlock URL for the subscription.

        URLs not of the form 'abp:subscribe?...' are ignored. For a matching
        URL the subscription data is extracted from the query items and the
        rules are loaded.

        @param url AdBlock URL for the subscription (QUrl)
        """
        if url.scheme() != "abp":
            return

        if url.path() != "subscribe":
            return

        urlQuery = QUrlQuery(url)
        self.__title = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("title").encode()))
        # the subscription is enabled unless explicitly disabled
        self.__enabled = urlQuery.queryItemValue("enabled") != "false"
        self.__location = QByteArray(QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("location").encode()))
            .encode("utf-8"))

        # Check for required subscription
        self.__requiresLocation = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue(
                "requiresLocation").encode()))
        self.__requiresTitle = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("requiresTitle").encode()))
        if self.__requiresLocation and self.__requiresTitle:
            # NOTE(review): the other methods of this class use
            # WebBrowser.WebBrowserWindow; this one still goes through the
            # Helpviewer package - confirm which manager is intended.
            import Helpviewer.HelpWindow
            Helpviewer.HelpWindow.HelpWindow.adBlockManager()\
                .loadRequiredSubscription(self.__requiresLocation,
                                          self.__requiresTitle)

        lastUpdateString = urlQuery.queryItemValue("lastUpdate")
        self.__lastUpdate = QDateTime.fromString(lastUpdateString,
                                                 Qt.ISODate)

        self.__loadRules()

    def url(self):
        """
        Public method to generate the URL for this subscription.

        @return AdBlock URL for the subscription (QUrl)
        """
        url = QUrl()
        url.setScheme("abp")
        url.setPath("subscribe")

        queryItems = [
            ("location", bytes(self.__location).decode()),
            ("title", self.__title),
        ]
        if self.__requiresLocation and self.__requiresTitle:
            queryItems.append(("requiresLocation", self.__requiresLocation))
            queryItems.append(("requiresTitle", self.__requiresTitle))
        if not self.__enabled:
            # enabled is the default, so only the disabled state is stored
            queryItems.append(("enabled", "false"))
        if self.__lastUpdate.isValid():
            queryItems.append(("lastUpdate",
                               self.__lastUpdate.toString(Qt.ISODate)))

        query = QUrlQuery()
        query.setQueryItems(queryItems)
        url.setQuery(query)
        return url

    def isEnabled(self):
        """
        Public method to check, if the subscription is enabled.

        @return flag indicating the enabled status (boolean)
        """
        return self.__enabled

    def setEnabled(self, enabled):
        """
        Public method to set the enabled status.

        The enabledChanged signal is emitted only, if the state changes.

        @param enabled flag indicating the enabled status (boolean)
        """
        if self.__enabled == enabled:
            return

        self.__enabled = enabled
        self.enabledChanged.emit(enabled)

    def title(self):
        """
        Public method to get the subscription title.

        @return subscription title (string)
        """
        return self.__title

    def setTitle(self, title):
        """
        Public method to set the subscription title.

        The changed signal is emitted only, if the title changes.

        @param title subscription title (string)
        """
        if self.__title == title:
            return

        self.__title = title
        self.changed.emit()

    def location(self):
        """
        Public method to get the subscription location.

        @return URL of the subscription location (QUrl)
        """
        return QUrl.fromEncoded(self.__location)

    def setLocation(self, url):
        """
        Public method to set the subscription location.

        Changing the location invalidates the time of the last update.

        @param url URL of the subscription location (QUrl)
        """
        if url == self.location():
            return

        self.__location = url.toEncoded()
        self.__lastUpdate = QDateTime()
        self.changed.emit()

    def requiresLocation(self):
        """
        Public method to get the location of a required subscription.

        @return location of a required subscription (string)
        """
        return self.__requiresLocation

    def lastUpdate(self):
        """
        Public method to get the date and time of the last update.

        @return date and time of the last update (QDateTime)
        """
        return self.__lastUpdate

    def rulesFileName(self):
        """
        Public method to get the name of the rules file.

        For a 'file' location the local file itself is used. For all other
        locations the name is derived from the SHA1 hash of the location
        and the containing directory is created, if it does not exist.

        @return name of the rules file (string)
        """
        if self.location().scheme() == "file":
            return self.location().toLocalFile()

        if self.__location.isEmpty():
            return ""

        sha1 = bytes(QCryptographicHash.hash(
            self.__location, QCryptographicHash.Sha1).toHex()).decode()
        dataDir = os.path.join(
            Utilities.getConfigDir(), "web_browser", "subscriptions")
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)
        fileName = os.path.join(
            dataDir, "adblock_subscription_{0}".format(sha1))
        return fileName

    def __parseMetadata(self, line):
        """
        Private method to extract the update period and the remote
        modification time from a line of the rules file.

        @param line line of the rules file (string)
        """
        expires = self.__expiresRe.search(line)
        if expires:
            period, kind = expires.groups()
            if kind:
                # hours
                self.__updatePeriod = int(period)
            else:
                # days
                self.__updatePeriod = int(period) * 24

        remoteModified = self.__remoteModifiedRe.search(line)
        if remoteModified:
            day, month, year, time, hour, minute = remoteModified.groups()
            # The expression matches case insensitively but the lookup
            # table is keyed by capitalized names; normalize the name to
            # avoid a KeyError for e.g. 'JAN' or 'jan'.
            self.__remoteModified.setDate(
                QDate(int(year),
                      self.__monthNameToNumber[month.capitalize()],
                      int(day))
            )
            if time:
                self.__remoteModified.setTime(
                    QTime(int(hour), int(minute)))

    def __loadRules(self):
        """
        Private method to load the rules of the subscription.

        The rules file must start with an '[Adblock' header line; a file
        without it is removed. After loading, a check for an update is
        performed.
        """
        fileName = self.rulesFileName()
        f = QFile(fileName)
        if f.exists():
            if not f.open(QIODevice.ReadOnly):
                E5MessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr(
                        """Unable to open AdBlock file '{0}' for reading.""")
                    .format(fileName))
            else:
                textStream = QTextStream(f)
                header = textStream.readLine(1024)
                if not header.startswith("[Adblock"):
                    E5MessageBox.warning(
                        None,
                        self.tr("Load subscription rules"),
                        self.tr("""AdBlock file '{0}' does not start"""
                                """ with [Adblock.""")
                        .format(fileName))
                    f.close()
                    f.remove()
                    self.__lastUpdate = QDateTime()
                else:
                    from .AdBlockRule import AdBlockRule

                    self.__updatePeriod = 0
                    self.__remoteModified = QDateTime()
                    self.__rules = []
                    self.__rules.append(AdBlockRule(header, self))
                    while not textStream.atEnd():
                        line = textStream.readLine()
                        self.__rules.append(AdBlockRule(line, self))
                        self.__parseMetadata(line)
                    # release the file handle once all rules are read
                    f.close()
                    self.__populateCache()
                    self.changed.emit()
        elif not fileName.endswith("_custom"):
            # no rules file yet; force an update
            self.__lastUpdate = QDateTime()

        self.checkForUpdate()

    def checkForUpdate(self):
        """
        Public method to check for an update.

        An update is started, if the subscription was never updated or if
        the remote modification time or the time of the last update is
        older than the update period. The period given by the subscription
        takes precedence over the configured one.
        """
        if self.__updatePeriod:
            updatePeriod = self.__updatePeriod
        else:
            updatePeriod = \
                Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
        periodSecs = updatePeriod * 3600
        # NOTE(review): a rules file with an old 'Last modified' header
        # looks like it triggers an update each time it is loaded, and
        # loading calls this method again - confirm this is intended.
        if not self.__lastUpdate.isValid() or \
           (self.__remoteModified.isValid() and
            self.__remoteModified.addSecs(periodSecs) <
                QDateTime.currentDateTime()) or \
           self.__lastUpdate.addSecs(periodSecs) < \
                QDateTime.currentDateTime():
            self.updateNow()

    def updateNow(self):
        """
        Public method to update the subscription immediately.

        Nothing is done, if a download is in progress or the location is
        invalid. Local files are simply reloaded; other locations are
        downloaded.
        """
        if self.__downloading is not None:
            return

        if not self.location().isValid():
            return

        if self.location().scheme() == "file":
            self.__lastUpdate = QDateTime.currentDateTime()
            self.__loadRules()
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        from WebBrowser.Network.FollowRedirectReply import FollowRedirectReply
        self.__downloading = FollowRedirectReply(
            self.location(),
            WebBrowserWindow.networkManager())
        self.__downloading.finished.connect(self.__rulesDownloaded)

    def __rulesDownloaded(self):
        """
        Private slot to deal with the downloaded rules.

        The downloaded data is written to the rules file and loaded, if its
        checksum is accepted; otherwise the rules file is removed.
        """
        reply = self.sender()

        response = reply.readAll()
        reply.close()
        self.__downloading = None

        try:
            if reply.error() != QNetworkReply.NoError:
                if not self.__defaultSubscription:
                    # don't show error if we try to load the default
                    E5MessageBox.warning(
                        None,
                        self.tr("Downloading subscription rules"),
                        self.tr(
                            """<p>Subscription rules could not be"""
                            """ downloaded.</p><p>Error: {0}</p>""")
                        .format(reply.errorString()))
                else:
                    # reset after first download attempt
                    self.__defaultSubscription = False
                return

            if response.isEmpty():
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr("""Got empty subscription rules."""))
                return

            fileName = self.rulesFileName()
            QFile.remove(fileName)
            f = QFile(fileName)
            if not f.open(QIODevice.ReadWrite):
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """Unable to open AdBlock file '{0}' for writing.""")
                    .format(fileName))
                return
            f.write(response)
            f.close()
            self.__lastUpdate = QDateTime.currentDateTime()
            if self.__validateCheckSum(fileName):
                self.__loadRules()
            else:
                QFile.remove(fileName)
        finally:
            # dispose of the reply on every exit path, not just on success
            reply.deleteLater()

    def __validateCheckSum(self, fileName):
        """
        Private method to check the subscription file's checksum.

        In case of a checksum mismatch the user is asked, whether the file
        shall be used anyway.

        @param fileName name of the file containing the subscription (string)
        @return flag indicating a valid file (boolean). A file is considered
            valid, if the checksum is OK or the file does not contain a
            checksum (i.e. cannot be checked).
        """
        try:
            with open(fileName, "r", encoding="utf-8") as f:
                data = f.read()
        except (IOError, OSError):
            return False

        match = self.__checksumRe.search(data)
        if not match:
            # consider it as valid
            return True

        expectedChecksum = match.group(1)

        # normalize the data
        data = re.sub(r"\r", "", data)              # normalize eol
        data = re.sub(r"\n+", "\n", data)           # remove empty lines
        data = self.__checksumRe.sub("", data)      # remove checksum line

        # calculate checksum (unpadded base64 of the MD5 digest)
        md5 = hashlib.md5()
        md5.update(data.encode("utf-8"))
        calculatedChecksum = base64.b64encode(md5.digest()).decode()\
            .rstrip("=")
        if calculatedChecksum == expectedChecksum:
            return True

        res = E5MessageBox.yesNo(
            None,
            self.tr("Downloading subscription rules"),
            self.tr(
                """<p>AdBlock subscription <b>{0}</b> has a wrong"""
                """ checksum.<br/>"""
                """Found: {1}<br/>"""
                """Calculated: {2}<br/>"""
                """Use it anyway?</p>""")
            .format(self.__title, expectedChecksum,
                    calculatedChecksum))
        return res

    def saveRules(self):
        """
        Public method to save the subscription rules.

        A default header line is written, if the first rule is not a
        header.
        """
        fileName = self.rulesFileName()
        if not fileName:
            return

        f = QFile(fileName)
        if not f.open(QIODevice.ReadWrite | QIODevice.Truncate):
            E5MessageBox.warning(
                None,
                self.tr("Saving subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        textStream = QTextStream(f)
        if not self.__rules or not self.__rules[0].isHeader():
            textStream << "[Adblock Plus 1.1.1]\n"
        for rule in self.__rules:
            textStream << rule.filter() << "\n"
        # write the buffered data and release the file explicitly instead
        # of relying on the destruction order of the stream and the file
        textStream.flush()
        f.close()

    def match(self, req, urlDomain, urlString):
        """
        Public method to check the subscription for a matching rule.

        Exception rules take precedence over blocking rules.

        @param req reference to the network request (QNetworkRequest)
        @param urlDomain domain of the URL (string)
        @param urlString URL (string)
        @return reference to the rule object or None (AdBlockRule)
        """
        for rule in self.__networkExceptionRules:
            if rule.networkMatch(req, urlDomain, urlString):
                return None

        for rule in self.__networkBlockRules:
            if rule.networkMatch(req, urlDomain, urlString):
                return rule

        return None

    def adBlockDisabledForUrl(self, url):
        """
        Public method to check, if AdBlock is disabled for the given URL.

        @param url URL to check (QUrl)
        @return flag indicating disabled state (boolean)
        """
        for rule in self.__documentRules:
            if rule.urlMatch(url):
                return True

        return False

    def elemHideDisabledForUrl(self, url):
        """
        Public method to check, if element hiding is disabled for the given
        URL.

        @param url URL to check (QUrl)
        @return flag indicating disabled state (boolean)
        """
        if self.adBlockDisabledForUrl(url):
            return True

        for rule in self.__elemhideRules:
            if rule.urlMatch(url):
                return True

        return False

    def elementHidingRules(self):
        """
        Public method to get the element hiding rules.

        @return element hiding rules (string)
        """
        return self.__elementHidingRules

    def elementHidingRulesForDomain(self, domain):
        """
        Public method to get the element hiding rules for the given domain.

        @param domain domain name (string)
        @return element hiding rules as a string of CSS selectors each
            followed by a comma (string)
        """
        return "".join(
            rule.cssSelector() + ","
            for rule in self.__domainRestrictedCssRules
            if rule.matchDomain(domain)
        )

    def rule(self, offset):
        """
        Public method to get a specific rule.

        @param offset offset of the rule (integer)
        @return requested rule or None, if the offset is beyond the end of
            the rules list (AdBlockRule)
        """
        if offset >= len(self.__rules):
            return None

        return self.__rules[offset]

    def allRules(self):
        """
        Public method to get the list of rules.

        @return copy of the list of rules (list of AdBlockRule)
        """
        return self.__rules[:]

    def addRule(self, rule):
        """
        Public method to add a rule.

        @param rule reference to the rule to add (AdBlockRule)
        @return offset of the rule (integer)
        """
        self.__rules.append(rule)
        self.__populateCache()
        self.rulesChanged.emit()

        return len(self.__rules) - 1

    def removeRule(self, offset):
        """
        Public method to remove a rule given the offset.

        Offsets outside of the rules list are ignored.

        @param offset offset of the rule to remove (integer)
        """
        # valid offsets are 0 .. len - 1; len itself is already out of range
        if offset < 0 or offset >= len(self.__rules):
            return

        del self.__rules[offset]
        self.__populateCache()
        self.rulesChanged.emit()

    def replaceRule(self, rule, offset):
        """
        Public method to replace a rule given the offset.

        @param rule reference to the rule to set (AdBlockRule)
        @param offset offset of the rule to remove (integer)
        @return requested rule or None, if the offset is beyond the end of
            the rules list (AdBlockRule)
        """
        if offset >= len(self.__rules):
            return None

        self.__rules[offset] = rule
        self.__populateCache()
        self.rulesChanged.emit()

        return self.__rules[offset]

    def __populateCache(self):
        """
        Private method to populate the various rule caches.

        Disabled rules are skipped. Each enabled rule is put into exactly
        one cache.
        """
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []
        selectors = []

        for rule in self.__rules:
            if not rule.isEnabled():
                continue

            if rule.isCSSRule():
                if rule.isDomainRestricted():
                    self.__domainRestrictedCssRules.append(rule)
                else:
                    selectors.append(rule.cssSelector() + ",")
            elif rule.isDocument():
                self.__documentRules.append(rule)
            elif rule.isElementHiding():
                self.__elemhideRules.append(rule)
            elif rule.isException():
                self.__networkExceptionRules.append(rule)
            else:
                self.__networkBlockRules.append(rule)

        self.__elementHidingRules = "".join(selectors)

    def canEditRules(self):
        """
        Public method to check, if rules can be edited.

        @return flag indicating rules may be edited (boolean)
        """
        return self.__custom

    def canBeRemoved(self):
        """
        Public method to check, if the subscription can be removed.

        @return flag indicating removal is allowed (boolean)
        """
        return not self.__custom and not self.__defaultSubscription

    def setRuleEnabled(self, offset, enabled):
        """
        Public method to enable a specific rule.

        For a CSS rule the caches are rebuilt and the user style sheet is
        reloaded.

        @param offset offset of the rule (integer)
        @param enabled new enabled state (boolean)
        @return reference to the changed rule or None, if the offset is
            beyond the end of the rules list (AdBlockRule)
        """
        if offset >= len(self.__rules):
            return None

        rule = self.__rules[offset]
        rule.setEnabled(enabled)
        if rule.isCSSRule():
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            self.__populateCache()
            WebBrowserWindow.mainWindow().reloadUserStyleSheet()

        return rule

eric ide

mercurial