eric6/Helpviewer/AdBlock/AdBlockSubscription.py

changeset 6942
2602857055c5
parent 6645
ad476851d7e0
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock subscription class.
8 """
9
10 from __future__ import unicode_literals
11
12 import os
13 import re
14 import hashlib
15 import base64
16
17 from PyQt5.QtCore import pyqtSignal, Qt, QObject, QByteArray, QDateTime, \
18 QUrl, QCryptographicHash, QFile, QIODevice, QTextStream, QDate, QTime
19 from PyQt5.QtNetwork import QNetworkReply
20
21 from E5Gui import E5MessageBox
22
23 import Utilities
24 import Preferences
25 from Globals import qVersionTuple
26
27
28 class AdBlockSubscription(QObject):
29 """
30 Class implementing the AdBlock subscription.
31
32 @signal changed() emitted after the subscription has changed
33 @signal rulesChanged() emitted after the subscription's rules have changed
34 @signal enabledChanged(bool) emitted after the enabled state was changed
35 """
36 changed = pyqtSignal()
37 rulesChanged = pyqtSignal()
38 enabledChanged = pyqtSignal(bool)
39
def __init__(self, url, custom, parent=None, default=False):
    """
    Constructor
    
    @param url AdBlock URL describing the subscription (QUrl)
    @param custom flag indicating a custom subscription (boolean)
    @param parent reference to the parent object (QObject)
    @param default flag indicating a default subscription (boolean)
    """
    super(AdBlockSubscription, self).__init__(parent)

    # general state of the subscription
    self.__custom = custom
    self.__url = url.toEncoded()
    self.__enabled = False
    self.__downloading = None   # reply of a running download, if any
    self.__defaultSubscription = default

    # values filled in from the subscription URL
    self.__title = ""
    self.__location = QByteArray()
    self.__lastUpdate = QDateTime()
    self.__requiresLocation = ""
    self.__requiresTitle = ""

    # values filled in while reading the rules file
    self.__updatePeriod = 0     # update period in hours, 0 = use default
    self.__remoteModified = QDateTime()

    # list containing all AdBlock rules
    self.__rules = []

    # caches derived from the list of rules
    self.__networkExceptionRules = []
    self.__networkBlockRules = []
    self.__domainRestrictedCssRules = []
    self.__elementHidingRules = ""
    self.__documentRules = []
    self.__elemhideRules = []

    # patterns for the special comment lines of a rules file
    self.__checksumRe = re.compile(
        r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
        re.IGNORECASE | re.MULTILINE)
    self.__expiresRe = re.compile(
        r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
        re.IGNORECASE)
    self.__remoteModifiedRe = re.compile(
        r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
        r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
        r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
        re.IGNORECASE)

    # abbreviated month names mapped to their month number (1..12)
    monthNames = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    self.__monthNameToNumber = {
        name: number for number, name in enumerate(monthNames, 1)
    }

    self.__parseUrl(url)
103
def __parseUrl(self, url):
    """
    Private method to parse the AdBlock URL for the subscription.
    
    Only URLs of the form 'abp:subscribe?...' are processed; everything
    else is ignored silently. After the query items have been extracted,
    the rules of the subscription are loaded.
    
    @param url AdBlock URL for the subscription (QUrl)
    """
    if url.scheme() != "abp" or url.path() != "subscribe":
        return

    # Set up the query item access depending on the Qt version. The
    # 'enabled' and 'lastUpdate' items are fetched here because the two
    # variants treat them differently (raw vs. percent decoded).
    if qVersionTuple() >= (5, 0, 0):
        from PyQt5.QtCore import QUrlQuery
        urlQuery = QUrlQuery(url)

        def decodedItem(key):
            return QUrl.fromPercentEncoding(
                QByteArray(urlQuery.queryItemValue(key).encode()))

        enabledString = urlQuery.queryItemValue("enabled")
        lastUpdateString = urlQuery.queryItemValue("lastUpdate")
    else:
        def decodedItem(key):
            return QUrl.fromPercentEncoding(
                url.encodedQueryItemValue(key.encode()))

        enabledString = decodedItem("enabled")
        lastUpdateString = decodedItem("lastUpdate")

    self.__title = decodedItem("title")
    self.__enabled = enabledString != "false"
    self.__location = QByteArray(decodedItem("location").encode("utf-8"))

    # Check for required subscription
    self.__requiresLocation = decodedItem("requiresLocation")
    self.__requiresTitle = decodedItem("requiresTitle")
    if self.__requiresLocation and self.__requiresTitle:
        import Helpviewer.HelpWindow
        manager = Helpviewer.HelpWindow.HelpWindow.adBlockManager()
        manager.loadRequiredSubscription(
            self.__requiresLocation, self.__requiresTitle)

    self.__lastUpdate = QDateTime.fromString(lastUpdateString, Qt.ISODate)

    self.__loadRules()
166
def url(self):
    """
    Public method to generate the URL for this subscription.
    
    The result is an 'abp:subscribe' URL whose query items carry the
    location, the title and, if applicable, the required subscription,
    the disabled state and the time of the last update.
    
    @return AdBlock URL for the subscription (QUrl)
    """
    items = [
        ("location", bytes(self.__location).decode()),
        ("title", self.__title),
    ]
    if self.__requiresLocation and self.__requiresTitle:
        items.append(("requiresLocation", self.__requiresLocation))
        items.append(("requiresTitle", self.__requiresTitle))
    if not self.__enabled:
        # only the disabled state is recorded explicitly
        items.append(("enabled", "false"))
    if self.__lastUpdate.isValid():
        items.append(
            ("lastUpdate", self.__lastUpdate.toString(Qt.ISODate)))

    subscriptionUrl = QUrl()
    subscriptionUrl.setScheme("abp")
    subscriptionUrl.setPath("subscribe")
    if qVersionTuple() >= (5, 0, 0):
        from PyQt5.QtCore import QUrlQuery
        query = QUrlQuery()
        query.setQueryItems(items)
        subscriptionUrl.setQuery(query)
    else:
        subscriptionUrl.setQueryItems(items)
    return subscriptionUrl
196
def isEnabled(self):
    """
    Public method to query the enabled state of the subscription.
    
    @return True, if the subscription is enabled (boolean)
    """
    enabled = self.__enabled
    return enabled
204
def setEnabled(self, enabled):
    """
    Public method to change the enabled state.
    
    The enabledChanged signal is emitted only if the state really
    changes.
    
    @param enabled new enabled state (boolean)
    """
    if self.__enabled != enabled:
        self.__enabled = enabled
        self.enabledChanged.emit(enabled)
216
def title(self):
    """
    Public method to retrieve the title of the subscription.
    
    @return title of the subscription (string)
    """
    subscriptionTitle = self.__title
    return subscriptionTitle
224
def setTitle(self, title):
    """
    Public method to change the title of the subscription.
    
    The changed signal is emitted only if the title really changes.
    
    @param title new title of the subscription (string)
    """
    if self.__title != title:
        self.__title = title
        self.changed.emit()
236
def location(self):
    """
    Public method to retrieve the location of the subscription.
    
    @return URL built from the stored, encoded location (QUrl)
    """
    locationUrl = QUrl.fromEncoded(self.__location)
    return locationUrl
244
def setLocation(self, url):
    """
    Public method to change the location of the subscription.
    
    A changed location invalidates the time of the last update and
    causes the changed signal to be emitted.
    
    @param url new URL of the subscription location (QUrl)
    """
    if url != self.location():
        self.__location = url.toEncoded()
        self.__lastUpdate = QDateTime()
        self.changed.emit()
257
def requiresLocation(self):
    """
    Public method to retrieve the location of a required subscription.
    
    @return location of the required subscription (string)
    """
    requiredLocation = self.__requiresLocation
    return requiredLocation
265
def lastUpdate(self):
    """
    Public method to retrieve the time stamp of the last update.
    
    @return date and time of the last update (QDateTime)
    """
    timestamp = self.__lastUpdate
    return timestamp
273
def rulesFileName(self):
    """
    Public method to determine the name of the rules file.
    
    Subscriptions with a 'file' location use that local file directly.
    All others are stored below the configuration directory in a file
    named after the SHA1 hash of the location. That directory is created,
    if it does not exist yet.
    
    @return name of the rules file; empty, if there is no location
        (string)
    """
    locationUrl = self.location()
    if locationUrl.scheme() == "file":
        return locationUrl.toLocalFile()

    if self.__location.isEmpty():
        return ""

    digest = QCryptographicHash.hash(
        self.__location, QCryptographicHash.Sha1)
    sha1 = bytes(digest.toHex()).decode()

    dataDir = os.path.join(
        Utilities.getConfigDir(), "browser", "subscriptions")
    if not os.path.exists(dataDir):
        os.makedirs(dataDir)

    return os.path.join(dataDir, "adblock_subscription_{0}".format(sha1))
295
def __loadRules(self):
    """
    Private method to load the rules of the subscription.
    
    The rules file is read line by line. Each line becomes an AdBlockRule
    and is additionally scanned for an 'expires' and a 'Last modified' /
    'Updated' comment in order to determine the update period and the
    remote modification time. A file not starting with '[Adblock' is
    removed. Finally a check for an update is performed.
    """
    fileName = self.rulesFileName()
    f = QFile(fileName)
    if f.exists():
        if not f.open(QIODevice.ReadOnly):
            E5MessageBox.warning(
                None,
                self.tr("Load subscription rules"),
                self.tr(
                    """Unable to open adblock file '{0}' for reading.""")
                .format(fileName))
        else:
            textStream = QTextStream(f)
            header = textStream.readLine(1024)
            if not header.startswith("[Adblock"):
                E5MessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr("""AdBlock file '{0}' does not start"""
                            """ with [Adblock.""")
                    .format(fileName))
                f.close()
                f.remove()
                self.__lastUpdate = QDateTime()
            else:
                from .AdBlockRule import AdBlockRule

                self.__updatePeriod = 0
                self.__remoteModified = QDateTime()
                self.__rules = []
                self.__rules.append(AdBlockRule(header, self))
                while not textStream.atEnd():
                    line = textStream.readLine()
                    self.__rules.append(AdBlockRule(line, self))

                    expires = self.__expiresRe.search(line)
                    if expires:
                        period, kind = expires.groups()
                        if kind:
                            # hours
                            self.__updatePeriod = int(period)
                        else:
                            # days
                            self.__updatePeriod = int(period) * 24

                    remoteModified = self.__remoteModifiedRe.search(line)
                    if remoteModified:
                        day, month, year, time, hour, minute = \
                            remoteModified.groups()
                        # The pattern matches case insensitively while
                        # the lookup table only has capitalized keys;
                        # normalize to avoid a KeyError for e.g. 'jan'.
                        self.__remoteModified.setDate(
                            QDate(int(year),
                                  self.__monthNameToNumber[
                                      month.capitalize()],
                                  int(day))
                        )
                        if time:
                            self.__remoteModified.setTime(
                                QTime(int(hour), int(minute)))

                # everything has been read; release the file handle
                f.close()
                self.__populateCache()
                self.changed.emit()
    elif not fileName.endswith("_custom"):
        # no rules file yet; invalidate the time stamp to force an update
        self.__lastUpdate = QDateTime()

    self.checkForUpdate()
360
def checkForUpdate(self):
    """
    Public method to check, if the subscription needs to be updated.
    
    The update period is taken from the rules file or, if that did not
    specify one, from the 'AdBlockUpdatePeriod' preference (in days).
    An update is started, if there was no valid last update or if the
    remote modification time or the last update is older than the
    update period.
    """
    updatePeriod = self.__updatePeriod
    if not updatePeriod:
        updatePeriod = Preferences.getHelp("AdBlockUpdatePeriod") * 24
    periodSecs = updatePeriod * 3600

    if not self.__lastUpdate.isValid():
        outdated = True
    elif (self.__remoteModified.isValid() and
          self.__remoteModified.addSecs(periodSecs) <
            QDateTime.currentDateTime()):
        outdated = True
    else:
        outdated = (self.__lastUpdate.addSecs(periodSecs) <
                    QDateTime.currentDateTime())

    if outdated:
        self.updateNow()
376
def updateNow(self):
    """
    Public method to update the subscription right away.
    
    Nothing is done while a download is in progress or if the location
    is invalid. Local files are simply reloaded; everything else is
    downloaded via the network access manager.
    """
    if self.__downloading is not None or not self.location().isValid():
        return

    if self.location().scheme() != "file":
        import Helpviewer.HelpWindow
        from Helpviewer.Network.FollowRedirectReply import \
            FollowRedirectReply
        manager = Helpviewer.HelpWindow.HelpWindow.networkAccessManager()
        self.__downloading = FollowRedirectReply(self.location(), manager)
        # the reply is looked up when the signal fires
        self.__downloading.finished.connect(
            lambda: self.__rulesDownloaded(self.__downloading))
        return

    self.__lastUpdate = QDateTime.currentDateTime()
    self.__loadRules()
399
def __rulesDownloaded(self, reply):
    """
    Private slot to deal with the downloaded rules.
    
    The downloaded data is written to the rules file and loaded, if its
    checksum is valid. Otherwise the file is removed again. The reply is
    scheduled for deletion on every exit path.
    
    @param reply reference to the network reply
    @type QNetworkReply
    """
    response = reply.readAll()
    reply.close()
    self.__downloading = None

    try:
        if reply.error() != QNetworkReply.NoError:
            if not self.__defaultSubscription:
                # don't show error if we try to load the default
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """<p>Subscription rules could not be"""
                        """ downloaded.</p><p>Error: {0}</p>""")
                    .format(reply.errorString()))
            else:
                # reset after first download attempt
                self.__defaultSubscription = False
            return

        if response.isEmpty():
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Got empty subscription rules."""))
            return

        fileName = self.rulesFileName()
        QFile.remove(fileName)
        f = QFile(fileName)
        if not f.open(QIODevice.ReadWrite):
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """Unable to open adblock file '{0}' for writing.""")
                .format(fileName))
            return
        f.write(response)
        f.close()

        self.__lastUpdate = QDateTime.currentDateTime()
        if self.__validateCheckSum(fileName):
            # Do not reset self.__downloading after this call; loading
            # the rules checks for an update, which may have started a
            # new download already.
            self.__loadRules()
        else:
            QFile.remove(fileName)
    finally:
        # release the reply regardless of how the method is left
        reply.deleteLater()
453
def __validateCheckSum(self, fileName):
    """
    Private method to check the subscription file's checksum.
    
    The checksum is the base64 encoded MD5 digest (without padding) of
    the UTF-8 encoded file contents after normalizing line ends,
    dropping empty lines and removing the checksum line itself. If it
    does not match, the user is asked whether to use the file anyway.
    
    @param fileName name of the file containing the subscription (string)
    @return flag indicating a valid file (boolean). A file is considered
        valid, if the checksum is OK or the file does not contain a
        checksum (i.e. cannot be checked). A file that cannot be read or
        decoded is considered invalid.
    """
    try:
        # the context manager closes the file even if reading fails
        with open(fileName, "r", encoding="utf-8") as f:
            data = f.read()
    except (IOError, OSError, UnicodeDecodeError):
        return False

    match = self.__checksumRe.search(data)
    if match is None:
        # no checksum given; consider it as valid
        return True
    expectedChecksum = match.group(1)

    # normalize the data
    data = data.replace("\r", "")               # normalize eol
    data = re.sub(r"\n+", "\n", data)           # remove empty lines
    data = self.__checksumRe.sub("", data)      # remove checksum line

    # calculate checksum
    md5 = hashlib.md5()
    md5.update(data.encode("utf-8"))
    calculatedChecksum = base64.b64encode(md5.digest()).decode()\
        .rstrip("=")
    if calculatedChecksum == expectedChecksum:
        return True

    # checksum mismatch; let the user decide
    return E5MessageBox.yesNo(
        None,
        self.tr("Downloading subscription rules"),
        self.tr(
            """<p>AdBlock subscription <b>{0}</b> has a wrong"""
            """ checksum.<br/>"""
            """Found: {1}<br/>"""
            """Calculated: {2}<br/>"""
            """Use it anyway?</p>""")
        .format(self.__title, expectedChecksum,
                calculatedChecksum))
502
def saveRules(self):
    """
    Public method to save the subscription rules.
    
    The filter text of each rule is written on a line of its own. A
    default header line is written first, if the list of rules is empty
    or does not start with a header rule. Nothing is done, if there is
    no rules file name.
    """
    fileName = self.rulesFileName()
    if not fileName:
        return

    f = QFile(fileName)
    if not f.open(QIODevice.ReadWrite | QIODevice.Truncate):
        E5MessageBox.warning(
            None,
            self.tr("Saving subscription rules"),
            self.tr(
                """Unable to open adblock file '{0}' for writing.""")
            .format(fileName))
        return

    textStream = QTextStream(f)
    if not self.__rules or not self.__rules[0].isHeader():
        textStream << "[Adblock Plus 1.1.1]\n"
    for rule in self.__rules:
        textStream << rule.filter() << "\n"

    # write out buffered data and close the file explicitly instead of
    # relying on the destruction order of the stream and the file
    textStream.flush()
    f.close()
526
def match(self, req, urlDomain, urlString):
    """
    Public method to look for a rule blocking the given request.
    
    Exception rules take precedence: if one of them matches, nothing is
    blocked. Otherwise the first matching blocking rule is returned.
    
    @param req reference to the network request (QNetworkRequest)
    @param urlDomain domain of the URL (string)
    @param urlString URL (string)
    @return reference to the rule object or None (AdBlockRule)
    """
    if any(exceptionRule.networkMatch(req, urlDomain, urlString)
           for exceptionRule in self.__networkExceptionRules):
        return None

    return next(
        (blockRule for blockRule in self.__networkBlockRules
         if blockRule.networkMatch(req, urlDomain, urlString)),
        None)
545
def adBlockDisabledForUrl(self, url):
    """
    Public method to check, if AdBlock is disabled for the given URL.
    
    This is the case, if any of the document rules matches the URL.
    
    @param url URL to check (QUrl)
    @return flag indicating disabled state (boolean)
    """
    return any(
        documentRule.urlMatch(url)
        for documentRule in self.__documentRules)
558
def elemHideDisabledForUrl(self, url):
    """
    Public method to check, if element hiding is disabled for the given
    URL.
    
    This is the case, if AdBlock is disabled for the URL altogether or
    if any of the element hiding exception rules matches the URL.
    
    @param url URL to check (QUrl)
    @return flag indicating disabled state (boolean)
    """
    return (
        self.adBlockDisabledForUrl(url) or
        any(hideRule.urlMatch(url) for hideRule in self.__elemhideRules)
    )
575
def elementHidingRules(self):
    """
    Public method to retrieve the cached element hiding rules.
    
    @return CSS selectors of the rules not restricted to a domain, each
        followed by a comma (string)
    """
    selectors = self.__elementHidingRules
    return selectors
583
def elementHidingRulesForDomain(self, domain):
    """
    Public method to get the element hiding rules for the given domain.
    
    @param domain domain name (string)
    @return CSS selectors of the domain restricted rules matching the
        domain, each followed by a comma (string)
    """
    return "".join(
        cssRule.cssSelector() + ","
        for cssRule in self.__domainRestrictedCssRules
        if cssRule.matchDomain(domain)
    )
598
def rule(self, offset):
    """
    Public method to retrieve the rule at a given offset.
    
    @param offset offset of the rule (integer)
    @return requested rule or None, if the offset is beyond the end of
        the list of rules (AdBlockRule)
    """
    if offset < len(self.__rules):
        return self.__rules[offset]

    return None
610
def allRules(self):
    """
    Public method to retrieve all rules.
    
    @return copy of the list of rules (list of AdBlockRule)
    """
    return list(self.__rules)
618
def addRule(self, rule):
    """
    Public method to append a rule to the list of rules.
    
    The rule caches are rebuilt and the rulesChanged signal is emitted.
    
    @param rule reference to the rule to add (AdBlockRule)
    @return offset of the last rule in the list (integer)
    """
    self.__rules.append(rule)
    self.__populateCache()
    self.rulesChanged.emit()

    lastOffset = len(self.__rules) - 1
    return lastOffset
631
def removeRule(self, offset):
    """
    Public method to remove a rule given the offset.
    
    An offset outside of the list of rules is ignored. Otherwise the
    rule caches are rebuilt and the rulesChanged signal is emitted.
    
    @param offset offset of the rule to remove (integer)
    """
    # valid offsets are 0 .. len - 1; offset == len would raise an
    # IndexError in the del statement below
    if offset < 0 or offset >= len(self.__rules):
        return

    del self.__rules[offset]
    self.__populateCache()
    self.rulesChanged.emit()
644
def replaceRule(self, rule, offset):
    """
    Public method to replace the rule at a given offset.
    
    The rule caches are rebuilt and the rulesChanged signal is emitted.
    
    @param rule reference to the rule to set (AdBlockRule)
    @param offset offset of the rule to be replaced (integer)
    @return rule stored at the offset or None, if the offset is beyond
        the end of the list of rules (AdBlockRule)
    """
    if offset < len(self.__rules):
        self.__rules[offset] = rule
        self.__populateCache()
        self.rulesChanged.emit()

        return self.__rules[offset]

    return None
661
def __populateCache(self):
    """
    Private method to rebuild the rule caches from the list of rules.
    
    Disabled rules are skipped. CSS rules go to the domain restricted
    list or the global selector string. All other rules are sorted into
    the document, element hiding, network exception or network block
    list (checked in this order).
    """
    self.__networkExceptionRules = []
    self.__networkBlockRules = []
    self.__domainRestrictedCssRules = []
    self.__elementHidingRules = ""
    self.__documentRules = []
    self.__elemhideRules = []

    for rule in self.__rules:
        if not rule.isEnabled():
            continue

        if rule.isCSSRule():
            if rule.isDomainRestricted():
                self.__domainRestrictedCssRules.append(rule)
            else:
                self.__elementHidingRules += rule.cssSelector() + ","
            continue

        # pick the cache for a non CSS rule
        if rule.isDocument():
            cache = self.__documentRules
        elif rule.isElementHiding():
            cache = self.__elemhideRules
        elif rule.isException():
            cache = self.__networkExceptionRules
        else:
            cache = self.__networkBlockRules
        cache.append(rule)
690
def canEditRules(self):
    """
    Public method to check, if the rules may be edited.
    
    @return custom flag given to the constructor (boolean)
    """
    editable = self.__custom
    return editable
698
def canBeRemoved(self):
    """
    Public method to check, if the subscription may be removed.
    
    @return True, if this is neither a custom nor a default subscription
        (boolean)
    """
    return not (self.__custom or self.__defaultSubscription)
706
def setRuleEnabled(self, offset, enabled):
    """
    Public method to set the enabled state of a specific rule.
    
    For a CSS rule the rule caches are rebuilt and the main window is
    asked to reload the user style sheet.
    
    @param offset offset of the rule (integer)
    @param enabled new enabled state (boolean)
    @return reference to the changed rule or None, if the offset is
        beyond the end of the list of rules (AdBlockRule)
    """
    if offset < len(self.__rules):
        changedRule = self.__rules[offset]
        changedRule.setEnabled(enabled)
        if changedRule.isCSSRule():
            import Helpviewer.HelpWindow
            self.__populateCache()
            mainWindow = Helpviewer.HelpWindow.HelpWindow.mainWindow()
            mainWindow.reloadUserStyleSheet()

        return changedRule

    return None

eric ide

mercurial