eric6/WebBrowser/AdBlock/AdBlockSubscription.py

changeset 6942
2602857055c5
parent 6645
ad476851d7e0
child 7229
53054eb5b15a
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock subscription class.
8 """
9
10 from __future__ import unicode_literals
11
12 import os
13 import re
14 import hashlib
15 import base64
16
17 from PyQt5.QtCore import pyqtSignal, Qt, QObject, QByteArray, QDateTime, \
18 QUrl, QUrlQuery, QCryptographicHash, QFile, QIODevice, QTextStream, \
19 QDate, QTime
20 from PyQt5.QtNetwork import QNetworkReply, QNetworkRequest
21
22 from E5Gui import E5MessageBox
23
24 import Utilities
25 import Preferences
26
27
class AdBlockSubscription(QObject):
    """
    Class implementing the AdBlock subscription.

    A subscription is configured through an 'abp:subscribe' URL whose query
    items carry the title, location, enabled state, last update time and
    an optional required subscription. Its rules are kept in a local rules
    file (see rulesFileName()) and are refreshed from the subscription
    location when an update is due.

    @signal changed() emitted after the subscription has changed
    @signal rulesChanged() emitted after the subscription's rules have changed
    @signal enabledChanged(bool) emitted after the enabled state was changed
    @signal rulesEnabledChanged() emitted after a rule enabled state was
        changed
    """
    changed = pyqtSignal()
    rulesChanged = pyqtSignal()
    enabledChanged = pyqtSignal(bool)
    rulesEnabledChanged = pyqtSignal()

    def __init__(self, url, custom, parent=None, default=False):
        """
        Constructor

        @param url AdBlock URL for the subscription
        @type QUrl
        @param custom flag indicating a custom subscription
        @type bool
        @param parent reference to the parent object
        @type QObject
        @param default flag indicating a default subscription
        @type bool
        """
        super(AdBlockSubscription, self).__init__(parent)

        self.__custom = custom
        self.__url = url.toEncoded()
        self.__enabled = False
        self.__downloading = None       # pending network reply, if any
        self.__defaultSubscription = default

        self.__title = ""
        self.__location = QByteArray()
        self.__lastUpdate = QDateTime()
        self.__requiresLocation = ""
        self.__requiresTitle = ""

        self.__updatePeriod = 0     # update period in hours, 0 = use default
        self.__remoteModified = QDateTime()

        self.__rules = []   # list containing all AdBlock rules

        # '! Checksum: <base64 md5>' line of a subscription file
        self.__checksumRe = re.compile(
            r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
            re.IGNORECASE | re.MULTILINE)
        # 'Expires: <n> [hours]' hint; without a unit the value means days
        self.__expiresRe = re.compile(
            r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
            re.IGNORECASE)
        # '! Last modified: <day> <month name> <year> [<hh>:<mm>]' hint
        self.__remoteModifiedRe = re.compile(
            r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
            r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
            r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
            re.IGNORECASE)

        self.__monthNameToNumber = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12
        }

        self.__parseUrl(url)

    def __parseUrl(self, url):
        """
        Private method to parse the AdBlock URL for the subscription.

        URLs that are not of the form 'abp:subscribe?...' are ignored. For
        a matching URL the subscription attributes are extracted, a required
        subscription is requested from the AdBlock manager and the rules
        are loaded.

        @param url AdBlock URL for the subscription
        @type QUrl
        """
        if url.scheme() != "abp":
            return

        if url.path() != "subscribe":
            return

        urlQuery = QUrlQuery(url)
        self.__title = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("title").encode()))
        # enabled unless explicitly switched off
        self.__enabled = urlQuery.queryItemValue("enabled") != "false"
        self.__location = QByteArray(QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("location").encode()))
            .encode("utf-8"))

        # Check for required subscription
        self.__requiresLocation = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue(
                "requiresLocation").encode()))
        self.__requiresTitle = QUrl.fromPercentEncoding(
            QByteArray(urlQuery.queryItemValue("requiresTitle").encode()))
        if self.__requiresLocation and self.__requiresTitle:
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.adBlockManager().loadRequiredSubscription(
                self.__requiresLocation, self.__requiresTitle)

        lastUpdateString = urlQuery.queryItemValue("lastUpdate")
        self.__lastUpdate = QDateTime.fromString(lastUpdateString,
                                                 Qt.ISODate)

        self.__loadRules()

    def url(self):
        """
        Public method to generate the URL for this subscription.

        @return AdBlock URL for the subscription
        @rtype QUrl
        """
        url = QUrl()
        url.setScheme("abp")
        url.setPath("subscribe")

        queryItems = [
            ("location", bytes(self.__location).decode()),
            ("title", self.__title),
        ]
        if self.__requiresLocation and self.__requiresTitle:
            queryItems.append(("requiresLocation", self.__requiresLocation))
            queryItems.append(("requiresTitle", self.__requiresTitle))
        if not self.__enabled:
            # 'enabled' is only recorded when it deviates from the default
            queryItems.append(("enabled", "false"))
        if self.__lastUpdate.isValid():
            queryItems.append(("lastUpdate",
                               self.__lastUpdate.toString(Qt.ISODate)))

        query = QUrlQuery()
        query.setQueryItems(queryItems)
        url.setQuery(query)
        return url

    def isEnabled(self):
        """
        Public method to check, if the subscription is enabled.

        @return flag indicating the enabled status
        @rtype bool
        """
        return self.__enabled

    def setEnabled(self, enabled):
        """
        Public method to set the enabled status.

        The enabledChanged signal is emitted only, if the state changes.

        @param enabled flag indicating the enabled status
        @type bool
        """
        if self.__enabled == enabled:
            return

        self.__enabled = enabled
        self.enabledChanged.emit(enabled)

    def title(self):
        """
        Public method to get the subscription title.

        @return subscription title
        @rtype str
        """
        return self.__title

    def setTitle(self, title):
        """
        Public method to set the subscription title.

        The changed signal is emitted only, if the title changes.

        @param title subscription title
        @type str
        """
        if self.__title == title:
            return

        self.__title = title
        self.changed.emit()

    def location(self):
        """
        Public method to get the subscription location.

        @return URL of the subscription location
        @rtype QUrl
        """
        return QUrl.fromEncoded(self.__location)

    def setLocation(self, url):
        """
        Public method to set the subscription location.

        Changing the location invalidates the time of the last update.

        @param url URL of the subscription location
        @type QUrl
        """
        if url == self.location():
            return

        self.__location = url.toEncoded()
        self.__lastUpdate = QDateTime()
        self.changed.emit()

    def requiresLocation(self):
        """
        Public method to get the location of a required subscription.

        @return location of a required subscription
        @rtype str
        """
        return self.__requiresLocation

    def lastUpdate(self):
        """
        Public method to get the date and time of the last update.

        @return date and time of the last update
        @rtype QDateTime
        """
        return self.__lastUpdate

    def rulesFileName(self):
        """
        Public method to get the name of the rules file.

        For a 'file' location this is the local file itself. Otherwise it
        is a file below the configuration directory named after the SHA1
        hash of the location; the containing directory is created, if it
        does not exist yet.

        @return name of the rules file (empty, if no location is set)
        @rtype str
        """
        if self.location().scheme() == "file":
            return self.location().toLocalFile()

        if self.__location.isEmpty():
            return ""

        sha1 = bytes(QCryptographicHash.hash(
            self.__location, QCryptographicHash.Sha1).toHex()).decode()
        dataDir = os.path.join(
            Utilities.getConfigDir(), "web_browser", "subscriptions")
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)
        fileName = os.path.join(
            dataDir, "adblock_subscription_{0}".format(sha1))
        return fileName

    def __loadRules(self):
        """
        Private method to load the rules of the subscription.

        The rules file is read line by line. While reading, the update
        period ('Expires' hint) and the remote modification time ('Last
        modified' hint) are extracted. Finally a check for an update is
        performed.
        """
        fileName = self.rulesFileName()
        f = QFile(fileName)
        if f.exists():
            if not f.open(QIODevice.ReadOnly):
                E5MessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr(
                        """Unable to open AdBlock file '{0}' for reading.""")
                    .format(fileName))
            else:
                textStream = QTextStream(f)
                header = textStream.readLine(1024)
                if not header.startswith("[Adblock"):
                    E5MessageBox.warning(
                        None,
                        self.tr("Load subscription rules"),
                        self.tr("""AdBlock file '{0}' does not start"""
                                """ with [Adblock.""")
                        .format(fileName))
                    # discard the bogus file and force a fresh download
                    f.close()
                    f.remove()
                    self.__lastUpdate = QDateTime()
                else:
                    from .AdBlockRule import AdBlockRule

                    self.__updatePeriod = 0
                    self.__remoteModified = QDateTime()
                    self.__rules = []
                    self.__rules.append(AdBlockRule(header, self))
                    while not textStream.atEnd():
                        line = textStream.readLine()
                        self.__rules.append(AdBlockRule(line, self))
                        expires = self.__expiresRe.search(line)
                        if expires:
                            period, kind = expires.groups()
                            if kind:
                                # hours
                                self.__updatePeriod = int(period)
                            else:
                                # days
                                self.__updatePeriod = int(period) * 24
                        remoteModified = self.__remoteModifiedRe.search(line)
                        if remoteModified:
                            day, month, year, time, hour, minute = \
                                remoteModified.groups()
                            # the pattern matches case insensitively, the
                            # lookup table uses capitalized month names
                            self.__remoteModified.setDate(
                                QDate(int(year),
                                      self.__monthNameToNumber[
                                          month.capitalize()],
                                      int(day))
                            )
                            if time:
                                self.__remoteModified.setTime(
                                    QTime(int(hour), int(minute)))
                            else:
                                # no time given, set it to 23:59
                                self.__remoteModified.setTime(QTime(23, 59))
                    # release the file handle instead of waiting for the
                    # garbage collector
                    f.close()
                    self.changed.emit()
        elif not fileName.endswith("_custom"):
            # no rules file yet; force a download
            self.__lastUpdate = QDateTime()

        self.checkForUpdate()

    def checkForUpdate(self):
        """
        Public method to check for an update.

        An update is started, if the subscription was never updated or if
        the remote modification time or the last update time is older than
        the update period. The period given by the subscription takes
        precedence over the configured one (which is given in days).
        """
        if self.__updatePeriod:
            updatePeriod = self.__updatePeriod
        else:
            updatePeriod = \
                Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
        if not self.__lastUpdate.isValid() or \
            (self.__remoteModified.isValid() and
             self.__remoteModified.addSecs(updatePeriod * 3600) <
                QDateTime.currentDateTime()) or \
            self.__lastUpdate.addSecs(updatePeriod * 3600) < \
                QDateTime.currentDateTime():
            self.updateNow()

    def updateNow(self):
        """
        Public method to update the subscription immediately.

        Nothing is done, if a download is in progress already or the
        location is invalid. Local files are simply reloaded; everything
        else is downloaded via the web browser's network manager.
        """
        if self.__downloading is not None:
            return

        if not self.location().isValid():
            return

        if self.location().scheme() == "file":
            # NOTE(review): __loadRules() ends with checkForUpdate(); for a
            # local file with a stale 'Last modified' header this looks
            # like it can re-enter updateNow() repeatedly - confirm.
            self.__lastUpdate = QDateTime.currentDateTime()
            self.__loadRules()
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        reply = WebBrowserWindow.networkManager().get(
            QNetworkRequest(self.location()))
        reply.finished.connect(
            lambda: self.__rulesDownloaded(reply))
        self.__downloading = reply

    def __rulesDownloaded(self, reply):
        """
        Private slot to deal with the downloaded rules.

        @param reply reference to the network reply
        @type QNetworkReply
        """
        response = reply.readAll()
        reply.close()
        self.__downloading = None

        try:
            self.__processDownloadedRules(reply, response)
        finally:
            # release the reply on every path, not just on success
            reply.deleteLater()

    def __processDownloadedRules(self, reply, response):
        """
        Private method to validate and store the downloaded rules.

        @param reply reference to the finished network reply
        @type QNetworkReply
        @param response data read from the reply
        @type QByteArray
        """
        if reply.error() != QNetworkReply.NoError:
            if not self.__defaultSubscription:
                # don't show error if we try to load the default
                E5MessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """<p>Subscription rules could not be"""
                        """ downloaded.</p><p>Error: {0}</p>""")
                    .format(reply.errorString()))
            else:
                # reset after first download attempt
                self.__defaultSubscription = False
            return

        if response.isEmpty():
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Got empty subscription rules."""))
            return

        fileName = self.rulesFileName()
        QFile.remove(fileName)
        f = QFile(fileName)
        if not f.open(QIODevice.ReadWrite):
            E5MessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        if WebBrowserWindow.adBlockManager().useLimitedEasyList() and \
            self.url().toString().startswith(
                WebBrowserWindow.adBlockManager().getDefaultSubscriptionUrl()):
            limited = True
            # ignore Third-party advertisers rules for performance
            # whitelist rules at the end will be used
            index = response.indexOf(
                "!---------------------------"
                "Third-party advertisers"
                "---------------------------!")
            part1 = response.left(index)
            index = response.indexOf(
                "!-----------------------"
                "Whitelists to fix broken sites"
                "------------------------!")
            part2 = response.mid(index)
            f.write(part1)
            f.write(part2)
        else:
            limited = False
            f.write(response)
        f.close()
        self.__lastUpdate = QDateTime.currentDateTime()
        # the limited list was modified locally, so its checksum cannot
        # match and is not checked
        if limited or self.__validateCheckSum(fileName):
            # This may start a follow-up download via checkForUpdate();
            # the download state must therefore not be reset afterwards.
            self.__loadRules()
        else:
            QFile.remove(fileName)

    def __validateCheckSum(self, fileName):
        """
        Private method to check the subscription file's checksum.

        @param fileName name of the file containing the subscription
        @type str
        @return flag indicating a valid file. A file is considered
            valid, if the checksum is OK, the file does not contain a
            checksum (i.e. cannot be checked) or the user decided to use
            it despite a wrong checksum. A file that cannot be read is
            invalid.
        @rtype bool
        """
        try:
            with open(fileName, "r", encoding="utf-8") as f:
                data = f.read()
        except (IOError, OSError):
            return False

        match = self.__checksumRe.search(data)
        if match:
            expectedChecksum = match.group(1)
        else:
            # consider it as valid
            return True

        # normalize the data
        data = re.sub(r"\r", "", data)              # normalize eol
        data = re.sub(r"\n+", "\n", data)           # remove empty lines
        data = self.__checksumRe.sub("", data)      # remove checksum line

        # calculate checksum (base64 encoded MD5 digest without padding)
        md5 = hashlib.md5()
        md5.update(data.encode("utf-8"))
        calculatedChecksum = base64.b64encode(md5.digest()).decode()\
            .rstrip("=")
        if calculatedChecksum == expectedChecksum:
            return True
        else:
            # let the user decide
            res = E5MessageBox.yesNo(
                None,
                self.tr("Downloading subscription rules"),
                self.tr(
                    """<p>AdBlock subscription <b>{0}</b> has a wrong"""
                    """ checksum.<br/>"""
                    """Found: {1}<br/>"""
                    """Calculated: {2}<br/>"""
                    """Use it anyway?</p>""")
                .format(self.__title, expectedChecksum,
                        calculatedChecksum))
            return res

    def saveRules(self):
        """
        Public method to save the subscription rules.

        A header line is written first, if the rules do not start with
        one. Nothing is saved, if there is no rules file name.
        """
        fileName = self.rulesFileName()
        if not fileName:
            return

        f = QFile(fileName)
        if not f.open(QIODevice.ReadWrite | QIODevice.Truncate):
            E5MessageBox.warning(
                None,
                self.tr("Saving subscription rules"),
                self.tr(
                    """Unable to open AdBlock file '{0}' for writing.""")
                .format(fileName))
            return

        textStream = QTextStream(f)
        if not self.__rules or not self.__rules[0].isHeader():
            textStream << "[Adblock Plus 1.1.1]\n"
        for rule in self.__rules:
            textStream << rule.filter() << "\n"
        # write out buffered data and release the file explicitly instead
        # of relying on the destruction order of the stream and the file
        textStream.flush()
        f.close()

    def rule(self, offset):
        """
        Public method to get a specific rule.

        @param offset offset of the rule
        @type int
        @return requested rule (None, if the offset is beyond the last
            rule)
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        return self.__rules[offset]

    def allRules(self):
        """
        Public method to get the list of rules.

        @return copy of the list of rules
        @rtype list of AdBlockRule
        """
        return self.__rules[:]

    def addRule(self, rule):
        """
        Public method to add a rule.

        @param rule reference to the rule to add
        @type AdBlockRule
        @return offset of the rule
        @rtype int
        """
        self.__rules.append(rule)
        self.rulesChanged.emit()

        return len(self.__rules) - 1

    def removeRule(self, offset):
        """
        Public method to remove a rule given the offset.

        Offsets outside of the list of rules are ignored.

        @param offset offset of the rule to remove
        @type int
        """
        # len(rules) itself is no valid offset either
        if offset < 0 or offset >= len(self.__rules):
            return

        del self.__rules[offset]
        self.rulesChanged.emit()

    def replaceRule(self, rule, offset):
        """
        Public method to replace a rule given the offset.

        @param rule reference to the rule to set
        @type AdBlockRule
        @param offset offset of the rule to remove
        @type int
        @return requested rule (None, if the offset is beyond the last
            rule)
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        self.__rules[offset] = rule
        self.rulesChanged.emit()

        return self.__rules[offset]

    def canEditRules(self):
        """
        Public method to check, if rules can be edited.

        @return flag indicating rules may be edited
        @rtype bool
        """
        return self.__custom

    def canBeRemoved(self):
        """
        Public method to check, if the subscription can be removed.

        @return flag indicating removal is allowed
        @rtype bool
        """
        return not self.__custom and not self.__defaultSubscription

    def setRuleEnabled(self, offset, enabled):
        """
        Public method to enable a specific rule.

        The user style sheet is reloaded, if the rule is a CSS rule.

        @param offset offset of the rule
        @type int
        @param enabled new enabled state
        @type bool
        @return reference to the changed rule (None, if the offset is
            beyond the last rule)
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        rule = self.__rules[offset]
        rule.setEnabled(enabled)
        self.rulesEnabledChanged.emit()

        if rule.isCSSRule():
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.mainWindow().reloadUserStyleSheet()

        return rule

eric ide

mercurial