src/eric7/WebBrowser/AdBlock/AdBlockSubscription.py

branch
eric7
changeset 9209
b99e7fd55fd3
parent 9165
17617e5d5473
child 9221
bf71ee032bb4
equal deleted inserted replaced
9208:3fc8dfeb6ebe 9209:b99e7fd55fd3
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2022 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock subscription class.
8 """
9
10 import os
11 import re
12 import hashlib
13 import base64
14
15 from PyQt6.QtCore import (
16 pyqtSignal, Qt, QObject, QByteArray, QDateTime, QUrl, QUrlQuery,
17 QCryptographicHash, QDate, QTime
18 )
19 from PyQt6.QtNetwork import QNetworkReply, QNetworkRequest
20
21 from EricWidgets import EricMessageBox
22
23 import Utilities
24 import Preferences
25
26
class AdBlockSubscription(QObject):
    """
    Class implementing the AdBlock subscription.

    A subscription is described by an "abp:subscribe" URL carrying the
    title, the location of the rules, the enabled state and the time of
    the last update. The rules are kept in a local file (see
    rulesFileName()) and are refreshed from the location when they are
    considered outdated (see checkForUpdate()).

    @signal changed() emitted after the subscription has changed
    @signal rulesChanged() emitted after the subscription's rules have changed
    @signal enabledChanged(bool) emitted after the enabled state was changed
    @signal rulesEnabledChanged() emitted after a rule enabled state was
        changed
    """
    changed = pyqtSignal()
    rulesChanged = pyqtSignal()
    enabledChanged = pyqtSignal(bool)
    rulesEnabledChanged = pyqtSignal()

    def __init__(self, url, custom, parent=None, default=False):
        """
        Constructor

        @param url AdBlock URL for the subscription
        @type QUrl
        @param custom flag indicating a custom subscription
        @type bool
        @param parent reference to the parent object
        @type QObject
        @param default flag indicating a default subscription
        @type bool
        """
        super().__init__(parent)

        self.__custom = custom
        self.__url = url.toEncoded()
        self.__enabled = False
        self.__downloading = None
        self.__defaultSubscription = default

        self.__title = ""
        self.__location = QByteArray()
        self.__lastUpdate = QDateTime()
        self.__requiresLocation = ""
        self.__requiresTitle = ""

        self.__updatePeriod = 0     # update period in hours, 0 = use default
        self.__remoteModified = QDateTime()

        self.__rules = []   # list containing all AdBlock rules

        self.__checksumRe = re.compile(
            r"""^\s*!\s*checksum[\s\-:]+([\w\+\/=]+).*\n""",
            re.IGNORECASE | re.MULTILINE)
        self.__expiresRe = re.compile(
            r"""(?:expires:|expires after)\s*(\d+)\s*(hour|h)?""",
            re.IGNORECASE)
        self.__remoteModifiedRe = re.compile(
            r"""!\s*(?:Last modified|Updated):\s*(\d{1,2})\s*"""
            r"""(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s*"""
            r"""(\d{2,4})\s*((\d{1,2}):(\d{2}))?""",
            re.IGNORECASE)

        # keys are capitalized; matched month names must be normalized
        # before the lookup because the expression ignores case
        self.__monthNameToNumber = {
            "Jan": 1,
            "Feb": 2,
            "Mar": 3,
            "Apr": 4,
            "May": 5,
            "Jun": 6,
            "Jul": 7,
            "Aug": 8,
            "Sep": 9,
            "Oct": 10,
            "Nov": 11,
            "Dec": 12
        }

        self.__parseUrl(url)

    def __parseUrl(self, url):
        """
        Private method to parse the AdBlock URL for the subscription.

        URLs that are not of the form "abp:subscribe?..." are ignored.
        Parsing ends with loading the rules of the subscription.

        @param url AdBlock URL for the subscription
        @type QUrl
        """
        if url.scheme() != "abp":
            return

        if url.path() != "subscribe":
            return

        urlQuery = QUrlQuery(url)

        def decodedItem(key):
            """
            Get the percent decoded value of a query item.

            @param key name of the query item
            @type str
            @return decoded value
            @rtype str
            """
            return QUrl.fromPercentEncoding(
                QByteArray(urlQuery.queryItemValue(key).encode()))

        self.__title = decodedItem("title")
        self.__enabled = urlQuery.queryItemValue("enabled") != "false"
        self.__location = QByteArray(
            decodedItem("location").encode("utf-8"))

        # Check for required subscription
        self.__requiresLocation = decodedItem("requiresLocation")
        self.__requiresTitle = decodedItem("requiresTitle")
        if self.__requiresLocation and self.__requiresTitle:
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.adBlockManager().loadRequiredSubscription(
                self.__requiresLocation, self.__requiresTitle)

        lastUpdateString = urlQuery.queryItemValue("lastUpdate")
        self.__lastUpdate = QDateTime.fromString(lastUpdateString,
                                                 Qt.DateFormat.ISODate)

        self.__loadRules()

    def url(self):
        """
        Public method to generate the URL for this subscription.

        @return AdBlock URL for the subscription
        @rtype QUrl
        """
        url = QUrl()
        url.setScheme("abp")
        url.setPath("subscribe")

        queryItems = [
            ("location", bytes(self.__location).decode()),
            ("title", self.__title),
        ]
        if self.__requiresLocation and self.__requiresTitle:
            queryItems.append(("requiresLocation", self.__requiresLocation))
            queryItems.append(("requiresTitle", self.__requiresTitle))
        if not self.__enabled:
            # enabled is the default and therefore not written
            queryItems.append(("enabled", "false"))
        if self.__lastUpdate.isValid():
            queryItems.append(
                ("lastUpdate",
                 self.__lastUpdate.toString(Qt.DateFormat.ISODate))
            )

        query = QUrlQuery()
        query.setQueryItems(queryItems)
        url.setQuery(query)
        return url

    def isEnabled(self):
        """
        Public method to check, if the subscription is enabled.

        @return flag indicating the enabled status
        @rtype bool
        """
        return self.__enabled

    def setEnabled(self, enabled):
        """
        Public method to set the enabled status.

        The enabledChanged signal is emitted only if the state changes.

        @param enabled flag indicating the enabled status
        @type bool
        """
        if self.__enabled == enabled:
            return

        self.__enabled = enabled
        self.enabledChanged.emit(enabled)

    def title(self):
        """
        Public method to get the subscription title.

        @return subscription title
        @rtype str
        """
        return self.__title

    def setTitle(self, title):
        """
        Public method to set the subscription title.

        The changed signal is emitted only if the title changes.

        @param title subscription title
        @type str
        """
        if self.__title == title:
            return

        self.__title = title
        self.changed.emit()

    def location(self):
        """
        Public method to get the subscription location.

        @return URL of the subscription location
        @rtype QUrl
        """
        return QUrl.fromEncoded(self.__location)

    def setLocation(self, url):
        """
        Public method to set the subscription location.

        Changing the location invalidates the time of the last update.

        @param url URL of the subscription location
        @type QUrl
        """
        if url == self.location():
            return

        self.__location = url.toEncoded()
        self.__lastUpdate = QDateTime()
        self.changed.emit()

    def requiresLocation(self):
        """
        Public method to get the location of a required subscription.

        @return location of a required subscription
        @rtype str
        """
        return self.__requiresLocation

    def lastUpdate(self):
        """
        Public method to get the date and time of the last update.

        @return date and time of the last update
        @rtype QDateTime
        """
        return self.__lastUpdate

    def rulesFileName(self):
        """
        Public method to get the name of the rules file.

        For a "file" location the local file itself is used. Otherwise the
        name is derived from the SHA1 hash of the location and the
        containing directory is created, if it does not exist.

        @return name of the rules file (empty, if there is no location)
        @rtype str
        """
        if self.location().scheme() == "file":
            return self.location().toLocalFile()

        if self.__location.isEmpty():
            return ""

        sha1 = bytes(QCryptographicHash.hash(
            self.__location, QCryptographicHash.Algorithm.Sha1).toHex()
        ).decode()
        dataDir = os.path.join(
            Utilities.getConfigDir(), "web_browser", "subscriptions")
        # exist_ok avoids the race of a separate existence check
        os.makedirs(dataDir, exist_ok=True)
        return os.path.join(
            dataDir, "adblock_subscription_{0}".format(sha1))

    def __loadRules(self, checkUpdate=True):
        """
        Private method to load the rules of the subscription.

        @param checkUpdate flag indicating to check for an update after
            loading. Callers that just refreshed the rules pass False;
            checking again would trigger the next update immediately
            (endless recursion for local files, endless downloads
            otherwise).
        @type bool
        """
        fileName = self.rulesFileName()
        if os.path.exists(fileName):
            try:
                with open(fileName, "r", encoding="utf-8") as f:
                    header = f.readline().strip()
                    validHeader = header.startswith("[Adblock")
                    if validHeader:
                        self.__parseRules(header, f)

                if not validHeader:
                    EricMessageBox.warning(
                        None,
                        self.tr("Load subscription rules"),
                        self.tr("""AdBlock file '{0}' does not start"""
                                """ with [Adblock.""")
                        .format(fileName))
                    # NOTE(review): for a "file" location rulesFileName()
                    # is the user's own file, so this deletes it; confirm
                    # that this is intended
                    os.unlink(fileName)
                    self.__lastUpdate = QDateTime()
                self.changed.emit()
            except (OSError, UnicodeError) as err:
                # UnicodeError: the file is not valid UTF-8
                EricMessageBox.warning(
                    None,
                    self.tr("Load subscription rules"),
                    self.tr(
                        """Unable to read AdBlock file '{0}'.\nReason: {1}""")
                    .format(fileName, str(err))
                )

        elif not fileName.endswith("_custom"):
            self.__lastUpdate = QDateTime()

        if checkUpdate:
            self.checkForUpdate()

    def __parseRules(self, header, lines):
        """
        Private method to rebuild the rules from the lines of a rules file.

        Besides creating the rules, the update period ("Expires" entry) and
        the remote modification time ("Last modified" or "Updated" entry)
        are extracted.

        @param header first line of the rules file
        @type str
        @param lines iterable delivering the remaining lines of the file
        @type iterable of str
        """
        from .AdBlockRule import AdBlockRule

        self.__updatePeriod = 0
        self.__remoteModified = QDateTime()
        self.__rules = [AdBlockRule(header, self)]
        for line in lines:
            line = line.strip()
            if not line:
                continue
            self.__rules.append(AdBlockRule(line, self))

            expires = self.__expiresRe.search(line)
            if expires:
                period, kind = expires.groups()
                # a unit of "hour" or "h" means hours, anything else days
                self.__updatePeriod = (
                    int(period) if kind else int(period) * 24
                )

            remoteModified = self.__remoteModifiedRe.search(line)
            if remoteModified:
                day, month, year, time, hour, minute = (
                    remoteModified.groups()
                )
                # the expression ignores case but the month table does not
                self.__remoteModified.setDate(
                    QDate(int(year),
                          self.__monthNameToNumber[month.capitalize()],
                          int(day))
                )
                if time:
                    self.__remoteModified.setTime(
                        QTime(int(hour), int(minute)))
                else:
                    # no time given, set it to 23:59
                    self.__remoteModified.setTime(QTime(23, 59))

    def checkForUpdate(self):
        """
        Public method to check for an update.

        An update is started, if the subscription was never updated or if
        the update period has elapsed since the last update or since the
        remote modification time. The period is taken from the rules file
        or, if it gives none, from the configured number of days.
        """
        updatePeriod = (
            self.__updatePeriod
            if self.__updatePeriod else
            Preferences.getWebBrowser("AdBlockUpdatePeriod") * 24
        )
        periodSecs = updatePeriod * 3600
        now = QDateTime.currentDateTime()
        if (
            not self.__lastUpdate.isValid() or
            (self.__remoteModified.isValid() and
             self.__remoteModified.addSecs(periodSecs) < now) or
            self.__lastUpdate.addSecs(periodSecs) < now
        ):
            self.updateNow()

    def updateNow(self):
        """
        Public method to update the subscription immediately.

        Nothing is done while a download is in progress or if the location
        is invalid. Local files are just reloaded; everything else is
        downloaded asynchronously.
        """
        if self.__downloading is not None:
            return

        if not self.location().isValid():
            return

        if self.location().scheme() == "file":
            self.__lastUpdate = QDateTime.currentDateTime()
            # do not check for an update again; that would recurse
            # endlessly for a missing or outdated local file
            self.__loadRules(checkUpdate=False)
            return

        from WebBrowser.WebBrowserWindow import WebBrowserWindow
        reply = WebBrowserWindow.networkManager().get(
            QNetworkRequest(self.location()))
        reply.finished.connect(
            lambda: self.__rulesDownloaded(reply))
        self.__downloading = reply

    def __rulesDownloaded(self, reply):
        """
        Private slot to deal with the downloaded rules.

        @param reply reference to the network reply
        @type QNetworkReply
        """
        response = bytes(reply.readAll())
        reply.close()
        self.__downloading = None

        try:
            self.__processDownload(reply, response)
        finally:
            # dispose of the reply on every path, including the early
            # exits for network errors and empty responses
            reply.deleteLater()

    def __processDownload(self, reply, response):
        """
        Private method to store and load the downloaded rules.

        @param reply reference to the finished network reply
        @type QNetworkReply
        @param response data received with the reply
        @type bytes
        """
        if reply.error() != QNetworkReply.NetworkError.NoError:
            if not self.__defaultSubscription:
                # don't show error if we try to load the default
                EricMessageBox.warning(
                    None,
                    self.tr("Downloading subscription rules"),
                    self.tr(
                        """<p>Subscription rules could not be"""
                        """ downloaded.</p><p>Error: {0}</p>""")
                    .format(reply.errorString()))
            else:
                # reset after first download attempt
                self.__defaultSubscription = False
            return

        if not response:
            EricMessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Got empty subscription rules."""))
            return

        fileName = self.rulesFileName()
        try:
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            manager = WebBrowserWindow.adBlockManager()
            limited = False
            if (
                manager.useLimitedEasyList() and
                self.url().toString().startswith(
                    manager.getDefaultSubscriptionUrl())
            ):
                shortened = self.__limitedEasyList(response)
                if shortened is not None:
                    # the checksum of a modified list cannot match
                    limited = True
                    response = shortened

            with open(fileName, "wb") as f:
                f.write(response)
            self.__lastUpdate = QDateTime.currentDateTime()

            if limited or self.__validateCheckSum(fileName):
                # the rules are fresh; checking for an update again would
                # start the next download right away
                self.__loadRules(checkUpdate=False)
            else:
                os.unlink(fileName)
        except OSError:
            EricMessageBox.warning(
                None,
                self.tr("Downloading subscription rules"),
                self.tr("""Unable to write to AdBlock file '{0}'.""")
                .format(fileName))

    def __limitedEasyList(self, response):
        """
        Private method to cut the third-party advertisers rules out of an
        EasyList.

        These rules are ignored for performance; the whitelist rules at
        the end of the list are kept.

        @param response downloaded rules
        @type bytes
        @return shortened rules or None, if the section markers are missing
            or in unexpected order (the list should be used unmodified)
        @rtype bytes or None
        """
        start = response.find(
            b"!---------------------------"
            b"Third-party advertisers"
            b"---------------------------!")
        end = response.find(
            b"!-----------------------"
            b"Whitelists to fix broken sites"
            b"------------------------!")
        # find() returns -1 for a missing marker; slicing with it would
        # silently corrupt the list
        if start < 0 or end < start:
            return None

        return response[:start] + response[end:]

    def __validateCheckSum(self, fileName):
        """
        Private method to check the subscription file's checksum.

        @param fileName name of the file containing the subscription
        @type str
        @return flag indicating a valid file. A file is considered
            valid, if the checksum is OK, the file does not contain a
            checksum (i.e. cannot be checked) or the user accepts it
            despite a wrong checksum. A file that cannot be read or
            decoded is invalid.
        @rtype bool
        """
        try:
            with open(fileName, "r", encoding="utf-8") as f:
                data = f.read()
        except (OSError, UnicodeError):
            return False

        match = self.__checksumRe.search(data)
        if match is None:
            # consider it as valid
            return True

        expectedChecksum = match.group(1)

        # normalize the data
        data = data.replace("\r", "")                # normalize eol
        data = re.sub(r"\n+", "\n", data)            # remove empty lines
        data = self.__checksumRe.sub("", data)       # remove checksum line

        # calculate checksum
        md5 = hashlib.md5()             # secok
        md5.update(data.encode("utf-8"))
        calculatedChecksum = (
            base64.b64encode(md5.digest()).decode().rstrip("=")
        )
        # the calculated value has no padding, so ignore any padding of
        # the value found in the file
        if calculatedChecksum == expectedChecksum.rstrip("="):
            return True

        return EricMessageBox.yesNo(
            None,
            self.tr("Downloading subscription rules"),
            self.tr(
                """<p>AdBlock subscription <b>{0}</b> has a wrong"""
                """ checksum.<br/>"""
                """Found: {1}<br/>"""
                """Calculated: {2}<br/>"""
                """Use it anyway?</p>""")
            .format(self.__title, expectedChecksum,
                    calculatedChecksum))

    def saveRules(self):
        """
        Public method to save the subscription rules.

        A default header line is written first, if the rules do not start
        with a header.
        """
        fileName = self.rulesFileName()
        if not fileName:
            return

        try:
            with open(fileName, "w", encoding="utf-8") as f:
                if not self.__rules or not self.__rules[0].isHeader():
                    f.write("[Adblock Plus 2.0]\n")
                for rule in self.__rules:
                    f.write(rule.filter() + "\n")
        except OSError:
            EricMessageBox.warning(
                None,
                self.tr("Saving subscription rules"),
                self.tr("""Unable to write to AdBlock file '{0}'.""")
                .format(fileName))

    def rule(self, offset):
        """
        Public method to get a specific rule.

        @param offset offset of the rule
        @type int
        @return requested rule or None, if the offset is too big
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        return self.__rules[offset]

    def allRules(self):
        """
        Public method to get the list of rules.

        @return copy of the list of rules
        @rtype list of AdBlockRule
        """
        return self.__rules[:]

    def addRule(self, rule):
        """
        Public method to add a rule.

        @param rule reference to the rule to add
        @type AdBlockRule
        @return offset of the rule
        @rtype int
        """
        self.__rules.append(rule)
        self.rulesChanged.emit()

        return len(self.__rules) - 1

    def removeRule(self, offset):
        """
        Public method to remove a rule given the offset.

        Offsets outside of the list of rules are ignored.

        @param offset offset of the rule to remove
        @type int
        """
        # an offset equal to the number of rules is out of range as well
        if offset < 0 or offset >= len(self.__rules):
            return

        del self.__rules[offset]
        self.rulesChanged.emit()

    def replaceRule(self, rule, offset):
        """
        Public method to replace a rule given the offset.

        @param rule reference to the rule to set
        @type AdBlockRule
        @param offset offset of the rule to replace
        @type int
        @return rule now stored at the offset or None, if the offset is
            too big
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        self.__rules[offset] = rule
        self.rulesChanged.emit()

        return self.__rules[offset]

    def canEditRules(self):
        """
        Public method to check, if rules can be edited.

        @return flag indicating rules may be edited
        @rtype bool
        """
        return self.__custom

    def canBeRemoved(self):
        """
        Public method to check, if the subscription can be removed.

        @return flag indicating removal is allowed
        @rtype bool
        """
        return not self.__custom and not self.__defaultSubscription

    def setRuleEnabled(self, offset, enabled):
        """
        Public method to enable a specific rule.

        @param offset offset of the rule
        @type int
        @param enabled new enabled state
        @type bool
        @return reference to the changed rule or None, if the offset is
            too big
        @rtype AdBlockRule
        """
        if offset >= len(self.__rules):
            return None

        rule = self.__rules[offset]
        rule.setEnabled(enabled)
        self.rulesEnabledChanged.emit()

        if rule.isCSSRule():
            from WebBrowser.WebBrowserWindow import WebBrowserWindow
            WebBrowserWindow.mainWindow().reloadUserStyleSheet()

        return rule

eric ide

mercurial