|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock rule class. |
|
8 """ |
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 import re |
|
13 |
|
14 from enum import IntEnum |
|
15 |
|
16 from PyQt5.QtCore import PYQT_VERSION, Qt, QRegExp |
|
17 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo |
|
18 |
|
19 from Globals import qVersionTuple |
|
20 |
|
21 |
|
def toSecondLevelDomain(url):
    """
    Module function to derive the second level domain of the given URL.

    The result consists of the last host label in front of the top level
    domain followed by the top level domain itself.

    @param url URL to extract the domain from
    @type QUrl
    @return name of the second level domain; an empty string, if the URL
        has no host or no top level domain
    @rtype str
    """
    tld = url.topLevelDomain()
    host = url.host()
    if not (tld and host):
        return ""

    # part of the host name in front of the top level domain
    prefix = host[:len(host) - len(tld)]
    if "." not in prefix:
        # no sub-domain labels present; the host is the result already
        return host

    # keep only the label directly in front of the top level domain
    return prefix.rsplit(".", 1)[1] + tld
|
45 |
|
46 |
|
class AdBlockRuleType(IntEnum):
    """
    Class defining the enum of types an AdBlock rule can be parsed to.
    """
    # element hiding rule (filter contains "##" or "#@#")
    CssRule = 0
    # pure domain rule of the form "||domain^"
    DomainMatchRule = 1
    # rule evaluated by means of a regular expression
    RegExpMatchRule = 2
    # rule matching the end of the URL (filter ends with "|")
    StringEndsMatchRule = 3
    # rule matching a plain sub-string of the URL
    StringContainsMatchRule = 4
    # rule without a pattern but with options
    MatchAllUrlsRule = 5
    # comment, empty or unsupported rule
    Invalid = 6
|
58 |
|
59 |
|
class AdBlockRuleOption(IntEnum):
    """
    Class defining the enum of rule options.

    Apart from NoOption each member occupies a bit of its own, so that
    several options can be combined with the bitwise operators.
    """
    NoOption = 0
    DomainRestrictedOption = 1 << 0
    ThirdPartyOption = 1 << 1
    ObjectOption = 1 << 2
    SubdocumentOption = 1 << 3
    XMLHttpRequestOption = 1 << 4
    ImageOption = 1 << 5
    ScriptOption = 1 << 6
    StyleSheetOption = 1 << 7
    ObjectSubrequestOption = 1 << 8
    PingOption = 1 << 9
    MediaOption = 1 << 10
    FontOption = 1 << 11
    OtherOption = 1 << 12

    # Exception only options
    DocumentOption = 1 << 13
    ElementHideOption = 1 << 14
|
82 |
|
83 |
|
84 class AdBlockRule(object): |
|
85 """ |
|
86 Class implementing the AdBlock rule. |
|
87 """ |
|
def __init__(self, filterRule="", subscription=None):
    """
    Constructor

    @param filterRule filter string of the rule
    @type str
    @param subscription reference to the subscription object
    @type AdBlockSubscription
    """
    self.__subscription = subscription

    # matching data derived from the filter string
    self.__regExp = None
    self.__stringMatchers = []
    # Defined up front because not every path through the filter parser
    # assigns it (e.g. comments and regular expression rules); reading it
    # would raise an AttributeError otherwise.
    self.__matchString = ""

    self.__blockedDomains = []
    self.__allowedDomains = []

    self.__isEnabled = True
    self.__isException = False
    self.__isInternalDisabled = False
    self.__caseSensitivity = Qt.CaseInsensitive

    self.__type = AdBlockRuleType.StringContainsMatchRule
    self.__options = AdBlockRuleOption.NoOption
    self.__exceptions = AdBlockRuleOption.NoOption

    # stores the filter string and parses it
    self.setFilter(filterRule)
|
115 |
|
def subscription(self):
    """
    Public method to get the subscription the rule is associated with.

    @return subscription of the rule
    @rtype AdBlockSubscription
    """
    return self.__subscription
|
124 |
|
def setSubscription(self, subscription):
    """
    Public method to associate the rule with a subscription.

    @param subscription subscription of the rule
    @type AdBlockSubscription
    """
    self.__subscription = subscription
|
133 |
|
def filter(self):
    """
    Public method to get the filter string the rule was created from.

    @return rule filter string
    @rtype str
    """
    return self.__filter
|
142 |
|
def setFilter(self, filterRule):
    """
    Public method to set the rule filter string.

    The filter string is stored and parsed right away.

    @param filterRule rule filter string
    @type str
    """
    self.__filter = filterRule
    self.__parseFilter()
|
152 |
|
def __parseFilter(self):
    """
    Private method to parse the filter pattern.

    The filter string is classified as one of the rule types and the
    options following a '$' character are translated into rule options.
    A rule containing an option, that is not handled, is disabled
    internally and marked as invalid.
    """
    parsedLine = self.__filter

    # empty rule or just a comment
    if not parsedLine.strip() or parsedLine.startswith("!"):
        self.__isEnabled = False
        self.__isInternalDisabled = True
        self.__type = AdBlockRuleType.Invalid
        return

    # CSS element hiding rule
    if "##" in parsedLine or "#@#" in parsedLine:
        self.__type = AdBlockRuleType.CssRule
        pos = parsedLine.find("#")

        # domain restricted rule
        if not parsedLine.startswith("##"):
            domains = parsedLine[:pos]
            self.__parseDomains(domains, ",")

        self.__isException = parsedLine[pos + 1] == "@"
        if self.__isException:
            self.__matchString = parsedLine[pos + 3:]
        else:
            self.__matchString = parsedLine[pos + 2:]

        # CSS rule cannot have more options -> stop parsing
        return

    # Exception always starts with @@
    if parsedLine.startswith("@@"):
        self.__isException = True
        parsedLine = parsedLine[2:]

    # Parse all options following '$' character
    optionsIndex = parsedLine.find("$")
    if optionsIndex >= 0:
        options = [opt
                   for opt in parsedLine[optionsIndex + 1:].split(",")
                   if opt]

        # Options restricting the rule to a kind of request. Each of them
        # may be inverted by a leading '~'. They are checked in this order
        # with the first matching suffix winning.
        requestOptions = (
            ("third-party", AdBlockRuleOption.ThirdPartyOption),
            ("object", AdBlockRuleOption.ObjectOption),
            ("subdocument", AdBlockRuleOption.SubdocumentOption),
            ("xmlhttprequest", AdBlockRuleOption.XMLHttpRequestOption),
            ("image", AdBlockRuleOption.ImageOption),
            ("script", AdBlockRuleOption.ScriptOption),
            ("stylesheet", AdBlockRuleOption.StyleSheetOption),
            ("object-subrequest", AdBlockRuleOption.ObjectSubrequestOption),
            ("ping", AdBlockRuleOption.PingOption),
            ("media", AdBlockRuleOption.MediaOption),
            ("font", AdBlockRuleOption.FontOption),
            ("other", AdBlockRuleOption.OtherOption),
        )

        handledOptions = 0
        for option in options:
            if option.startswith("domain="):
                self.__parseDomains(option[7:], "|")
                handledOptions += 1
                continue

            if option == "match-case":
                self.__caseSensitivity = Qt.CaseSensitive
                handledOptions += 1
                continue

            for suffix, ruleOption in requestOptions:
                if option.endswith(suffix):
                    self.setOption(ruleOption)
                    self.__setException(ruleOption, option.startswith("~"))
                    # every request option counts as handled; otherwise
                    # the rule would be disabled by the check below
                    handledOptions += 1
                    break
            else:
                if option == "document" and self.__isException:
                    self.setOption(AdBlockRuleOption.DocumentOption)
                    handledOptions += 1
                elif option == "elemhide" and self.__isException:
                    self.setOption(AdBlockRuleOption.ElementHideOption)
                    handledOptions += 1
                elif option == "collapse":
                    # Hiding placeholders of blocked elements is enabled
                    # by default
                    handledOptions += 1

        # If we don't handle all options, it's safer to just disable
        # this rule
        if handledOptions != len(options):
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        parsedLine = parsedLine[:optionsIndex]

    # Rule is classic regexp
    if parsedLine.startswith("/") and parsedLine.endswith("/"):
        parsedLine = parsedLine[1:-1]
        self.__type = AdBlockRuleType.RegExpMatchRule
        self.__regExp = QRegExp(parsedLine, self.__caseSensitivity,
                                QRegExp.RegExp)
        # NOTE(review): the literal pre-filter strings are derived from
        # the raw regular expression here; this presumably suppresses
        # matches for expressions using real regexp syntax - confirm.
        self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
        return

    # Remove starting / ending wildcards (*)
    if parsedLine.startswith("*"):
        parsedLine = parsedLine[1:]
    if parsedLine.endswith("*"):
        parsedLine = parsedLine[:-1]

    # Fast string matching for domain here
    if self.__filterIsOnlyDomain(parsedLine):
        # strip the leading "||" and the trailing "^"
        parsedLine = parsedLine[2:-1]
        self.__type = AdBlockRuleType.DomainMatchRule
        self.__matchString = parsedLine
        return

    # If rule contains '|' only at the end, string matching can be used
    if self.__filterIsOnlyEndsMatch(parsedLine):
        parsedLine = parsedLine[:-1]
        self.__type = AdBlockRuleType.StringEndsMatchRule
        self.__matchString = parsedLine
        return

    # If there is still a wildcard (*) or separator (^) or (|),
    # the rule must be modified to comply with QRegExp.
    if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
        self.__type = AdBlockRuleType.RegExpMatchRule
        pattern = self.__convertPatternToRegExp(parsedLine)
        self.__regExp = QRegExp(pattern, self.__caseSensitivity,
                                QRegExp.RegExp)
        self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
        return

    # This rule matches all URLs
    if len(parsedLine) == 0:
        # without any option such a rule would match every request
        if self.__options == AdBlockRuleOption.NoOption:
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        self.__type = AdBlockRuleType.MatchAllUrlsRule
        return

    # no regexp required
    self.__type = AdBlockRuleType.StringContainsMatchRule
    self.__matchString = parsedLine
|
330 |
|
def __parseDomains(self, domains, separator):
    """
    Private method to parse a string containing a list of domains.

    Entries starting with '~' are recorded as blocked domains (without
    the '~'), all other entries as allowed domains. The domain
    restricted option is set, if at least one domain is recorded.

    @param domains list of domains
    @type str
    @param separator separator character used by the list
    @type str
    """
    for entry in domains.split(separator):
        if not entry:
            # ignore empty list entries
            continue

        if entry[0] == "~":
            self.__blockedDomains.append(entry[1:])
        else:
            self.__allowedDomains.append(entry)

    if self.__blockedDomains or self.__allowedDomains:
        self.setOption(AdBlockRuleOption.DomainRestrictedOption)
|
352 |
|
def networkMatch(self, request, domain, encodedUrl):
    """
    Public method to check the rule for a match.

    CSS rules, disabled rules and internally disabled rules never match.
    For all other rules the URL has to match the rule pattern and the
    request has to satisfy each restriction set by the rule options.

    @param request reference to the network request
    @type QWebEngineUrlRequestInfo
    @param domain domain name
    @type str
    @param encodedUrl string encoded URL to be checked
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if (
        self.__type == AdBlockRuleType.CssRule or
        not self.__isEnabled or
        self.__isInternalDisabled
    ):
        return False

    if not self.__stringMatch(domain, encodedUrl):
        return False

    # check domain restrictions
    if (
        self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
        not self.matchDomain(request.firstPartyUrl().host())
    ):
        return False

    # Restrictions depending on the request itself; every restriction
    # set for the rule has to be satisfied.
    # NOTE(review): several request type options of one rule are
    # combined with AND here - confirm this is the intended semantics.
    requestChecks = (
        (AdBlockRuleOption.ThirdPartyOption, self.matchThirdParty),
        (AdBlockRuleOption.ObjectOption, self.matchObject),
        (AdBlockRuleOption.SubdocumentOption, self.matchSubdocument),
        (AdBlockRuleOption.XMLHttpRequestOption, self.matchXmlHttpRequest),
        (AdBlockRuleOption.ImageOption, self.matchImage),
        (AdBlockRuleOption.ScriptOption, self.matchScript),
        (AdBlockRuleOption.StyleSheetOption, self.matchStyleSheet),
        (AdBlockRuleOption.ObjectSubrequestOption,
         self.matchObjectSubrequest),
        (AdBlockRuleOption.PingOption, self.matchPing),
        (AdBlockRuleOption.MediaOption, self.matchMedia),
        (AdBlockRuleOption.FontOption, self.matchFont),
        # was missing before: rules with the 'other' option matched
        # requests of every resource type
        (AdBlockRuleOption.OtherOption, self.matchOther),
    )
    for option, matcher in requestChecks:
        if self.__hasOption(option) and not matcher(request):
            return False

    return True
|
435 |
|
def urlMatch(self, url):
    """
    Public method to check a URL against the rule.

    Only rules having the document or the element hiding option set can
    match.

    @param url URL to check
    @type QUrl
    @return flag indicating a match
    @rtype bool
    """
    appliesToPage = (
        self.__hasOption(AdBlockRuleOption.DocumentOption) or
        self.__hasOption(AdBlockRuleOption.ElementHideOption)
    )
    if not appliesToPage:
        return False

    urlString = bytes(url.toEncoded()).decode()
    return self.__stringMatch(url.host(), urlString)
|
452 |
|
def __stringMatch(self, domain, encodedUrl):
    """
    Private method to match the rule pattern against a domain and URL.

    Depending on the rule type either the domain or the encoded URL is
    checked. Rule types without a pattern check never match.

    @param domain domain to match
    @type str
    @param encodedUrl URL in encoded form
    @type str
    @return flag indicating a match
    @rtype bool
    """
    ruleType = self.__type
    ignoreCase = self.__caseSensitivity == Qt.CaseInsensitive

    if ruleType == AdBlockRuleType.StringContainsMatchRule:
        if ignoreCase:
            return self.__matchString.lower() in encodedUrl.lower()
        return self.__matchString in encodedUrl

    if ruleType == AdBlockRuleType.DomainMatchRule:
        return self.__isMatchingDomain(domain, self.__matchString)

    if ruleType == AdBlockRuleType.StringEndsMatchRule:
        if ignoreCase:
            return encodedUrl.lower().endswith(self.__matchString.lower())
        return encodedUrl.endswith(self.__matchString)

    if ruleType == AdBlockRuleType.RegExpMatchRule:
        # the cheap literal check is done before running the expression
        return (
            self.__isMatchingRegExpStrings(encodedUrl) and
            self.__regExp.indexIn(encodedUrl) != -1
        )

    return ruleType == AdBlockRuleType.MatchAllUrlsRule
|
488 |
|
def matchDomain(self, domain):
    """
    Public method to check a domain against the domain restrictions.

    A disabled rule never matches and a rule without domain restriction
    matches every domain. Otherwise the domain must not match any of the
    blocked domains and, if allowed domains are given, has to match one
    of them.

    @param domain domain name to check
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if not self.__isEnabled:
        return False

    if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
        return True

    blocked = self.__blockedDomains
    allowed = self.__allowedDomains

    if any(self.__isMatchingDomain(domain, entry) for entry in blocked):
        return False

    if blocked and not allowed:
        # only exclusions were given and none of them applied
        return True

    return any(self.__isMatchingDomain(domain, entry) for entry in allowed)
|
522 |
|
def matchThirdParty(self, req):
    """
    Public method to check a request against the third-party option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    # Third-party matching should be performed on second-level domains
    pageDomain = toSecondLevelDomain(req.firstPartyUrl())
    requestDomain = toSecondLevelDomain(req.requestUrl())
    isThirdParty = pageDomain != requestDomain

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.ThirdPartyOption)
    return isThirdParty != inverted
|
542 |
|
def matchObject(self, req):
    """
    Public method to check a request against the object option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isObject = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeObject)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.ObjectOption)
    return isObject != inverted
|
559 |
|
def matchSubdocument(self, req):
    """
    Public method to check a request against the sub-document option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isSubFrame = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeSubFrame)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.SubdocumentOption)
    return isSubFrame != inverted
|
577 |
|
def matchXmlHttpRequest(self, req):
    """
    Public method to check a request against the XmlHttpRequest option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isXhr = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeXhr)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.XMLHttpRequestOption)
    return isXhr != inverted
|
594 |
|
def matchImage(self, req):
    """
    Public method to check a request against the image option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isImage = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeImage)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.ImageOption)
    return isImage != inverted
|
611 |
|
def matchScript(self, req):
    """
    Public method to check a request against the script option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isScript = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeScript)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.ScriptOption)
    return isScript != inverted
|
628 |
|
def matchStyleSheet(self, req):
    """
    Public method to check a request against the style sheet option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isStyleSheet = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceTypeStylesheet)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.StyleSheetOption)
    return isStyleSheet != inverted
|
646 |
|
def matchObjectSubrequest(self, req):
    """
    Public method to match an Object Subrequest rule.

    Sub-resource requests and, for Qt and PyQt 5.7.0 or newer, plug-in
    resource requests are treated as object sub-requests.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    resourceType = req.resourceType()
    match = (
        resourceType == QWebEngineUrlRequestInfo.ResourceTypeSubResource)
    if qVersionTuple() >= (5, 7, 0) and PYQT_VERSION >= 0x50700:
        match = match or (
            resourceType ==
            QWebEngineUrlRequestInfo.ResourceTypePluginResource)

    # The exception state is kept in the exceptions bit field like for
    # all other options; a dedicated attribute for it is never set.
    if self.__hasException(AdBlockRuleOption.ObjectSubrequestOption):
        return not match
    else:
        return match
|
668 |
|
def matchPing(self, req):
    """
    Public method to check a request against the ping option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isPing = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypePing)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.PingOption)
    return isPing != inverted
|
685 |
|
def matchMedia(self, req):
    """
    Public method to check a request against the media option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isMedia = (
        req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeMedia)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.MediaOption)
    return isMedia != inverted
|
702 |
|
def matchFont(self, req):
    """
    Public method to check a request against the font option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isFont = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceTypeFontResource)

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.FontOption)
    return isFont != inverted
|
720 |
|
def matchOther(self, req):
    """
    Public method to check a request against the 'other' option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    # resource types treated as 'other'
    otherTypes = (
        QWebEngineUrlRequestInfo.ResourceTypeSubResource,
        QWebEngineUrlRequestInfo.ResourceTypeWorker,
        QWebEngineUrlRequestInfo.ResourceTypeSharedWorker,
        QWebEngineUrlRequestInfo.ResourceTypeServiceWorker,
        QWebEngineUrlRequestInfo.ResourceTypePrefetch,
        QWebEngineUrlRequestInfo.ResourceTypeFavicon,
        QWebEngineUrlRequestInfo.ResourceTypeUnknown,
    )
    isOther = req.resourceType() in otherTypes

    # an option recorded as exception inverts the result
    inverted = self.__hasException(AdBlockRuleOption.OtherOption)
    return isOther != inverted
|
744 |
|
def isException(self):
    """
    Public method to check, if the rule is an exception rule.

    @return flag indicating an exception
    @rtype bool
    """
    return self.__isException
|
753 |
|
def setException(self, exception):
    """
    Public method to mark the rule as an exception rule or not.

    @param exception flag indicating an exception rule
    @type bool
    """
    self.__isException = exception
|
762 |
|
def isEnabled(self):
    """
    Public method to get the enabled state of the rule.

    @return flag indicating enabled state
    @rtype bool
    """
    return self.__isEnabled
|
771 |
|
def setEnabled(self, enabled):
    """
    Public method to enable or disable the rule.

    @param enabled flag indicating the new enabled state
    @type bool
    """
    self.__isEnabled = enabled
|
780 |
|
def isCSSRule(self):
    """
    Public method to check, if the rule was parsed as a CSS rule.

    @return flag indicating a CSS rule
    @rtype bool
    """
    return self.__type == AdBlockRuleType.CssRule
|
789 |
|
def cssSelector(self):
    """
    Public method to get the CSS selector of the rule.

    The selector is kept in the match string of the rule.

    @return CSS selector
    @rtype str
    """
    return self.__matchString
|
798 |
|
def isDocument(self):
    """
    Public method to check, if the document option is set for the rule.

    @return flag indicating a document rule
    @rtype bool
    """
    return self.__hasOption(AdBlockRuleOption.DocumentOption)
|
807 |
|
def isElementHiding(self):
    """
    Public method to check, if the element hiding option is set for the
    rule.

    @return flag indicating an element hiding rule
    @rtype bool
    """
    return self.__hasOption(AdBlockRuleOption.ElementHideOption)
|
816 |
|
def isDomainRestricted(self):
    """
    Public method to check, if the domain restricted option is set for
    the rule.

    @return flag indicating a domain restriction
    @rtype bool
    """
    return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)
|
825 |
|
def isComment(self):
    """
    Public method to check, if the rule is a comment, i.e. its filter
    string starts with '!'.

    @return flag indicating a comment
    @rtype bool
    """
    return self.__filter.startswith("!")
|
834 |
|
def isHeader(self):
    """
    Public method to check, if the rule is a header, i.e. its filter
    string starts with '[Adblock'.

    @return flag indicating a header
    @rtype bool
    """
    return self.__filter.startswith("[Adblock")
|
843 |
|
def isSlow(self):
    """
    Public method to check, if the rule is a slow one, i.e. it has a
    regular expression assigned.

    @return flag indicating a slow rule
    @rtype bool
    """
    return self.__regExp is not None
|
852 |
|
def isInternalDisabled(self):
    """
    Public method to check, if the rule got disabled internally.

    @return flag indicating an internally disabled rule
    @rtype bool
    """
    return self.__isInternalDisabled
|
861 |
|
def __convertPatternToRegExp(self, wildcardPattern):
    """
    Private method to convert a wildcard pattern to a regular expression.

    @param wildcardPattern string containing the wildcard pattern
    @type str
    @return string containing a regular expression
    @rtype str
    """
    # Replacement texts containing regexp escapes like '\w' or '\d' are
    # supplied through a callable. Given as a replacement string they
    # would be parsed as a template by re.sub(), which rejects unknown
    # escapes of ASCII letters with an exception on Python 3.7 and newer.
    extendedAnchor = r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?"
    separator = r"(?:[^\w\d\-.%]|$)"

    pattern = wildcardPattern

    # remove multiple wildcards
    pattern = re.sub(r"\*+", "*", pattern)
    # remove anchors following separator placeholder
    pattern = re.sub(r"\^\|$", "^", pattern)
    # remove leading wildcards
    pattern = re.sub(r"^(\*)", "", pattern)
    # remove trailing wildcards
    pattern = re.sub(r"(\*)$", "", pattern)
    # escape special symbols
    pattern = re.sub(r"(\W)", r"\\\1", pattern)
    # process extended anchor at expression start
    pattern = re.sub(r"^\\\|\\\|", lambda _match: extendedAnchor, pattern)
    # process separator placeholders
    pattern = re.sub(r"\\\^", lambda _match: separator, pattern)
    # process anchor at expression start
    pattern = re.sub(r"^\\\|", "^", pattern)
    # process anchor at expression end
    pattern = re.sub(r"\\\|$", "$", pattern)
    # replace wildcards by .*
    pattern = re.sub(r"\\\*", ".*", pattern)

    return pattern
|
897 |
|
def __hasOption(self, opt):
    """
    Private method to check, if the given option is set for the rule.

    @param opt option to check for
    @type AdBlockRuleOption
    @return flag indicating the state of the option
    @rtype bool
    """
    # the options are kept as a bit field
    optionBits = self.__options & opt
    return bool(optionBits)
|
908 |
|
def setOption(self, opt):
    """
    Public method to set the given option for the rule.

    Options set before are kept.

    @param opt option to be set
    @type AdBlockRuleOption
    """
    self.__options = self.__options | opt
|
917 |
|
def __hasException(self, opt):
    """
    Private method to check, if the given option is recorded as an
    exception.

    @param opt option to check for
    @type AdBlockRuleOption
    @return flag indicating the exception state of the option
    @rtype bool
    """
    # the exceptions are kept as a bit field
    exceptionBits = self.__exceptions & opt
    return bool(exceptionBits)
|
929 |
|
def __setException(self, opt, on):
    """
    Private method to record or remove the given option as an exception.

    @param opt option to be set
    @type AdBlockRuleOption
    @param on flag indicating to set or unset the exception
    @type bool
    """
    current = self.__exceptions
    self.__exceptions = (current | opt) if on else (current & ~opt)
|
943 |
|
def __filterIsOnlyDomain(self, filterString):
    """
    Private method to check, if the given filter is a domain only filter.

    Such a filter starts with '||', ends with '^' and contains none of
    the characters '/', ':', '?', '=', '&' and '*'.

    @param filterString filter string to be checked
    @type str
    @return flag indicating a domain only filter
    @rtype bool
    """
    isAnchored = (
        filterString.startswith("||") and filterString.endswith("^"))
    if not isAnchored:
        return False

    return not any(char in "/:?=&*" for char in filterString)
|
961 |
|
def __filterIsOnlyEndsMatch(self, filterString):
    """
    Private method to check, if the given filter is to be matched against
    the end of a string.

    This is the case, if the first special character ('^', '*' or '|')
    of the filter is a '|' located at its very end.

    @param filterString filter string to be checked
    @type str
    @return flag indicating an end of string match filter
    @rtype bool
    """
    lastIndex = len(filterString) - 1
    for position, char in enumerate(filterString):
        if char == "^" or char == "*":
            return False
        if char == "|":
            return position == lastIndex

    return False
|
981 |
|
def __isMatchingDomain(self, domain, filterString):
    """
    Private method to check, if a given domain matches the given filter
    string.

    The domain matches, if it is equal to the filter string or if it is a
    sub-domain of it, i.e. it ends with the filter string preceded by a
    '.'.

    @param domain domain to be checked
    @type str
    @param filterString filter string to check against
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if filterString == domain:
        return True

    # an empty filter string matches an empty domain only
    if not filterString or not domain.endswith(filterString):
        return False

    # Look at the character in front of the matched suffix. Searching for
    # the first occurrence of the filter string instead would fail for
    # domains containing it more than once (e.g. "a.com.a.com").
    index = len(domain) - len(filterString)

    return bool(index > 0 and domain[index - 1] == ".")
|
1003 |
|
def __isMatchingRegExpStrings(self, url):
    """
    Private method to check the given URL against the fixed parts of
    the regexp.

    This is a cheap check done before evaluating the regular expression.
    It respects the case sensitivity of the rule, so that it does not
    reject URLs the case insensitive regular expression would accept.

    @param url URL to be checked
    @type str
    @return flag indicating a match
    @rtype bool
    """
    assert self.__regExp is not None

    if self.__caseSensitivity == Qt.CaseInsensitive:
        # convert the URL just once for all matchers
        url = url.lower()
        return all(
            matcher.lower() in url for matcher in self.__stringMatchers)

    return all(matcher in url for matcher in self.__stringMatchers)
|
1021 |
|
def __parseRegExpFilter(self, filterString):
    """
    Private method to extract the fixed string parts of a filter, that can
    be checked with the 'in' operator.

    The filter is cut at the characters '|', '*' and '^'. Pieces longer
    than one character are kept and duplicates are removed; the order of
    the result is not defined. The text in front of the first of these
    characters is not part of the result, so a filter containing none of
    them yields an empty list.

    NOTE(review): dropping the leading piece mirrors the long-standing
    behaviour of this method; confirm whether it is intentional before
    changing it, because callers treat every returned string as mandatory.

    @param filterString regexp filter string to be parsed
    @type str
    @return fixed string parts of the filter
    @rtype list of str
    """
    # the leading piece (index 0) is skipped on purpose, see above
    pieces = re.split(r"[|*^]", filterString)[1:]
    matchers = [piece for piece in pieces if len(piece) > 1]

    return list(set(matchers))
|
1048 |
|
def ruleType(self):
    """
    Public method to retrieve the type of this rule.

    @return type of the rule
    @rtype AdBlockRuleType
    """
    return self.__type
|
1057 |
|
def ruleOptions(self):
    """
    Public method to retrieve the options of this rule.

    @return options of the rule
    @rtype AdBlockRuleOption
    """
    return self.__options
|
1066 |
|
def ruleExceptions(self):
    """
    Public method to retrieve the exceptions of this rule.

    @return exceptions of the rule
    @rtype AdBlockRuleOption
    """
    return self.__exceptions
|
1075 |
|
def matchString(self):
    """
    Public method to retrieve the string this rule matches against.

    @return match string of the rule
    @rtype str
    """
    return self.__matchString
|
1084 |
|
def caseSensitivity(self):
    """
    Public method to retrieve the case sensitivity of this rule.

    @return case sensitivity of the rule
    @rtype Qt.CaseSensitivity
    """
    return self.__caseSensitivity
|
1093 |
|
def allowedDomains(self):
    """
    Public method to retrieve the allowed domains.

    The result is a copy; modifying it does not change the rule.

    @return copy of the list of allowed domains
    @rtype list of str
    """
    domains = self.__allowedDomains
    return domains[:]
|
1102 |
|
def blockedDomains(self):
    """
    Public method to retrieve the blocked domains.

    The result is a copy; modifying it does not change the rule.

    @return copy of the list of blocked domains
    @rtype list of str
    """
    return list(self.__blockedDomains)
|
1111 |
|
def addBlockedDomains(self, domains):
    """
    Public method to extend the list of blocked domains.

    A list contributes each of its entries; any other value is added as a
    single entry.

    @param domains domain or list of domains to be added
    @type str or list of str
    """
    newEntries = domains if isinstance(domains, list) else [domains]
    self.__blockedDomains.extend(newEntries)
|
1123 |
|
def getRegExpAndMatchers(self):
    """
    Public method to retrieve the regular expression together with its
    string matchers.

    For a rule without a regular expression (None, []) is returned.
    Otherwise the result holds a new QRegExp constructed from the stored
    one and a copy of the list of string matchers.

    @return tuple containing the regular expression and the list of
        string matchers
    @rtype tuple of (QRegExp, list of str)
    """
    if self.__regExp is None:
        return (None, [])

    return (QRegExp(self.__regExp), self.__stringMatchers[:])
|
1137 |
|
def copyFrom(self, other):
    """
    Public method to take over the state of another AdBlock rule.

    All values are read through the public accessor methods of the other
    rule.

    @param other reference to the AdBlock rule to copy from
    @type AdBlockRule
    """
    # origin and classification of the rule
    self.__subscription = other.subscription()
    self.__type = other.ruleType()
    self.__options = other.ruleOptions()
    self.__exceptions = other.ruleExceptions()

    # filter text and matching parameters
    self.__filter = other.filter()
    self.__matchString = other.matchString()
    self.__caseSensitivity = other.caseSensitivity()

    # state flags
    self.__isEnabled = other.isEnabled()
    self.__isException = other.isException()
    self.__isInternalDisabled = other.isInternalDisabled()

    # domain restrictions
    self.__allowedDomains = other.allowedDomains()
    self.__blockedDomains = other.blockedDomains()

    # regular expression and its fixed string parts
    regExp, stringMatchers = other.getRegExpAndMatchers()
    self.__regExp = regExp
    self.__stringMatchers = stringMatchers
|
1157 self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers() |