|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2009 - 2022 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock rule class. |
|
8 """ |
|
9 |
|
10 import re |
|
11 from enum import IntEnum |
|
12 |
|
13 from PyQt6.QtCore import Qt |
|
14 from PyQt6.QtWebEngineCore import QWebEngineUrlRequestInfo |
|
15 |
|
16 from EricNetwork import EricTldExtractor |
|
17 |
|
18 |
|
def toSecondLevelDomain(url):
    """
    Module function to determine the second level domain of a URL.

    The host is split into its top level domain (as reported by the TLD
    extractor) and the text in front of it. The result is the part of that
    leading text following its last dot with the top level domain appended.

    @param url URL to extract the domain from
    @type QUrl
    @return name of the second level domain (empty string, if the host or
        its top level domain is empty)
    @rtype str
    """
    host = url.host()
    tld = EricTldExtractor.instance().tld(host)
    if not (tld and host):
        return ""

    # text of the host in front of the top level domain
    prefix = host[:len(host) - len(tld)]
    if "." not in prefix:
        # nothing to strip, the host is the result
        return host

    # keep only what follows the last dot of the prefix
    return prefix.rpartition(".")[2] + tld
|
42 |
|
43 |
|
class AdBlockRuleType(IntEnum):
    """
    Class defining the kinds of rules a filter line can be parsed into.
    """
    # element hiding rule ('##' or '#@#')
    CssRule = 0
    # rule matching a domain and its sub-domains
    DomainMatchRule = 1
    # rule evaluated with a regular expression
    RegExpMatchRule = 2
    # rule matching the end of the URL
    StringEndsMatchRule = 3
    # rule matching a substring of the URL
    StringContainsMatchRule = 4
    # rule without a pattern, i.e. matching every URL
    MatchAllUrlsRule = 5
    # comment, empty or unsupported rule
    Invalid = 6
|
55 |
|
56 |
|
class AdBlockRuleOption(IntEnum):
    """
    Class defining the rule options.

    Each option occupies its own bit, so several options can be combined
    into a single integer with the bitwise operators.
    """
    NoOption = 0
    DomainRestrictedOption = 1 << 0
    ThirdPartyOption = 1 << 1
    ObjectOption = 1 << 2
    SubdocumentOption = 1 << 3
    XMLHttpRequestOption = 1 << 4
    ImageOption = 1 << 5
    ScriptOption = 1 << 6
    StyleSheetOption = 1 << 7
    ObjectSubrequestOption = 1 << 8
    PingOption = 1 << 9
    MediaOption = 1 << 10
    FontOption = 1 << 11
    OtherOption = 1 << 12

    # Exception only options
    DocumentOption = 1 << 13
    ElementHideOption = 1 << 14
|
79 |
|
80 |
|
81 class AdBlockRule: |
|
82 """ |
|
83 Class implementing the AdBlock rule. |
|
84 """ |
|
def __init__(self, filterRule="", subscription=None):
    """
    Constructor

    All parser state is set to its defaults and the given filter string
    is parsed immediately.

    @param filterRule filter string of the rule
    @type str
    @param subscription reference to the subscription object
    @type AdBlockSubscription
    """
    self.__subscription = subscription

    self.__regExp = None
    self.__stringMatchers = []
    # Not every rule type assigns a match string while parsing. Give it a
    # defined value, so that cssSelector() never raises AttributeError.
    self.__matchString = ""

    self.__blockedDomains = []
    self.__allowedDomains = []

    self.__isEnabled = True
    self.__isException = False
    self.__isInternalDisabled = False
    self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive

    self.__type = AdBlockRuleType.StringContainsMatchRule
    self.__options = AdBlockRuleOption.NoOption
    self.__exceptions = AdBlockRuleOption.NoOption

    # must come last because parsing updates the attributes set above
    self.setFilter(filterRule)
|
112 |
|
def subscription(self):
    """
    Public method to retrieve the subscription owning this rule.

    @return subscription the rule is assigned to
    @rtype AdBlockSubscription
    """
    owner = self.__subscription
    return owner
|
121 |
|
def setSubscription(self, subscription):
    """
    Public method to assign the rule to a subscription.

    @param subscription subscription the rule shall belong to
    @type AdBlockSubscription
    """
    # only the reference is stored; the rule is not parsed again
    self.__subscription = subscription
|
130 |
|
def filter(self):
    """
    Public method to retrieve the filter string the rule was built from.

    @return unparsed rule filter string
    @rtype str
    """
    filterText = self.__filter
    return filterText
|
139 |
|
def setFilter(self, filterRule):
    """
    Public method to replace the rule filter string and parse it.

    @param filterRule rule filter string
    @type str
    """
    # store the text first because the parser reads it from the attribute
    self.__filter = filterRule
    self.__parseFilter()
|
149 |
|
def __parseFilter(self):
    """
    Private method to parse the filter pattern.

    The filter text is classified as comment/empty line, CSS element
    hiding rule, regular expression rule, domain rule, 'ends with' rule,
    'match all' rule or plain substring rule. The rule type, the options,
    the domain restrictions and the match data are stored accordingly.
    Rules containing options that are not handled and rules whose regular
    expression cannot be compiled are disabled internally.
    """
    def markInvalid():
        # disable the rule without touching the user controlled state
        self.__isInternalDisabled = True
        self.__type = AdBlockRuleType.Invalid

    def compileRegExp(pattern):
        # Compare explicitly: the truth value of a plain enum member does
        # not reflect which member it is.
        flags = (
            0
            if self.__caseSensitivity == Qt.CaseSensitivity.CaseSensitive
            else re.IGNORECASE
        )
        try:
            return re.compile(pattern, flags)
        except re.error:
            # not a valid Python regular expression
            return None

    parsedLine = self.__filter

    # empty rule or just a comment
    if not parsedLine.strip() or parsedLine.startswith("!"):
        self.__isEnabled = False
        markInvalid()
        return

    # CSS element hiding rule
    if "##" in parsedLine or "#@#" in parsedLine:
        self.__type = AdBlockRuleType.CssRule
        pos = parsedLine.find("#")

        # domain restricted rule
        if not parsedLine.startswith("##"):
            domains = parsedLine[:pos]
            self.__parseDomains(domains, ",")

        self.__isException = parsedLine[pos + 1] == "@"
        if self.__isException:
            self.__matchString = parsedLine[pos + 3:]
        else:
            self.__matchString = parsedLine[pos + 2:]

        # CSS rule cannot have more options -> stop parsing
        return

    # Exception always starts with @@
    if parsedLine.startswith("@@"):
        self.__isException = True
        parsedLine = parsedLine[2:]

    # Parse all options following '$' character
    optionsIndex = parsedLine.find("$")
    if optionsIndex >= 0:
        options = [opt
                   for opt in parsedLine[optionsIndex + 1:].split(",")
                   if opt]

        # Resource type options in the order they are tested; each one
        # may be negated with a leading '~'.
        typeOptions = (
            ("third-party", AdBlockRuleOption.ThirdPartyOption),
            ("object", AdBlockRuleOption.ObjectOption),
            ("subdocument", AdBlockRuleOption.SubdocumentOption),
            ("xmlhttprequest", AdBlockRuleOption.XMLHttpRequestOption),
            ("image", AdBlockRuleOption.ImageOption),
            ("script", AdBlockRuleOption.ScriptOption),
            ("stylesheet", AdBlockRuleOption.StyleSheetOption),
            ("object-subrequest", AdBlockRuleOption.ObjectSubrequestOption),
            ("ping", AdBlockRuleOption.PingOption),
            ("media", AdBlockRuleOption.MediaOption),
            ("font", AdBlockRuleOption.FontOption),
            ("other", AdBlockRuleOption.OtherOption),
        )

        handledOptions = 0
        for option in options:
            if option.startswith("domain="):
                self.__parseDomains(option[7:], "|")
                handledOptions += 1
                continue

            if option == "match-case":
                self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive
                handledOptions += 1
                continue

            typeOption = next(
                (opt for suffix, opt in typeOptions
                 if option.endswith(suffix)),
                None)
            if typeOption is not None:
                self.setOption(typeOption)
                self.__setException(typeOption, option.startswith("~"))
                # 'other' keeps its previous treatment: it is recorded
                # but not counted as handled, so such a rule is disabled
                # below. Counting it is only safe once request matching
                # restricts the rule to 'other' resource types.
                if typeOption != AdBlockRuleOption.OtherOption:
                    handledOptions += 1
                continue

            if option == "document" and self.__isException:
                self.setOption(AdBlockRuleOption.DocumentOption)
                handledOptions += 1
            elif option == "elemhide" and self.__isException:
                self.setOption(AdBlockRuleOption.ElementHideOption)
                handledOptions += 1
            elif option == "collapse":
                # Hiding placeholders of blocked elements is enabled by
                # default
                handledOptions += 1

        # If we don't handle all options, it's safer to just disable
        # this rule
        if handledOptions != len(options):
            markInvalid()
            return

        parsedLine = parsedLine[:optionsIndex]

    # Rule is classic regexp
    if parsedLine.startswith("/") and parsedLine.endswith("/"):
        parsedLine = parsedLine[1:-1]
        self.__regExp = compileRegExp(parsedLine)
        if self.__regExp is None:
            markInvalid()
            return

        self.__type = AdBlockRuleType.RegExpMatchRule
        self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
        return

    # Remove starting / ending wildcards (*)
    if parsedLine.startswith("*"):
        parsedLine = parsedLine[1:]
    if parsedLine.endswith("*"):
        parsedLine = parsedLine[:-1]

    # Fast string matching for domain here
    if self.__filterIsOnlyDomain(parsedLine):
        # strip the leading '||' and the trailing '^'
        self.__type = AdBlockRuleType.DomainMatchRule
        self.__matchString = parsedLine[2:-1]
        return

    # If rule contains '|' only at the end, string matching can be used
    if self.__filterIsOnlyEndsMatch(parsedLine):
        self.__type = AdBlockRuleType.StringEndsMatchRule
        self.__matchString = parsedLine[:-1]
        return

    # If there is still a wildcard (*) or separator (^) or (|),
    # the rule must be modified to comply with re.
    if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
        self.__regExp = compileRegExp(
            self.__convertPatternToRegExp(parsedLine))
        if self.__regExp is None:
            markInvalid()
            return

        self.__type = AdBlockRuleType.RegExpMatchRule
        self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
        return

    # This rule matches all URLs
    if not parsedLine:
        if self.__options == AdBlockRuleOption.NoOption:
            # a rule matching everything without any restriction
            markInvalid()
            return

        self.__type = AdBlockRuleType.MatchAllUrlsRule
        return

    # no regexp required
    self.__type = AdBlockRuleType.StringContainsMatchRule
    self.__matchString = parsedLine
|
331 |
|
def __parseDomains(self, domains, separator):
    """
    Private method to parse a string with a domain list.

    Entries starting with '~' are added to the blocked domains (without
    the '~'), all others to the allowed domains. Empty entries are
    skipped. If the rule has any domain afterwards, it is flagged as
    domain restricted.

    @param domains list of domains
    @type str
    @param separator separator character used by the list
    @type str
    """
    for entry in domains.split(separator):
        if not entry:
            continue

        if entry[0] == "~":
            self.__blockedDomains.append(entry[1:])
        else:
            self.__allowedDomains.append(entry)

    if self.__blockedDomains or self.__allowedDomains:
        self.setOption(AdBlockRuleOption.DomainRestrictedOption)
|
353 |
|
def networkMatch(self, request, domain, encodedUrl):
    """
    Public method to check the rule for a match.

    CSS rules and disabled rules never match. Otherwise the URL has to
    match the rule pattern and the request has to satisfy every
    restriction (domain and resource type options) set for the rule.

    @param request reference to the network request
    @type QWebEngineUrlRequestInfo
    @param domain domain name
    @type str
    @param encodedUrl string encoded URL to be checked
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if (
        self.__type == AdBlockRuleType.CssRule or
        not self.__isEnabled or
        self.__isInternalDisabled
    ):
        return False

    if not self.__stringMatch(domain, encodedUrl):
        return False

    # check domain restrictions
    if (
        self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
        not self.matchDomain(request.firstPartyUrl().host())
    ):
        return False

    # Restrictions evaluated on the request itself. 'other' is included
    # so that every resource type option is enforced the same way.
    requestRestrictions = (
        (AdBlockRuleOption.ThirdPartyOption, self.matchThirdParty),
        (AdBlockRuleOption.ObjectOption, self.matchObject),
        (AdBlockRuleOption.SubdocumentOption, self.matchSubdocument),
        (AdBlockRuleOption.XMLHttpRequestOption, self.matchXmlHttpRequest),
        (AdBlockRuleOption.ImageOption, self.matchImage),
        (AdBlockRuleOption.ScriptOption, self.matchScript),
        (AdBlockRuleOption.StyleSheetOption, self.matchStyleSheet),
        (AdBlockRuleOption.ObjectSubrequestOption,
         self.matchObjectSubrequest),
        (AdBlockRuleOption.PingOption, self.matchPing),
        (AdBlockRuleOption.MediaOption, self.matchMedia),
        (AdBlockRuleOption.FontOption, self.matchFont),
        (AdBlockRuleOption.OtherOption, self.matchOther),
    )
    for option, matcher in requestRestrictions:
        if self.__hasOption(option) and not matcher(request):
            return False

    return True
|
462 |
|
def urlMatch(self, url):
    """
    Public method to check an URL against the rule.

    Only rules carrying the 'document' or 'elemhide' option are
    evaluated; all other rules report no match.

    @param url URL to check
    @type QUrl
    @return flag indicating a match
    @rtype bool
    """
    isPageRule = (
        self.__hasOption(AdBlockRuleOption.DocumentOption) or
        self.__hasOption(AdBlockRuleOption.ElementHideOption)
    )
    if not isPageRule:
        return False

    urlText = bytes(url.toEncoded()).decode()
    return self.__stringMatch(url.host(), urlText)
|
481 |
|
def __stringMatch(self, domain, encodedUrl):
    """
    Private method to match the rule pattern against a domain and URL.

    Depending on the rule type either the domain or the encoded URL is
    tested. Rule types without a pattern test (e.g. CSS or invalid rules)
    never match.

    @param domain domain to match
    @type str
    @param encodedUrl URL in encoded form
    @type str
    @return flag indicating a match
    @rtype bool
    """
    ruleType = self.__type

    if ruleType == AdBlockRuleType.MatchAllUrlsRule:
        return True

    if ruleType == AdBlockRuleType.DomainMatchRule:
        return self.__isMatchingDomain(domain, self.__matchString)

    if ruleType == AdBlockRuleType.RegExpMatchRule:
        # cheap substring test first, the regular expression only if
        # that did not rule the URL out
        if not self.__isMatchingRegExpStrings(encodedUrl):
            return False
        return self.__regExp.search(encodedUrl) is not None

    if ruleType in (
        AdBlockRuleType.StringContainsMatchRule,
        AdBlockRuleType.StringEndsMatchRule,
    ):
        needle = self.__matchString
        haystack = encodedUrl
        if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive:
            needle = needle.lower()
            haystack = haystack.lower()

        if ruleType == AdBlockRuleType.StringContainsMatchRule:
            return needle in haystack
        return haystack.endswith(needle)

    return False
|
517 |
|
def matchDomain(self, domain):
    """
    Public method to match a domain against the domain restrictions.

    A disabled rule never matches and a rule without domain restriction
    always matches. Otherwise the domain must not match any blocked
    domain and, if allowed domains are given, must match one of them.

    @param domain domain name to check
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if not self.__isEnabled:
        return False

    if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
        return True

    isBlocked = any(self.__isMatchingDomain(domain, blocked)
                    for blocked in self.__blockedDomains)

    if self.__blockedDomains and not self.__allowedDomains:
        # only exclusions given: everything else is fine
        return not isBlocked

    isAllowed = any(self.__isMatchingDomain(domain, allowed)
                    for allowed in self.__allowedDomains)
    return not isBlocked and isAllowed
|
546 |
|
def matchThirdParty(self, req):
    """
    Public method to match a third-party rule.

    A request counts as third-party, if the second level domains of the
    first party URL and of the requested URL differ.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype boolean
    """
    pageDomain = toSecondLevelDomain(req.firstPartyUrl())
    requestDomain = toSecondLevelDomain(req.requestUrl())
    isThirdParty = pageDomain != requestDomain

    # a negated option ('~third-party') matches first-party requests
    negated = self.__hasException(AdBlockRuleOption.ThirdPartyOption)
    return not isThirdParty if negated else isThirdParty
|
566 |
|
def matchObject(self, req):
    """
    Public method to match an object rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isObject = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject
    )

    # a negated option ('~object') matches everything but objects
    negated = self.__hasException(AdBlockRuleOption.ObjectOption)
    return not isObject if negated else isObject
|
584 |
|
def matchSubdocument(self, req):
    """
    Public method to match a sub-document rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype boolean
    """
    isSubFrame = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame
    )

    # a negated option ('~subdocument') matches everything but sub-frames
    negated = self.__hasException(AdBlockRuleOption.SubdocumentOption)
    return not isSubFrame if negated else isSubFrame
|
602 |
|
def matchXmlHttpRequest(self, req):
    """
    Public method to match a XmlHttpRequest rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isXhr = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr
    )

    # a negated option ('~xmlhttprequest') matches everything but XHR
    negated = self.__hasException(AdBlockRuleOption.XMLHttpRequestOption)
    return not isXhr if negated else isXhr
|
620 |
|
def matchImage(self, req):
    """
    Public method to match an Image rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isImage = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage
    )

    # a negated option ('~image') matches everything but images
    negated = self.__hasException(AdBlockRuleOption.ImageOption)
    return not isImage if negated else isImage
|
638 |
|
def matchScript(self, req):
    """
    Public method to match a Script rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isScript = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript
    )

    # a negated option ('~script') matches everything but scripts
    negated = self.__hasException(AdBlockRuleOption.ScriptOption)
    return not isScript if negated else isScript
|
656 |
|
def matchStyleSheet(self, req):
    """
    Public method to match a StyleSheet rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isStyleSheet = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet
    )

    # a negated option ('~stylesheet') matches everything but style sheets
    negated = self.__hasException(AdBlockRuleOption.StyleSheetOption)
    return not isStyleSheet if negated else isStyleSheet
|
674 |
|
def matchObjectSubrequest(self, req):
    """
    Public method to match an Object Subrequest rule.

    Requests of type sub-resource and plug-in resource are treated as
    object subrequests.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype boolean
    """
    isSubrequest = req.resourceType() in (
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource,
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypePluginResource,
    )

    # The negation ('~object-subrequest') is recorded by the parser in
    # the exceptions bit field like for all other resource type options;
    # query it the same way as the sibling match methods do.
    negated = self.__hasException(AdBlockRuleOption.ObjectSubrequestOption)
    return not isSubrequest if negated else isSubrequest
|
697 |
|
def matchPing(self, req):
    """
    Public method to match a Ping rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isPing = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing
    )

    # a negated option ('~ping') matches everything but pings
    negated = self.__hasException(AdBlockRuleOption.PingOption)
    return not isPing if negated else isPing
|
715 |
|
def matchMedia(self, req):
    """
    Public method to match a Media rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isMedia = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia
    )

    # a negated option ('~media') matches everything but media
    negated = self.__hasException(AdBlockRuleOption.MediaOption)
    return not isMedia if negated else isMedia
|
733 |
|
def matchFont(self, req):
    """
    Public method to match a Font rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    isFont = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource
    )

    # a negated option ('~font') matches everything but fonts
    negated = self.__hasException(AdBlockRuleOption.FontOption)
    return not isFont if negated else isFont
|
751 |
|
def matchOther(self, req):
    """
    Public method to match any other rule.

    The resource types listed in the method body are the ones treated as
    'other'.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    resourceTypes = QWebEngineUrlRequestInfo.ResourceType
    isOther = req.resourceType() in (
        resourceTypes.ResourceTypeSubResource,
        resourceTypes.ResourceTypeWorker,
        resourceTypes.ResourceTypeSharedWorker,
        resourceTypes.ResourceTypeServiceWorker,
        resourceTypes.ResourceTypePrefetch,
        resourceTypes.ResourceTypeFavicon,
        resourceTypes.ResourceTypeUnknown,
    )

    # a negated option ('~other') matches all types not listed above
    negated = self.__hasException(AdBlockRuleOption.OtherOption)
    return not isOther if negated else isOther
|
775 |
|
def isException(self):
    """
    Public method to check, if the rule is an exception rule.

    @return flag indicating an exception
    @rtype bool
    """
    exceptionFlag = self.__isException
    return exceptionFlag
|
784 |
|
def setException(self, exception):
    """
    Public method to mark the rule as exception rule or normal rule.

    @param exception flag indicating an exception rule
    @type bool
    """
    # only the flag is changed, the filter string stays untouched
    self.__isException = exception
|
793 |
|
def isEnabled(self):
    """
    Public method to retrieve the enabled state of the rule.

    @return flag indicating enabled state
    @rtype bool
    """
    enabledFlag = self.__isEnabled
    return enabledFlag
|
802 |
|
def setEnabled(self, enabled):
    """
    Public method to enable or disable the rule.

    @param enabled flag indicating the new enabled state
    @type bool
    """
    # the internal disabled flag is kept separately and not changed here
    self.__isEnabled = enabled
|
811 |
|
def isCSSRule(self):
    """
    Public method to check, if the rule was parsed as a CSS rule.

    @return flag indicating a CSS rule
    @rtype bool
    """
    ruleType = self.__type
    return ruleType == AdBlockRuleType.CssRule
|
820 |
|
def cssSelector(self):
    """
    Public method to get the CSS selector of the rule.

    The value returned is the stored match string, which holds the
    selector for CSS rules.

    @return CSS selector
    @rtype str
    """
    selector = self.__matchString
    return selector
|
829 |
|
def isDocument(self):
    """
    Public method to check, if the 'document' option is set.

    @return flag indicating a document rule
    @rtype bool
    """
    documentOption = AdBlockRuleOption.DocumentOption
    return self.__hasOption(documentOption)
|
838 |
|
def isElementHiding(self):
    """
    Public method to check, if the 'elemhide' option is set.

    @return flag indicating an element hiding rule
    @rtype bool
    """
    elementHideOption = AdBlockRuleOption.ElementHideOption
    return self.__hasOption(elementHideOption)
|
847 |
|
def isDomainRestricted(self):
    """
    Public method to check, if the rule carries a domain restriction.

    @return flag indicating a domain restriction
    @rtype bool
    """
    restrictedOption = AdBlockRuleOption.DomainRestrictedOption
    return self.__hasOption(restrictedOption)
|
856 |
|
def isComment(self):
    """
    Public method to check, if the filter string is a comment line.

    @return flag indicating a comment (i.e. the filter starts with '!')
    @rtype bool
    """
    filterText = self.__filter
    return filterText.startswith("!")
|
865 |
|
def isHeader(self):
    """
    Public method to check, if the filter string is a list header line.

    @return flag indicating a header (i.e. the filter starts with
        '[Adblock')
    @rtype bool
    """
    filterText = self.__filter
    return filterText.startswith("[Adblock")
|
874 |
|
def isSlow(self):
    """
    Public method to check, if this is a slow rule.

    A rule is reported as slow, if it has a regular expression object.

    @return flag indicating a slow rule
    @rtype bool
    """
    usesRegExp = self.__regExp is not None
    return usesRegExp
|
883 |
|
def isInternalDisabled(self):
    """
    Public method to check, if the rule was disabled by the rule itself.

    @return flag indicating an internally disabled rule
    @rtype bool
    """
    internalFlag = self.__isInternalDisabled
    return internalFlag
|
892 |
|
def __convertPatternToRegExp(self, wildcardPattern):
    """
    Private method to convert a wildcard pattern to a regular expression.

    The conversion is a fixed sequence of substitutions. Their order is
    significant because the steps after the escaping operate on the
    escaped text.

    @param wildcardPattern string containing the wildcard pattern
    @type str
    @return string containing a regular expression
    @rtype string
    """
    rewriteSteps = (
        # remove multiple wildcards
        (r"\*+", "*"),
        # remove anchors following separator placeholder
        (r"\^\|$", "^"),
        # remove leading wildcards
        (r"^(\*)", ""),
        # remove trailing wildcards
        (r"(\*)$", ""),
        # escape special symbols
        (r"(\W)", r"\\\1"),
        # process extended anchor at expression start
        (r"^\\\|\\\|", r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?"),
        # process separator placeholders
        (r"\\\^", r"(?:[^\\w\\d\-.%]|$)"),
        # process anchor at expression start
        (r"^\\\|", "^"),
        # process anchor at expression end
        (r"\\\|$", "$"),
        # replace wildcards by .*
        (r"\\\*", ".*"),
    )

    regExpText = wildcardPattern
    for searchFor, replacement in rewriteSteps:
        regExpText = re.sub(searchFor, replacement, regExpText)

    return regExpText
|
928 |
|
def __hasOption(self, opt):
    """
    Private method to check, if the given option has been set.

    @param opt option to check for
    @type AdBlockRuleOption
    @return flag indicating the state of the option
    @rtype bool
    """
    # the options are kept as bits of one integer
    commonBits = self.__options & opt
    return commonBits != 0
|
939 |
|
def setOption(self, opt):
    """
    Public method to set the given option.

    Options already set are kept; the new one is added to them.

    @param opt option to be set
    @type AdBlockRuleOption
    """
    self.__options = self.__options | opt
|
948 |
|
def __hasException(self, opt):
    """
    Private method to check, if the given option has been set as an
    exception.

    @param opt option to check for
    @type AdBlockRuleOption
    @return flag indicating the exception state of the option
    @rtype bool
    """
    # the exceptions are kept as bits of one integer
    commonBits = self.__exceptions & opt
    return commonBits != 0
|
960 |
|
def __setException(self, opt, on):
    """
    Private method to set or clear the given option as an exception.

    @param opt option to be changed
    @type AdBlockRuleOption
    @param on flag indicating to set or unset the exception
    @type bool
    """
    current = self.__exceptions
    self.__exceptions = (current | opt) if on else (current & ~opt)
|
974 |
|
def __filterIsOnlyDomain(self, filterString):
    """
    Private method to check, if the given filter is a domain only filter.

    Such a filter starts with '||', ends with '^' and contains none of
    the characters '/', ':', '?', '=', '&' and '*'.

    @param filterString filter string to be checked
    @type str
    @return flag indicating a domain only filter
    @rtype bool
    """
    isAnchored = (
        filterString.startswith("||") and filterString.endswith("^")
    )
    if not isAnchored:
        return False

    return not any(char in "/:?=&*" for char in filterString)
|
989 |
|
def __filterIsOnlyEndsMatch(self, filterString):
    """
    Private method to check, if the given filter is to match against the
    end of a string.

    This is the case, if the first of the characters '^', '*' and '|'
    found in the filter is a '|' at the very end of it.

    @param filterString filter string to be checked
    @type str
    @return flag indicating a end of string match filter
    @rtype bool
    """
    firstSpecial = next(
        (position for position, char in enumerate(filterString)
         if char in "^*|"),
        None)
    if firstSpecial is None:
        return False

    return (
        filterString[firstSpecial] == "|" and
        firstSpecial == len(filterString) - 1
    )
|
1008 |
|
def __isMatchingDomain(self, domain, filterString):
    """
    Private method to check, if a given domain matches the given filter
    string.

    The domain matches, if it is equal to the filter string or if it is a
    sub-domain of it, i.e. it ends with a dot followed by the filter
    string.

    @param domain domain to be checked
    @type str
    @param filterString filter string to check against
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if filterString == domain:
        return True

    # an empty filter matches nothing but an empty domain (see above)
    if not filterString:
        return False

    # Test the suffix itself. Searching for the first occurrence of the
    # filter string fails for domains containing it more than once.
    return domain.endswith("." + filterString)
|
1030 |
|
1031 def __isMatchingRegExpStrings(self, url): |
|
1032 """ |
|
1033 Private method to check the given URL against the fixed parts of |
|
1034 the regexp. |
|
1035 |
|
1036 @param url URL to be checked |
|
1037 @type str |
|
1038 @return flag indicating a match |
|
1039 @rtype bool |
|
1040 """ |
|
1041 if self.__regExp is not None: |
|
1042 return all(matcher in url for matcher in self.__stringMatchers) |
|
1043 |
|
1044 return True |
|
1045 |
|
1046 def __parseRegExpFilter(self, filterString): |
|
1047 """ |
|
1048 Private method to split the given regular expression into strings that |
|
1049 can be used with 'in'. |
|
1050 |
|
1051 @param filterString regexp filter string to be parsed |
|
1052 @type str |
|
1053 @return fixed string parts of the filter |
|
1054 @rtype list of str |
|
1055 """ |
|
1056 matchers = [] |
|
1057 |
|
1058 startPos = -1 |
|
1059 for index in range(len(filterString)): |
|
1060 filterChar = filterString[index] |
|
1061 if filterChar in ["|", "*", "^"]: |
|
1062 sub = filterString[startPos:index] |
|
1063 if len(sub) > 1: |
|
1064 matchers.append(sub) |
|
1065 startPos = index + 1 |
|
1066 |
|
1067 sub = filterString[startPos:] |
|
1068 if len(sub) > 1: |
|
1069 matchers.append(sub) |
|
1070 |
|
1071 return list(set(matchers)) |
|
1072 |
|
1073 def ruleType(self): |
|
1074 """ |
|
1075 Public method to get the rule type. |
|
1076 |
|
1077 @return rule type |
|
1078 @rtype AdBlockRuleType |
|
1079 """ |
|
1080 return self.__type |
|
1081 |
|
1082 def ruleOptions(self): |
|
1083 """ |
|
1084 Public method to get the rule options. |
|
1085 |
|
1086 @return rule options |
|
1087 @rtype AdBlockRuleOption |
|
1088 """ |
|
1089 return self.__options |
|
1090 |
|
1091 def ruleExceptions(self): |
|
1092 """ |
|
1093 Public method to get the rule exceptions. |
|
1094 |
|
1095 @return rule exceptions |
|
1096 @rtype AdBlockRuleOption |
|
1097 """ |
|
1098 return self.__exceptions |
|
1099 |
|
1100 def matchString(self): |
|
1101 """ |
|
1102 Public method to get the match string. |
|
1103 |
|
1104 @return match string |
|
1105 @rtype str |
|
1106 """ |
|
1107 return self.__matchString |
|
1108 |
|
1109 def caseSensitivity(self): |
|
1110 """ |
|
1111 Public method to get the case sensitivity. |
|
1112 |
|
1113 @return case sensitivity |
|
1114 @rtype Qt.CaseSensitivity |
|
1115 """ |
|
1116 return self.__caseSensitivity |
|
1117 |
|
1118 def allowedDomains(self): |
|
1119 """ |
|
1120 Public method to get a copy of the list of allowed domains. |
|
1121 |
|
1122 @return list of allowed domains |
|
1123 @rtype list of str |
|
1124 """ |
|
1125 return self.__allowedDomains[:] |
|
1126 |
|
1127 def blockedDomains(self): |
|
1128 """ |
|
1129 Public method to get a copy of the list of blocked domains. |
|
1130 |
|
1131 @return list of blocked domains |
|
1132 @rtype list of str |
|
1133 """ |
|
1134 return self.__blockedDomains[:] |
|
1135 |
|
1136 def addBlockedDomains(self, domains): |
|
1137 """ |
|
1138 Public method to add to the list of blocked domains. |
|
1139 |
|
1140 @param domains list of domains to be added |
|
1141 @type str or list of str |
|
1142 """ |
|
1143 if isinstance(domains, list): |
|
1144 self.__blockedDomains.extend(domains) |
|
1145 else: |
|
1146 self.__blockedDomains.append(domains) |
|
1147 |
|
1148 def getRegExpAndMatchers(self): |
|
1149 """ |
|
1150 Public method to get the regular expression and associated string |
|
1151 matchers. |
|
1152 |
|
1153 @return tuple containing the regular expression and the list of |
|
1154 string matchers |
|
1155 @rtype tuple of (re.Pattern, list of str) |
|
1156 """ |
|
1157 if self.__regExp is not None: |
|
1158 return (re.compile(self.__regExp.pattern), |
|
1159 self.__stringMatchers[:]) |
|
1160 else: |
|
1161 return (None, []) |
|
1162 |
|
1163 def copyFrom(self, other): |
|
1164 """ |
|
1165 Public method to copy another AdBlock rule. |
|
1166 |
|
1167 @param other reference to the AdBlock rule to copy from |
|
1168 @type AdBlockRule |
|
1169 """ |
|
1170 self.__subscription = other.subscription() |
|
1171 self.__type = other.ruleType() |
|
1172 self.__options = other.ruleOptions() |
|
1173 self.__exceptions = other.ruleExceptions() |
|
1174 self.__filter = other.filter() |
|
1175 self.__matchString = other.matchString() |
|
1176 self.__caseSensitivity = other.caseSensitivity() |
|
1177 self.__isEnabled = other.isEnabled() |
|
1178 self.__isException = other.isException() |
|
1179 self.__isInternalDisabled = other.isInternalDisabled() |
|
1180 self.__allowedDomains = other.allowedDomains() |
|
1181 self.__blockedDomains = other.blockedDomains() |
|
1182 self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers() |