|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2009 - 2021 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock rule class. |
|
8 """ |
|
9 |
|
10 import re |
|
11 from enum import IntEnum |
|
12 |
|
13 from PyQt5.QtCore import Qt |
|
14 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo |
|
15 |
|
16 |
|
def toSecondLevelDomain(url):
    """
    Module function to derive the second level domain of the given URL.

    The top level domain reported by the URL is cut off the host name and
    only the last label remaining in front of it is kept.

    @param url URL to extract the domain from
    @type QUrl
    @return name of the second level domain or an empty string, if the
        URL has no host or no top level domain
    @rtype str
    """
    tld = url.topLevelDomain()
    host = url.host()
    if not (tld and host):
        return ""

    # part of the host name in front of the top level domain
    prefix = host[:len(host) - len(tld)]
    if "." not in prefix:
        # nothing but a single label in front of the top level domain
        return host

    # keep only the label directly in front of the top level domain
    return prefix.rsplit(".", 1)[1] + tld
|
40 |
|
41 |
|
class AdBlockRuleType(IntEnum):
    """
    Class defining the kinds of rules a filter string is classified as.
    """
    # element hiding rule (filter containing "##" or "#@#")
    CssRule = 0
    # rule matched against the domain only
    DomainMatchRule = 1
    # rule matched with a regular expression
    RegExpMatchRule = 2
    # rule matched against the end of the URL
    StringEndsMatchRule = 3
    # rule matched as a substring of the URL
    StringContainsMatchRule = 4
    # rule matching every URL
    MatchAllUrlsRule = 5
    # rule that cannot be used for matching
    Invalid = 6
|
53 |
|
54 |
|
class AdBlockRuleOption(IntEnum):
    """
    Class defining the rule options.

    Apart from NoOption each member occupies a bit of its own, so several
    options can be combined with bitwise operators.
    """
    NoOption = 0
    DomainRestrictedOption = 1 << 0
    ThirdPartyOption = 1 << 1
    ObjectOption = 1 << 2
    SubdocumentOption = 1 << 3
    XMLHttpRequestOption = 1 << 4
    ImageOption = 1 << 5
    ScriptOption = 1 << 6
    StyleSheetOption = 1 << 7
    ObjectSubrequestOption = 1 << 8
    PingOption = 1 << 9
    MediaOption = 1 << 10
    FontOption = 1 << 11
    OtherOption = 1 << 12

    # Exception only options
    DocumentOption = 1 << 13
    ElementHideOption = 1 << 14
|
77 |
|
78 |
|
79 class AdBlockRule: |
|
80 """ |
|
81 Class implementing the AdBlock rule. |
|
82 """ |
|
def __init__(self, filterRule="", subscription=None):
    """
    Constructor

    @param filterRule filter string of the rule
    @type str
    @param subscription reference to the subscription object
    @type AdBlockSubscription
    """
    self.__subscription = subscription

    # data used for matching, derived from the filter string
    self.__regExp = None
    self.__stringMatchers = []
    # Not every rule type gets a match string assigned while parsing
    # (e.g. regular expression, match-all and invalid rules). Provide a
    # default so that cssSelector() does not raise an AttributeError.
    self.__matchString = ""

    # domains of a domain restricted rule
    self.__blockedDomains = []
    self.__allowedDomains = []

    self.__isEnabled = True
    self.__isException = False
    self.__isInternalDisabled = False
    self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive

    self.__type = AdBlockRuleType.StringContainsMatchRule
    self.__options = AdBlockRuleOption.NoOption
    self.__exceptions = AdBlockRuleOption.NoOption

    # stores the filter string and parses it
    self.setFilter(filterRule)
|
110 |
|
def subscription(self):
    """
    Public method to retrieve the subscription owning this rule.

    @return subscription the rule belongs to
    @rtype AdBlockSubscription
    """
    owner = self.__subscription
    return owner
|
119 |
|
def setSubscription(self, subscription):
    """
    Public method to assign the rule to a subscription.

    @param subscription subscription the rule belongs to
    @type AdBlockSubscription
    """
    # only the reference is stored, the subscription is not notified
    self.__subscription = subscription
|
128 |
|
def filter(self):
    """
    Public method to retrieve the filter string of the rule.

    @return filter string as passed to setFilter()
    @rtype str
    """
    filterRule = self.__filter
    return filterRule
|
137 |
|
def setFilter(self, filterRule):
    """
    Public method to store a new filter string and parse it.

    @param filterRule rule filter string
    @type str
    """
    self.__filter = filterRule
    # derive rule type, options and matching data from the new string
    self.__parseFilter()
|
147 |
|
def __parseFilter(self):
    """
    Private method to parse the filter pattern.

    The filter string is analyzed to determine the rule type, the rule
    options and the data used for matching. Comments, empty filters,
    filters with unsupported options and filters containing an invalid
    regular expression are marked as internally disabled.
    """
    parsedLine = self.__filter

    # empty rule or just a comment
    if not parsedLine.strip() or parsedLine.startswith("!"):
        self.__isEnabled = False
        self.__isInternalDisabled = True
        self.__type = AdBlockRuleType.Invalid
        return

    # CSS element hiding rule
    if "##" in parsedLine or "#@#" in parsedLine:
        self.__type = AdBlockRuleType.CssRule
        pos = parsedLine.find("#")

        # domain restricted rule
        if not parsedLine.startswith("##"):
            self.__parseDomains(parsedLine[:pos], ",")

        self.__isException = parsedLine[pos + 1] == "@"
        # the selector follows "#@#" for exceptions and "##" otherwise
        selectorStart = pos + 3 if self.__isException else pos + 2
        self.__matchString = parsedLine[selectorStart:]

        # CSS rule cannot have more options -> stop parsing
        return

    # Exception always starts with @@
    if parsedLine.startswith("@@"):
        self.__isException = True
        parsedLine = parsedLine[2:]

    # Parse all options following '$' character
    optionsIndex = parsedLine.find("$")
    if optionsIndex >= 0:
        options = [opt
                   for opt in parsedLine[optionsIndex + 1:].split(",")
                   if opt]

        # Options restricting the rule to properties of the request. Each
        # of them may be inverted by a leading '~'. The names are compared
        # exactly, so that unrelated options merely ending with one of
        # these names (e.g. "inline-script") are not mistaken for them.
        restrictingOptions = {
            "third-party": AdBlockRuleOption.ThirdPartyOption,
            "object": AdBlockRuleOption.ObjectOption,
            "subdocument": AdBlockRuleOption.SubdocumentOption,
            "xmlhttprequest": AdBlockRuleOption.XMLHttpRequestOption,
            "image": AdBlockRuleOption.ImageOption,
            "script": AdBlockRuleOption.ScriptOption,
            "stylesheet": AdBlockRuleOption.StyleSheetOption,
            "object-subrequest": AdBlockRuleOption.ObjectSubrequestOption,
            "ping": AdBlockRuleOption.PingOption,
            "media": AdBlockRuleOption.MediaOption,
            "font": AdBlockRuleOption.FontOption,
            "other": AdBlockRuleOption.OtherOption,
        }

        handledOptions = 0
        for option in options:
            inverted = option.startswith("~")
            optionName = option[1:] if inverted else option

            if option.startswith("domain="):
                self.__parseDomains(option[7:], "|")
            elif option == "match-case":
                self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive
            elif optionName in restrictingOptions:
                ruleOption = restrictingOptions[optionName]
                self.setOption(ruleOption)
                self.__setException(ruleOption, inverted)
            elif option == "document" and self.__isException:
                self.setOption(AdBlockRuleOption.DocumentOption)
            elif option == "elemhide" and self.__isException:
                self.setOption(AdBlockRuleOption.ElementHideOption)
            elif option == "collapse":
                # Hiding placeholders of blocked elements is enabled by
                # default
                pass
            else:
                # unsupported option; it is not counted as handled
                continue

            handledOptions += 1

        # If we don't handle all options, it's safer to just disable
        # this rule
        if handledOptions != len(options):
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        parsedLine = parsedLine[:optionsIndex]

    # Rule is classic regexp
    if parsedLine.startswith("/") and parsedLine.endswith("/"):
        parsedLine = parsedLine[1:-1]
        self.__type = AdBlockRuleType.RegExpMatchRule
        # The text of a regular expression is not literal URL text, so no
        # substring pre-check can be derived from it.
        self.__stringMatchers = []
        self.__compileRegExp(parsedLine)
        return

    # Remove starting / ending wildcards (*)
    if parsedLine.startswith("*"):
        parsedLine = parsedLine[1:]
    if parsedLine.endswith("*"):
        parsedLine = parsedLine[:-1]

    # Fast string matching for domain here
    if self.__filterIsOnlyDomain(parsedLine):
        # strip the leading "||" and the trailing "^"
        self.__type = AdBlockRuleType.DomainMatchRule
        self.__matchString = parsedLine[2:-1]
        return

    # If rule contains '|' only at the end, string matching can be used
    if self.__filterIsOnlyEndsMatch(parsedLine):
        self.__type = AdBlockRuleType.StringEndsMatchRule
        self.__matchString = parsedLine[:-1]
        return

    # If there is still a wildcard (*) or separator (^) or (|),
    # the rule must be modified to comply with re.
    if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
        self.__type = AdBlockRuleType.RegExpMatchRule
        pattern = self.__convertPatternToRegExp(parsedLine)
        if self.__compileRegExp(pattern):
            self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
        return

    # This rule matches all URLs
    if not parsedLine:
        if self.__options == AdBlockRuleOption.NoOption:
            # a match-all rule without any restriction is not accepted
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        self.__type = AdBlockRuleType.MatchAllUrlsRule
        return

    # no regexp required
    self.__type = AdBlockRuleType.StringContainsMatchRule
    self.__matchString = parsedLine


def __compileRegExp(self, pattern):
    """
    Private method to compile the regular expression of the rule.

    The expression is compiled honoring the case sensitivity of the rule.
    If the pattern is not a valid Python regular expression, the rule is
    marked as invalid and internally disabled instead of raising.

    @param pattern regular expression pattern
    @type str
    @return flag indicating a successful compilation
    @rtype bool
    """
    if self.__caseSensitivity == Qt.CaseSensitivity.CaseSensitive:
        flags = 0
    else:
        flags = re.IGNORECASE

    try:
        self.__regExp = re.compile(pattern, flags)
    except re.error:
        self.__regExp = None
        self.__isInternalDisabled = True
        self.__type = AdBlockRuleType.Invalid
        return False

    return True


def __parseDomains(self, domains, separator):
    """
    Private method to parse a string with a domain list.

    Entries starting with '~' are recorded as blocked domains, all others
    as allowed domains. The rule is flagged as domain restricted, if it
    has at least one domain of either kind.

    @param domains list of domains
    @type str
    @param separator separator character used by the list
    @type str
    """
    for domain in domains.split(separator):
        if not domain:
            # skip empty entries (e.g. caused by a trailing separator)
            continue
        if domain.startswith("~"):
            self.__blockedDomains.append(domain[1:])
        else:
            self.__allowedDomains.append(domain)

    if self.__blockedDomains or self.__allowedDomains:
        self.setOption(AdBlockRuleOption.DomainRestrictedOption)


def networkMatch(self, request, domain, encodedUrl):
    """
    Public method to check the rule for a match.

    The URL has to match the pattern of the rule and the request has to
    satisfy the domain, third-party and resource type restrictions of the
    rule. Of the resource type options given without '~' at least one has
    to apply to the request; every option given with '~' excludes its
    resource type.

    @param request reference to the network request
    @type QWebEngineUrlRequestInfo
    @param domain domain name
    @type str
    @param encodedUrl string encoded URL to be checked
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if (
        self.__type == AdBlockRuleType.CssRule or
        not self.__isEnabled or
        self.__isInternalDisabled
    ):
        return False

    if not self.__stringMatch(domain, encodedUrl):
        return False

    # check domain restrictions
    if (
        self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
        not self.matchDomain(request.firstPartyUrl().host())
    ):
        return False

    # check third-party restrictions
    if (
        self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and
        not self.matchThirdParty(request)
    ):
        return False

    # check resource type restrictions
    typeMatchers = (
        (AdBlockRuleOption.ObjectOption, self.matchObject),
        (AdBlockRuleOption.SubdocumentOption, self.matchSubdocument),
        (AdBlockRuleOption.XMLHttpRequestOption, self.matchXmlHttpRequest),
        (AdBlockRuleOption.ImageOption, self.matchImage),
        (AdBlockRuleOption.ScriptOption, self.matchScript),
        (AdBlockRuleOption.StyleSheetOption, self.matchStyleSheet),
        (AdBlockRuleOption.ObjectSubrequestOption,
         self.matchObjectSubrequest),
        (AdBlockRuleOption.PingOption, self.matchPing),
        (AdBlockRuleOption.MediaOption, self.matchMedia),
        (AdBlockRuleOption.FontOption, self.matchFont),
        (AdBlockRuleOption.OtherOption, self.matchOther),
    )
    hasIncludedType = False     # a type option without '~' is set
    includedTypeMatched = False
    for option, matcher in typeMatchers:
        if not self.__hasOption(option):
            continue

        if self.__hasException(option):
            # inverted option: the matcher reports False, if the request
            # is of the excluded resource type
            if not matcher(request):
                return False
        else:
            hasIncludedType = True
            includedTypeMatched = includedTypeMatched or matcher(request)

    # the request must be of one of the included resource types
    if hasIncludedType and not includedTypeMatched:
        return False

    return True


def urlMatch(self, url):
    """
    Public method to check an URL against the rule.

    Only rules with the document or element hiding option are checked;
    all other rules never match.

    @param url URL to check
    @type QUrl
    @return flag indicating a match
    @rtype bool
    """
    if (
        not self.__hasOption(AdBlockRuleOption.DocumentOption) and
        not self.__hasOption(AdBlockRuleOption.ElementHideOption)
    ):
        return False

    encodedUrl = bytes(url.toEncoded()).decode()
    domain = url.host()
    return self.__stringMatch(domain, encodedUrl)


def __stringMatch(self, domain, encodedUrl):
    """
    Private method to match the domain or URL against the rule pattern.

    Which of the two is checked and how depends on the rule type.

    @param domain domain to match
    @type str
    @param encodedUrl URL in encoded form
    @type str
    @return flag indicating a match
    @rtype bool
    """
    caseInsensitive = (
        self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive)

    if self.__type == AdBlockRuleType.StringContainsMatchRule:
        if caseInsensitive:
            return self.__matchString.lower() in encodedUrl.lower()
        return self.__matchString in encodedUrl

    if self.__type == AdBlockRuleType.DomainMatchRule:
        return self.__isMatchingDomain(domain, self.__matchString)

    if self.__type == AdBlockRuleType.StringEndsMatchRule:
        if caseInsensitive:
            return encodedUrl.lower().endswith(self.__matchString.lower())
        return encodedUrl.endswith(self.__matchString)

    if self.__type == AdBlockRuleType.RegExpMatchRule:
        # cheap substring check first, the regular expression is only
        # evaluated, if that check passes
        return (
            self.__isMatchingRegExpStrings(encodedUrl) and
            self.__regExp.search(encodedUrl) is not None
        )

    # a match-all rule matches everything, CSS and invalid rules nothing
    return self.__type == AdBlockRuleType.MatchAllUrlsRule


def matchDomain(self, domain):
    """
    Public method to match a domain.

    A disabled rule never matches and a rule without domain restriction
    always matches. Otherwise the domain must not match any of the
    blocked domains and, if allowed domains are given, has to match one
    of them.

    @param domain domain name to check
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if not self.__isEnabled:
        return False

    if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
        return True

    notBlocked = not any(self.__isMatchingDomain(domain, dom)
                         for dom in self.__blockedDomains)
    if self.__blockedDomains and not self.__allowedDomains:
        # only exclusions were given
        return notBlocked

    return notBlocked and any(self.__isMatchingDomain(domain, dom)
                              for dom in self.__allowedDomains)


def matchThirdParty(self, req):
    """
    Public method to match a third-party rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    # Third-party matching should be performed on second-level domains
    firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
    host = toSecondLevelDomain(req.requestUrl())

    match = firstPartyHost != host

    # an inverted option (~third-party) matches first-party requests
    if self.__hasException(AdBlockRuleOption.ThirdPartyOption):
        return not match
    return match


def matchObject(self, req):
    """
    Public method to match an object rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject)

    # the result is inverted, if the option was given with '~'
    if self.__hasException(AdBlockRuleOption.ObjectOption):
        return not match
    return match


def matchSubdocument(self, req):
    """
    Public method to match a sub-document rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame)

    # the result is inverted, if the option was given with '~'
    if self.__hasException(AdBlockRuleOption.SubdocumentOption):
        return not match
    return match


def matchXmlHttpRequest(self, req):
    """
    Public method to match a XmlHttpRequest rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr)

    # the result is inverted, if the option was given with '~'
    if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption):
        return not match
    return match


def matchImage(self, req):
    """
    Public method to match an Image rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage)

    # the result is inverted, if the option was given with '~'
    if self.__hasException(AdBlockRuleOption.ImageOption):
        return not match
    return match


def matchScript(self, req):
    """
    Public method to match a Script rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript)

    # the result is inverted, if the option was given with '~'
    if self.__hasException(AdBlockRuleOption.ScriptOption):
        return not match
    return match


def matchStyleSheet(self, req):
    """
    Public method to match a StyleSheet rule.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet)

    # the result is inverted, if the option was given with '~'
    if self.__hasException(AdBlockRuleOption.StyleSheetOption):
        return not match
    return match


def matchObjectSubrequest(self, req):
    """
    Public method to match an Object Subrequest rule.

    Requests of the sub-resource and plug-in resource types are treated
    as object subrequests.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    match = req.resourceType() in (
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource,
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypePluginResource,
    )

    # The inversion state is kept in the exceptions bit field like for all
    # other options; there is no dedicated attribute for it.
    if self.__hasException(AdBlockRuleOption.ObjectSubrequestOption):
        return not match
    return match
|
695 |
|
def matchPing(self, req):
    """
    Public method to check a request against the ping option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match; the result is inverted, if the
        option is recorded as an exception
    @rtype bool
    """
    isPing = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing
    )
    inverted = self.__hasException(AdBlockRuleOption.PingOption)
    return (not isPing) if inverted else isPing
|
713 |
|
def matchMedia(self, req):
    """
    Public method to check a request against the media option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match; the result is inverted, if the
        option is recorded as an exception
    @rtype bool
    """
    isMedia = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia
    )
    inverted = self.__hasException(AdBlockRuleOption.MediaOption)
    return (not isMedia) if inverted else isMedia
|
731 |
|
def matchFont(self, req):
    """
    Public method to check a request against the font option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match; the result is inverted, if the
        option is recorded as an exception
    @rtype bool
    """
    isFont = (
        req.resourceType() ==
        QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource
    )
    inverted = self.__hasException(AdBlockRuleOption.FontOption)
    return (not isFont) if inverted else isFont
|
749 |
|
def matchOther(self, req):
    """
    Public method to check a request against the 'other' option.

    The sub-resource, worker, shared worker, service worker, prefetch,
    favicon and unknown resource types are treated as 'other'.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match; the result is inverted, if the
        option is recorded as an exception
    @rtype bool
    """
    resourceTypes = QWebEngineUrlRequestInfo.ResourceType
    otherTypes = (
        resourceTypes.ResourceTypeSubResource,
        resourceTypes.ResourceTypeWorker,
        resourceTypes.ResourceTypeSharedWorker,
        resourceTypes.ResourceTypeServiceWorker,
        resourceTypes.ResourceTypePrefetch,
        resourceTypes.ResourceTypeFavicon,
        resourceTypes.ResourceTypeUnknown,
    )
    isOther = req.resourceType() in otherTypes
    inverted = self.__hasException(AdBlockRuleOption.OtherOption)
    return (not isOther) if inverted else isOther
|
773 |
|
def isException(self):
    """
    Public method to query the exception flag of the rule.

    @return flag indicating an exception rule
    @rtype bool
    """
    exceptionFlag = self.__isException
    return exceptionFlag
|
782 |
|
def setException(self, exception):
    """
    Public method to change the exception flag of the rule.

    @param exception flag indicating an exception rule
    @type bool
    """
    # the value is stored as given, the filter string is not touched
    self.__isException = exception
|
791 |
|
def isEnabled(self):
    """
    Public method to query the enabled state of the rule.

    @return flag indicating the enabled state
    @rtype bool
    """
    enabled = self.__isEnabled
    return enabled
|
800 |
|
def setEnabled(self, enabled):
    """
    Public method to change the enabled state of the rule.

    @param enabled flag indicating the new enabled state
    @type bool
    """
    # only the user controlled flag is changed here
    self.__isEnabled = enabled
|
809 |
|
def isCSSRule(self):
    """
    Public method to test, whether the rule is a CSS rule.

    @return flag indicating a CSS rule
    @rtype bool
    """
    ruleType = self.__type
    return ruleType == AdBlockRuleType.CssRule
|
818 |
|
def cssSelector(self):
    """
    Public method to retrieve the CSS selector of the rule.

    The match string of the rule is returned as is; it is the selector
    for rules parsed as CSS rules.

    @return CSS selector
    @rtype str
    """
    selector = self.__matchString
    return selector
|
827 |
|
def isDocument(self):
    """
    Public method to test, whether the document option is set.

    @return flag indicating a document rule
    @rtype bool
    """
    documentOption = AdBlockRuleOption.DocumentOption
    return self.__hasOption(documentOption)
|
836 |
|
def isElementHiding(self):
    """
    Public method to test, whether the element hiding option is set.

    @return flag indicating an element hiding rule
    @rtype bool
    """
    elementHideOption = AdBlockRuleOption.ElementHideOption
    return self.__hasOption(elementHideOption)
|
845 |
|
def isDomainRestricted(self):
    """
    Public method to test, whether the rule is restricted by domain.

    @return flag indicating a domain restriction
    @rtype bool
    """
    domainOption = AdBlockRuleOption.DomainRestrictedOption
    return self.__hasOption(domainOption)
|
854 |
|
def isComment(self):
    """
    Public method to test, whether the filter string is a comment.

    @return flag indicating a comment (filter starting with "!")
    @rtype bool
    """
    # slicing yields an empty string for an empty filter
    return self.__filter[:1] == "!"
|
863 |
|
def isHeader(self):
    """
    Public method to test, whether the filter string is a header line.

    @return flag indicating a header (filter starting with "[Adblock")
    @rtype bool
    """
    marker = "[Adblock"
    return self.__filter[:len(marker)] == marker
|
872 |
|
def isSlow(self):
    """
    Public method to test, whether this is a slow rule.

    A rule is regarded as slow, if a regular expression has been compiled
    for it.

    @return flag indicating a slow rule
    @rtype bool
    """
    hasRegExp = self.__regExp is not None
    return hasRegExp
|
881 |
|
def isInternalDisabled(self):
    """
    Public method to query, whether the rule was disabled internally.

    @return flag indicating an internally disabled rule
    @rtype bool
    """
    internallyDisabled = self.__isInternalDisabled
    return internallyDisabled
|
890 |
|
def __convertPatternToRegExp(self, wildcardPattern):
    """
    Private method to translate a wildcard pattern into a regular
    expression.

    The translation is a fixed sequence of substitutions; their order
    matters because later steps work on the escaped text produced by
    earlier ones.

    @param wildcardPattern string containing the wildcard pattern
    @type str
    @return string containing a regular expression
    @rtype str
    """
    substitutions = (
        # remove multiple wildcards
        (r"\*+", "*"),
        # remove anchors following separator placeholder
        (r"\^\|$", "^"),
        # remove leading wildcards
        (r"^(\*)", ""),
        # remove trailing wildcards
        (r"(\*)$", ""),
        # escape special symbols
        (r"(\W)", r"\\\1"),
        # process extended anchor at expression start
        (r"^\\\|\\\|", r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?"),
        # process separator placeholders
        (r"\\\^", r"(?:[^\\w\\d\-.%]|$)"),
        # process anchor at expression start
        (r"^\\\|", "^"),
        # process anchor at expression end
        (r"\\\|$", "$"),
        # replace wildcards by .*
        (r"\\\*", ".*"),
    )

    regExpPattern = wildcardPattern
    for search, replacement in substitutions:
        regExpPattern = re.sub(search, replacement, regExpPattern)

    return regExpPattern
|
926 |
|
def __hasOption(self, opt):
    """
    Private method to test, whether the given option is set.

    @param opt option to check for
    @type AdBlockRuleOption
    @return flag indicating the state of the option
    @rtype bool
    """
    # the options are kept as a bit field
    return (self.__options & opt) != 0
|
937 |
|
def setOption(self, opt):
    """
    Public method to switch the given option on.

    @param opt option to be set
    @type AdBlockRuleOption
    """
    # add the bit of the option to the bit field
    self.__options = self.__options | opt
|
946 |
|
def __hasException(self, opt):
    """
    Private method to test, whether the given option is recorded as an
    exception.

    @param opt option to check for
    @type AdBlockRuleOption
    @return flag indicating the exception state of the option
    @rtype bool
    """
    # the exceptions are kept as a bit field
    return (self.__exceptions & opt) != 0
|
958 |
|
def __setException(self, opt, on):
    """
    Private method to record or clear the given option as an exception.

    @param opt option to be set
    @type AdBlockRuleOption
    @param on flag indicating to set or unset the exception
    @type bool
    """
    current = self.__exceptions
    # add the bit of the option or remove it from the bit field
    self.__exceptions = (current | opt) if on else (current & ~opt)
|
972 |
|
def __filterIsOnlyDomain(self, filterString):
    """
    Private method to test, whether the given filter is a domain only
    filter.

    Such a filter starts with "||", ends with "^" and contains none of
    the characters "/", ":", "?", "=", "&" and "*".

    @param filterString filter string to be checked
    @type str
    @return flag indicating a domain only filter
    @rtype bool
    """
    if not (filterString.startswith("||") and filterString.endswith("^")):
        return False

    return not any(char in "/:?=&*" for char in filterString)
|
987 |
|
def __filterIsOnlyEndsMatch(self, filterString):
    """
    Private method to test, whether the given filter is to be matched
    against the end of a string.

    This is the case, if the first special character of the filter is a
    '|' located at its very end.

    @param filterString filter string to be checked
    @type str
    @return flag indicating an end of string match filter
    @rtype bool
    """
    lastIndex = len(filterString) - 1
    for position, char in enumerate(filterString):
        if char == "|":
            # decisive character: only an anchor at the very end counts
            return position == lastIndex
        if char in "^*":
            return False

    # no special character at all
    return False
|
1006 |
|
def __isMatchingDomain(self, domain, filterString):
    """
    Private method to check, if a given domain matches the given filter
    string.

    The domain matches, if it is equal to the filter string or if it is a
    sub-domain of it, i.e. it ends with the filter string preceded by a
    dot.

    @param domain domain to be checked
    @type str
    @param filterString filter string to check against
    @type str
    @return flag indicating a match
    @rtype bool
    """
    if filterString == domain:
        return True

    # an empty filter string only matches an empty domain (see above)
    if not filterString or not domain.endswith(filterString):
        return False

    # Look at the character in front of the trailing occurrence. Searching
    # with find() would locate the first occurrence, which is the wrong
    # one for domains containing the filter string more than once
    # (e.g. "ads.com.ads.com").
    index = len(domain) - len(filterString)

    return index > 0 and domain[index - 1] == "."
|
1028 |
|
def __isMatchingRegExpStrings(self, url):
    """
    Private method to pre-check the given URL against the fixed string
    parts belonging to the regular expression.

    Without a regular expression there is nothing to pre-check and the
    URL is reported as matching.

    @param url URL to be checked
    @type str
    @return flag indicating, that all fixed string parts are contained
        in the URL
    @rtype bool
    """
    if self.__regExp is None:
        return True

    for fixedPart in self.__stringMatchers:
        if fixedPart not in url:
            # one missing part is enough to rule the URL out
            return False

    return True
|
1043 |
|
def __parseRegExpFilter(self, filterString):
    """
    Private method to extract the fixed text pieces of the given filter,
    which can be checked with the 'in' operator.

    The filter is cut at every '|', '*' and '^' character. Pieces
    shorter than two characters are dropped and duplicates are removed;
    the order of the result is not defined.

    Note: The piece in front of the first special character is not part
    of the result. A filter without any special character therefore
    yields an empty list.

    @param filterString regexp filter string to be parsed
    @type str
    @return fixed string parts of the filter
    @rtype list of str
    """
    pieces = filterString.replace("*", "|").replace("^", "|").split("|")

    # NOTE(review): the leading piece is skipped on purpose to keep the
    # result callers currently get; confirm whether it should be
    # included before changing this.
    return list({piece for piece in pieces[1:] if len(piece) > 1})
|
1070 |
|
def ruleType(self):
    """
    Public method to retrieve the type of this rule.

    @return type of the rule
    @rtype AdBlockRuleType
    """
    return self.__type
|
1079 |
|
def ruleOptions(self):
    """
    Public method to retrieve the options of this rule.

    @return options of the rule
    @rtype AdBlockRuleOption
    """
    return self.__options
|
1088 |
|
def ruleExceptions(self):
    """
    Public method to retrieve the exceptions of this rule.

    @return exceptions of the rule
    @rtype AdBlockRuleOption
    """
    return self.__exceptions
|
1097 |
|
def matchString(self):
    """
    Public method to retrieve the match string of this rule.

    @return match string of the rule
    @rtype str
    """
    return self.__matchString
|
1106 |
|
def caseSensitivity(self):
    """
    Public method to retrieve the case sensitivity of this rule.

    @return case sensitivity of the rule
    @rtype Qt.CaseSensitivity
    """
    return self.__caseSensitivity
|
1115 |
|
def allowedDomains(self):
    """
    Public method to retrieve the allowed domains.

    The result is a new list, so changing it does not affect the rule.

    @return copy of the list of allowed domains
    @rtype list of str
    """
    return list(self.__allowedDomains)
|
1124 |
|
def blockedDomains(self):
    """
    Public method to retrieve the blocked domains.

    The result is a new list, so changing it does not affect the rule.

    @return copy of the list of blocked domains
    @rtype list of str
    """
    return list(self.__blockedDomains)
|
1133 |
|
def addBlockedDomains(self, domains):
    """
    Public method to extend the list of blocked domains.

    A list is added element by element; any other value is added as a
    single entry.

    @param domains domain or list of domains to be added
    @type str or list of str
    """
    newEntries = domains if isinstance(domains, list) else [domains]
    self.__blockedDomains.extend(newEntries)
|
1145 |
|
def getRegExpAndMatchers(self):
    """
    Public method to get the regular expression and associated string
    matchers.

    The regular expression is recreated from the pattern and the flags
    of the stored one and the string matchers are returned as a copy.

    @return tuple containing the regular expression and the list of
        string matchers; (None, []) if the rule has no regular expression
    @rtype tuple of (re.Pattern, list of str)
    """
    regExp = self.__regExp
    if regExp is None:
        return (None, [])

    # Pass the flags along: compiling the pattern text alone would drop
    # them (e.g. re.IGNORECASE) and thereby change what the returned
    # regular expression matches.
    return (
        re.compile(regExp.pattern, regExp.flags),
        self.__stringMatchers[:],
    )
|
1160 |
|
def copyFrom(self, other):
    """
    Public method to take over the data of another AdBlock rule.

    All values are obtained through the public methods of the other
    rule.

    @param other reference to the AdBlock rule to copy from
    @type AdBlockRule
    """
    # origin and classification of the rule
    self.__subscription = other.subscription()
    self.__type = other.ruleType()
    self.__options = other.ruleOptions()
    self.__exceptions = other.ruleExceptions()

    # filter text and matching data
    self.__filter = other.filter()
    self.__matchString = other.matchString()
    self.__caseSensitivity = other.caseSensitivity()

    # state flags
    self.__isEnabled = other.isEnabled()
    self.__isException = other.isException()
    self.__isInternalDisabled = other.isInternalDisabled()

    # domain restrictions
    self.__allowedDomains = other.allowedDomains()
    self.__blockedDomains = other.blockedDomains()

    # regular expression and its fixed string parts
    regExp, stringMatchers = other.getRegExpAndMatchers()
    self.__regExp = regExp
    self.__stringMatchers = stringMatchers