eric7/WebBrowser/AdBlock/AdBlockRule.py

branch
eric7
changeset 8312
800c432b34c8
parent 8207
d359172d11be
child 8318
962bce857696
equal deleted inserted replaced
8311:4e8b98454baa 8312:800c432b34c8
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock rule class.
8 """
9
10 import re
11 from enum import IntEnum
12
13 from PyQt5.QtCore import Qt
14 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo
15
16
def toSecondLevelDomain(url):
    """
    Module function to reduce the host of a URL to its second level domain.

    The result is the host label directly in front of the top level domain
    joined with the top level domain itself (e.g. "www.example.co.uk"
    becomes "example.co.uk").

    @param url URL to extract the domain from
    @type QUrl
    @return name of the second level domain; an empty string, if the URL
        has no host or no known top level domain
    @rtype str
    """
    tld = url.topLevelDomain()
    host = url.host()

    if not (tld and host):
        return ""

    # everything in front of the top level domain (which carries its own
    # leading dot)
    prefix = host[:len(host) - len(tld)]
    if "." not in prefix:
        # the host already is a second level domain
        return host

    # keep only the label right in front of the top level domain
    return prefix.rsplit(".", 1)[1] + tld
40
41
class AdBlockRuleType(IntEnum):
    """
    Class implementing the rule type enum.

    The type is determined while parsing the filter string of a rule and
    selects the matching strategy used for URLs.
    """
    # element hiding rule ("##" or "#@#" in the filter)
    CssRule = 0
    # filter of the form "||domain^"; matched against the domain only
    DomainMatchRule = 1
    # classic "/regexp/" filter or a wildcard pattern converted to one
    RegExpMatchRule = 2
    # filter with a single "|" at its end; matched with 'endswith'
    StringEndsMatchRule = 3
    # plain text filter; matched with 'in'
    StringContainsMatchRule = 4
    # empty pattern with options; matches every URL
    MatchAllUrlsRule = 5
    # comment, empty line or a rule that could not be parsed
    Invalid = 6
53
54
class AdBlockRuleOption(IntEnum):
    """
    Class implementing the rule option enum.

    Each member except NoOption occupies a bit of its own, so that several
    options can be combined into one integer with the bitwise operators.
    """
    NoOption = 0
    DomainRestrictedOption = 1 << 0
    ThirdPartyOption = 1 << 1
    ObjectOption = 1 << 2
    SubdocumentOption = 1 << 3
    XMLHttpRequestOption = 1 << 4
    ImageOption = 1 << 5
    ScriptOption = 1 << 6
    StyleSheetOption = 1 << 7
    ObjectSubrequestOption = 1 << 8
    PingOption = 1 << 9
    MediaOption = 1 << 10
    FontOption = 1 << 11
    OtherOption = 1 << 12

    # Exception only options
    DocumentOption = 1 << 13
    ElementHideOption = 1 << 14
77
78
class AdBlockRule:
    """
    Class implementing the AdBlock rule.

    A rule is created from one line of an AdBlock Plus style filter list.
    The line is parsed once (see setFilter()) into a rule type, a set of
    options and the data needed to match URLs against the rule.
    """
    def __init__(self, filterRule="", subscription=None):
        """
        Constructor

        @param filterRule filter string of the rule
        @type str
        @param subscription reference to the subscription object
        @type AdBlockSubscription
        """
        self.__subscription = subscription
        self.__isEnabled = True
        self.__filter = ""

        # define all parse results up front, so that every getter works
        # regardless of the rule type detected by the parser
        self.__resetParsedState()

        self.setFilter(filterRule)

    def __resetParsedState(self):
        """
        Private method to set everything derived from the filter string
        back to its default.
        """
        self.__regExp = None
        self.__stringMatchers = []

        self.__blockedDomains = []
        self.__allowedDomains = []

        self.__matchString = ""

        self.__isException = False
        self.__isInternalDisabled = False
        self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive

        self.__type = AdBlockRuleType.StringContainsMatchRule
        self.__options = AdBlockRuleOption.NoOption
        self.__exceptions = AdBlockRuleOption.NoOption

    def subscription(self):
        """
        Public method to get the subscription this rule belongs to.

        @return subscription of the rule
        @rtype AdBlockSubscription
        """
        return self.__subscription

    def setSubscription(self, subscription):
        """
        Public method to set the subscription this rule belongs to.

        @param subscription subscription of the rule
        @type AdBlockSubscription
        """
        self.__subscription = subscription

    def filter(self):
        """
        Public method to get the rule filter string.

        @return rule filter string
        @rtype str
        """
        return self.__filter

    def setFilter(self, filterRule):
        """
        Public method to set the rule filter string.

        The filter is parsed immediately. Results of a previously parsed
        filter (options, domains, regular expression, ...) are discarded
        first, so they cannot leak into the new rule. The enabled state is
        kept, except that comments and empty filters disable the rule.

        @param filterRule rule filter string
        @type str
        """
        self.__filter = filterRule
        self.__resetParsedState()
        self.__parseFilter()

    def __markInvalid(self):
        """
        Private method to flag the rule as unusable.
        """
        self.__isInternalDisabled = True
        self.__type = AdBlockRuleType.Invalid

    def __parseFilter(self):
        """
        Private method to parse the filter pattern.
        """
        parsedLine = self.__filter

        # empty rule or just a comment
        if not parsedLine.strip() or parsedLine.startswith("!"):
            self.__isEnabled = False
            self.__markInvalid()
            return

        # CSS element hiding rule; it cannot have more options
        if "##" in parsedLine or "#@#" in parsedLine:
            self.__parseCssRule(parsedLine)
            return

        # Exception always starts with @@
        if parsedLine.startswith("@@"):
            self.__isException = True
            parsedLine = parsedLine[2:]

        # Parse all options following '$' character
        optionsIndex = parsedLine.find("$")
        if optionsIndex >= 0:
            # If we don't handle all options, it's safer to just disable
            # this rule
            if not self.__parseOptions(parsedLine[optionsIndex + 1:]):
                self.__markInvalid()
                return

            parsedLine = parsedLine[:optionsIndex]

        # Rule is classic regexp
        if parsedLine.startswith("/") and parsedLine.endswith("/"):
            # The source of a real regular expression contains no literal
            # URL parts that could safely be used as a pre-filter, so no
            # string matchers are derived from it.
            self.__setRegExp(parsedLine[1:-1], [])
            return

        # Remove starting / ending wildcards (*)
        if parsedLine.startswith("*"):
            parsedLine = parsedLine[1:]
        if parsedLine.endswith("*"):
            parsedLine = parsedLine[:-1]

        # Fast string matching for domain here
        if self.__filterIsOnlyDomain(parsedLine):
            self.__type = AdBlockRuleType.DomainMatchRule
            self.__matchString = parsedLine[2:-1]
            return

        # If rule contains '|' only at the end, string matching can be used
        if self.__filterIsOnlyEndsMatch(parsedLine):
            self.__type = AdBlockRuleType.StringEndsMatchRule
            self.__matchString = parsedLine[:-1]
            return

        # If there is still a wildcard (*) or separator (^) or (|),
        # the rule must be modified to comply with re.
        if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
            self.__setRegExp(
                self.__convertPatternToRegExp(parsedLine),
                self.__parseRegExpFilter(parsedLine))
            return

        # This rule matches all URLs
        if len(parsedLine) == 0:
            if self.__options == AdBlockRuleOption.NoOption:
                # a match-all rule without any restriction is useless
                self.__markInvalid()
                return

            self.__type = AdBlockRuleType.MatchAllUrlsRule
            return

        # no regexp required
        self.__type = AdBlockRuleType.StringContainsMatchRule
        self.__matchString = parsedLine

    def __parseCssRule(self, parsedLine):
        """
        Private method to parse an element hiding rule.

        @param parsedLine filter containing "##" or "#@#"
        @type str
        """
        self.__type = AdBlockRuleType.CssRule
        pos = parsedLine.find("#")

        # domain restricted rule
        if not parsedLine.startswith("##"):
            self.__parseDomains(parsedLine[:pos], ",")

        self.__isException = parsedLine[pos + 1] == "@"
        # skip the "#@#" or the "##" separator
        selectorStart = pos + (3 if self.__isException else 2)
        self.__matchString = parsedLine[selectorStart:]

    def __parseOptions(self, optionsText):
        """
        Private method to parse the options following the '$' character.

        @param optionsText comma separated list of options
        @type str
        @return flag indicating that all options were understood
        @rtype bool
        """
        # options, that may be inverted by a leading '~'
        invertibleOptions = {
            "third-party": AdBlockRuleOption.ThirdPartyOption,
            "object": AdBlockRuleOption.ObjectOption,
            "subdocument": AdBlockRuleOption.SubdocumentOption,
            "xmlhttprequest": AdBlockRuleOption.XMLHttpRequestOption,
            "image": AdBlockRuleOption.ImageOption,
            "script": AdBlockRuleOption.ScriptOption,
            "stylesheet": AdBlockRuleOption.StyleSheetOption,
            "object-subrequest": AdBlockRuleOption.ObjectSubrequestOption,
            "ping": AdBlockRuleOption.PingOption,
            "media": AdBlockRuleOption.MediaOption,
            "font": AdBlockRuleOption.FontOption,
            "other": AdBlockRuleOption.OtherOption,
        }
        # options, that are valid for exception rules only
        exceptionOnlyOptions = {
            "document": AdBlockRuleOption.DocumentOption,
            "elemhide": AdBlockRuleOption.ElementHideOption,
        }

        allHandled = True
        for option in optionsText.split(","):
            if not option:
                continue

            if option.startswith("domain="):
                self.__parseDomains(option[7:], "|")
            elif option == "match-case":
                self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive
            elif option == "collapse":
                # Hiding placeholders of blocked elements is enabled by
                # default
                pass
            elif option in exceptionOnlyOptions:
                if self.__isException:
                    self.setOption(exceptionOnlyOptions[option])
                else:
                    allHandled = False
            else:
                inverted = option.startswith("~")
                name = option[1:] if inverted else option
                if name in invertibleOptions:
                    self.setOption(invertibleOptions[name])
                    self.__setException(invertibleOptions[name], inverted)
                else:
                    allHandled = False

        return allHandled

    def __setRegExp(self, pattern, matchers):
        """
        Private method to make this rule a regular expression rule.

        A pattern that cannot be compiled invalidates the rule instead of
        raising an exception.

        @param pattern regular expression pattern
        @type str
        @param matchers fixed strings an URL must contain to have a chance
            to match the pattern
        @type list of str
        """
        if self.__caseSensitivity == Qt.CaseSensitivity.CaseSensitive:
            flags = 0
        else:
            flags = re.IGNORECASE

        try:
            self.__regExp = re.compile(pattern, flags)
        except re.error:
            self.__regExp = None
            self.__markInvalid()
            return

        self.__type = AdBlockRuleType.RegExpMatchRule
        self.__stringMatchers = matchers

    def __parseDomains(self, domains, separator):
        """
        Private method to parse a string with a domain list.

        Domains prefixed with '~' are recorded as blocked domains, all
        others as allowed domains. Empty entries are skipped.

        @param domains list of domains
        @type str
        @param separator separator character used by the list
        @type str
        """
        for domain in domains.split(separator):
            if domain.startswith("~"):
                if domain[1:]:
                    self.__blockedDomains.append(domain[1:])
            elif domain:
                self.__allowedDomains.append(domain)

        if self.__blockedDomains or self.__allowedDomains:
            self.setOption(AdBlockRuleOption.DomainRestrictedOption)

    def networkMatch(self, request, domain, encodedUrl):
        """
        Public method to check the rule for a match.

        @param request reference to the network request
        @type QWebEngineUrlRequestInfo
        @param domain domain name
        @type str
        @param encodedUrl string encoded URL to be checked
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if (
            self.__type == AdBlockRuleType.CssRule or
            not self.__isEnabled or
            self.__isInternalDisabled
        ):
            return False

        if not self.__stringMatch(domain, encodedUrl):
            return False

        if not self.__options:
            # no restrictions to check
            return True

        # check domain restrictions
        if (
            self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
            not self.matchDomain(request.firstPartyUrl().host())
        ):
            return False

        # check all the restrictions depending on the request itself
        restrictions = (
            (AdBlockRuleOption.ThirdPartyOption, self.matchThirdParty),
            (AdBlockRuleOption.ObjectOption, self.matchObject),
            (AdBlockRuleOption.SubdocumentOption, self.matchSubdocument),
            (AdBlockRuleOption.XMLHttpRequestOption,
             self.matchXmlHttpRequest),
            (AdBlockRuleOption.ImageOption, self.matchImage),
            (AdBlockRuleOption.ScriptOption, self.matchScript),
            (AdBlockRuleOption.StyleSheetOption, self.matchStyleSheet),
            (AdBlockRuleOption.ObjectSubrequestOption,
             self.matchObjectSubrequest),
            (AdBlockRuleOption.PingOption, self.matchPing),
            (AdBlockRuleOption.MediaOption, self.matchMedia),
            (AdBlockRuleOption.FontOption, self.matchFont),
            (AdBlockRuleOption.OtherOption, self.matchOther),
        )
        return all(matcher(request)
                   for option, matcher in restrictions
                   if self.__hasOption(option))

    def urlMatch(self, url):
        """
        Public method to check an URL against the rule.

        Only rules with the 'document' or 'elemhide' option can match.

        @param url URL to check
        @type QUrl
        @return flag indicating a match
        @rtype bool
        """
        if (
            not self.__hasOption(AdBlockRuleOption.DocumentOption) and
            not self.__hasOption(AdBlockRuleOption.ElementHideOption)
        ):
            return False

        encodedUrl = bytes(url.toEncoded()).decode()
        domain = url.host()
        return self.__stringMatch(domain, encodedUrl)

    def __stringMatch(self, domain, encodedUrl):
        """
        Private method to match the pattern of the rule.

        @param domain domain to match
        @type str
        @param encodedUrl URL in encoded form
        @type str
        @return flag indicating a match
        @rtype bool
        """
        ignoreCase = (
            self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive
        )

        if self.__type == AdBlockRuleType.StringContainsMatchRule:
            if ignoreCase:
                return self.__matchString.lower() in encodedUrl.lower()
            return self.__matchString in encodedUrl

        if self.__type == AdBlockRuleType.DomainMatchRule:
            return self.__isMatchingDomain(domain, self.__matchString)

        if self.__type == AdBlockRuleType.StringEndsMatchRule:
            if ignoreCase:
                return encodedUrl.lower().endswith(
                    self.__matchString.lower())
            return encodedUrl.endswith(self.__matchString)

        if self.__type == AdBlockRuleType.RegExpMatchRule:
            # cheap substring test first, regular expression second
            return (
                self.__isMatchingRegExpStrings(encodedUrl) and
                self.__regExp.search(encodedUrl) is not None
            )

        return self.__type == AdBlockRuleType.MatchAllUrlsRule

    def matchDomain(self, domain):
        """
        Public method to match a domain.

        @param domain domain name to check
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if not self.__isEnabled:
            return False

        if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
            return True

        if len(self.__blockedDomains) == 0:
            return any(self.__isMatchingDomain(domain, dom)
                       for dom in self.__allowedDomains)
        elif len(self.__allowedDomains) == 0:
            return all(not self.__isMatchingDomain(domain, dom)
                       for dom in self.__blockedDomains)
        else:
            return (
                all(not self.__isMatchingDomain(domain, dom)
                    for dom in self.__blockedDomains) and
                any(self.__isMatchingDomain(domain, dom)
                    for dom in self.__allowedDomains)
            )

    def __matchResourceType(self, req, option, *resourceTypes):
        """
        Private method to match the resource type of a request.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @param option option the check is performed for
        @type AdBlockRuleOption
        @param resourceTypes resource types belonging to the option
        @type QWebEngineUrlRequestInfo.ResourceType
        @return flag indicating a match; the result is inverted, if the
            option was given in its inverted ('~') form
        @rtype bool
        """
        match = req.resourceType() in resourceTypes
        return match != self.__hasException(option)

    def matchThirdParty(self, req):
        """
        Public method to match a third-party rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        # Third-party matching should be performed on second-level domains
        firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
        host = toSecondLevelDomain(req.requestUrl())

        match = firstPartyHost != host
        return match != self.__hasException(
            AdBlockRuleOption.ThirdPartyOption)

    def matchObject(self, req):
        """
        Public method to match an object rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ObjectOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject)

    def matchSubdocument(self, req):
        """
        Public method to match a sub-document rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.SubdocumentOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame)

    def matchXmlHttpRequest(self, req):
        """
        Public method to match a XmlHttpRequest rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.XMLHttpRequestOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr)

    def matchImage(self, req):
        """
        Public method to match an Image rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ImageOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage)

    def matchScript(self, req):
        """
        Public method to match a Script rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ScriptOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript)

    def matchStyleSheet(self, req):
        """
        Public method to match a StyleSheet rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.StyleSheetOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet)

    def matchObjectSubrequest(self, req):
        """
        Public method to match an Object Subrequest rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ObjectSubrequestOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypePluginResource)

    def matchPing(self, req):
        """
        Public method to match a Ping rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.PingOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing)

    def matchMedia(self, req):
        """
        Public method to match a Media rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.MediaOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia)

    def matchFont(self, req):
        """
        Public method to match a Font rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.FontOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource)

    def matchOther(self, req):
        """
        Public method to match any other rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.OtherOption,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeWorker,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSharedWorker,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeServiceWorker,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypePrefetch,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFavicon,
            QWebEngineUrlRequestInfo.ResourceType.ResourceTypeUnknown)

    def isException(self):
        """
        Public method to check, if the rule defines an exception.

        @return flag indicating an exception
        @rtype bool
        """
        return self.__isException

    def setException(self, exception):
        """
        Public method to set the rule's exception flag.

        @param exception flag indicating an exception rule
        @type bool
        """
        self.__isException = exception

    def isEnabled(self):
        """
        Public method to check, if the rule is enabled.

        @return flag indicating enabled state
        @rtype bool
        """
        return self.__isEnabled

    def setEnabled(self, enabled):
        """
        Public method to set the rule's enabled state.

        @param enabled flag indicating the new enabled state
        @type bool
        """
        self.__isEnabled = enabled

    def isCSSRule(self):
        """
        Public method to check, if the rule is a CSS rule.

        @return flag indicating a CSS rule
        @rtype bool
        """
        return self.__type == AdBlockRuleType.CssRule

    def cssSelector(self):
        """
        Public method to get the CSS selector of the rule.

        @return CSS selector
        @rtype str
        """
        return self.__matchString

    def isDocument(self):
        """
        Public method to check, if this is a document rule.

        @return flag indicating a document rule
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.DocumentOption)

    def isElementHiding(self):
        """
        Public method to check, if this is an element hiding rule.

        @return flag indicating an element hiding rule
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.ElementHideOption)

    def isDomainRestricted(self):
        """
        Public method to check, if this rule is restricted by domain.

        @return flag indicating a domain restriction
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)

    def isComment(self):
        """
        Public method to check, if this is a comment.

        @return flag indicating a comment
        @rtype bool
        """
        return self.__filter.startswith("!")

    def isHeader(self):
        """
        Public method to check, if this is a header.

        @return flag indicating a header
        @rtype bool
        """
        return self.__filter.startswith("[Adblock")

    def isSlow(self):
        """
        Public method to check, if this is a slow rule.

        A rule is considered slow, if it needs a regular expression.

        @return flag indicating a slow rule
        @rtype bool
        """
        return self.__regExp is not None

    def isInternalDisabled(self):
        """
        Public method to check, if this rule was disabled internally.

        @return flag indicating an internally disabled rule
        @rtype bool
        """
        return self.__isInternalDisabled

    def __convertPatternToRegExp(self, wildcardPattern):
        """
        Private method to convert a wildcard pattern to a regular expression.

        @param wildcardPattern string containing the wildcard pattern
        @type str
        @return string containing a regular expression
        @rtype str
        """
        pattern = wildcardPattern

        # remove multiple wildcards
        pattern = re.sub(r"\*+", "*", pattern)
        # remove anchors following separator placeholder
        pattern = re.sub(r"\^\|$", "^", pattern)
        # remove leading wildcards
        pattern = re.sub(r"^(\*)", "", pattern)
        # remove trailing wildcards
        pattern = re.sub(r"(\*)$", "", pattern)
        # escape special symbols
        pattern = re.sub(r"(\W)", r"\\\1", pattern)
        # process extended anchor at expression start
        pattern = re.sub(
            r"^\\\|\\\|",
            r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
        # process separator placeholders
        pattern = re.sub(r"\\\^", r"(?:[^\\w\\d\-.%]|$)", pattern)
        # process anchor at expression start
        pattern = re.sub(r"^\\\|", "^", pattern)
        # process anchor at expression end
        pattern = re.sub(r"\\\|$", "$", pattern)
        # replace wildcards by .*
        pattern = re.sub(r"\\\*", ".*", pattern)

        return pattern

    def __hasOption(self, opt):
        """
        Private method to check, if the given option has been set.

        @param opt option to check for
        @type AdBlockRuleOption
        @return flag indicating the state of the option
        @rtype bool
        """
        return bool(self.__options & opt)

    def setOption(self, opt):
        """
        Public method to set the given option.

        @param opt option to be set
        @type AdBlockRuleOption
        """
        self.__options |= opt

    def __hasException(self, opt):
        """
        Private method to check, if the given option has been set as an
        exception.

        @param opt option to check for
        @type AdBlockRuleOption
        @return flag indicating the exception state of the option
        @rtype bool
        """
        return bool(self.__exceptions & opt)

    def __setException(self, opt, on):
        """
        Private method to set the given option as an exception.

        @param opt option to be set
        @type AdBlockRuleOption
        @param on flag indicating to set or unset the exception
        @type bool
        """
        if on:
            self.__exceptions |= opt
        else:
            self.__exceptions &= ~opt

    def __filterIsOnlyDomain(self, filterString):
        """
        Private method to check, if the given filter is a domain only filter.

        @param filterString filter string to be checked
        @type str
        @return flag indicating a domain only filter
        @rtype bool
        """
        if not filterString.endswith("^") or not filterString.startswith("||"):
            return False

        return all(filterChar not in "/:?=&*" for filterChar in filterString)

    def __filterIsOnlyEndsMatch(self, filterString):
        """
        Private method to check, if the given filter is to match against the
        end of a string.

        @param filterString filter string to be checked
        @type str
        @return flag indicating a end of string match filter
        @rtype bool
        """
        for index, filterChar in enumerate(filterString):
            if filterChar in "^*":
                return False
            if filterChar == "|":
                # the first anchor must be the very last character
                return index == len(filterString) - 1

        return False

    def __isMatchingDomain(self, domain, filterString):
        """
        Private method to check, if a given domain matches the given filter
        string.

        The domain matches, if it is equal to the filter string or if it is
        a sub-domain of it.

        @param domain domain to be checked
        @type str
        @param filterString filter string to check against
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if filterString == domain:
            return True

        if not domain.endswith(filterString):
            return False

        # The character in front of the matching suffix must be a label
        # separator. The suffix position is calculated because a search
        # would report the first occurrence of the filter string only.
        index = len(domain) - len(filterString)

        return bool(index > 0 and domain[index - 1] == ".")

    def __isMatchingRegExpStrings(self, url):
        """
        Private method to check the given URL against the fixed parts of
        the regexp.

        The check honors the case sensitivity of the regular expression.

        @param url URL to be checked
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if self.__regExp is None:
            return True

        if self.__regExp.flags & re.IGNORECASE:
            url = url.lower()
            return all(matcher.lower() in url
                       for matcher in self.__stringMatchers)

        return all(matcher in url for matcher in self.__stringMatchers)

    def __parseRegExpFilter(self, filterString):
        """
        Private method to split the given wildcard filter into strings that
        can be used with 'in'.

        The filter is split at the special characters '|', '*' and '^';
        parts with less than two characters are dropped.

        @param filterString filter string to be parsed
        @type str
        @return fixed string parts of the filter (without duplicates)
        @rtype list of str
        """
        matchers = []

        startPos = 0
        for index, filterChar in enumerate(filterString):
            if filterChar in "|*^":
                sub = filterString[startPos:index]
                if len(sub) > 1:
                    matchers.append(sub)
                startPos = index + 1

        sub = filterString[startPos:]
        if len(sub) > 1:
            matchers.append(sub)

        # remove duplicates but keep a stable order
        return list(dict.fromkeys(matchers))

    def ruleType(self):
        """
        Public method to get the rule type.

        @return rule type
        @rtype AdBlockRuleType
        """
        return self.__type

    def ruleOptions(self):
        """
        Public method to get the rule options.

        @return rule options
        @rtype AdBlockRuleOption
        """
        return self.__options

    def ruleExceptions(self):
        """
        Public method to get the rule exceptions.

        @return rule exceptions
        @rtype AdBlockRuleOption
        """
        return self.__exceptions

    def matchString(self):
        """
        Public method to get the match string.

        @return match string (empty for rules not using one)
        @rtype str
        """
        return self.__matchString

    def caseSensitivity(self):
        """
        Public method to get the case sensitivity.

        @return case sensitivity
        @rtype Qt.CaseSensitivity
        """
        return self.__caseSensitivity

    def allowedDomains(self):
        """
        Public method to get a copy of the list of allowed domains.

        @return list of allowed domains
        @rtype list of str
        """
        return self.__allowedDomains[:]

    def blockedDomains(self):
        """
        Public method to get a copy of the list of blocked domains.

        @return list of blocked domains
        @rtype list of str
        """
        return self.__blockedDomains[:]

    def addBlockedDomains(self, domains):
        """
        Public method to add to the list of blocked domains.

        @param domains list of domains to be added
        @type str or list of str
        """
        if isinstance(domains, list):
            self.__blockedDomains.extend(domains)
        else:
            self.__blockedDomains.append(domains)

    def getRegExpAndMatchers(self):
        """
        Public method to get the regular expression and associated string
        matchers.

        @return tuple containing the regular expression and the list of
            string matchers
        @rtype tuple of (re.Pattern, list of str)
        """
        if self.__regExp is not None:
            # pass the flags along, otherwise the copy would lose the
            # case insensitivity of the original
            return (re.compile(self.__regExp.pattern, self.__regExp.flags),
                    self.__stringMatchers[:])
        else:
            return (None, [])

    def copyFrom(self, other):
        """
        Public method to copy another AdBlock rule.

        @param other reference to the AdBlock rule to copy from
        @type AdBlockRule
        """
        self.__subscription = other.subscription()
        self.__type = other.ruleType()
        self.__options = other.ruleOptions()
        self.__exceptions = other.ruleExceptions()
        self.__filter = other.filter()
        self.__matchString = other.matchString()
        self.__caseSensitivity = other.caseSensitivity()
        self.__isEnabled = other.isEnabled()
        self.__isException = other.isException()
        self.__isInternalDisabled = other.isInternalDisabled()
        self.__allowedDomains = other.allowedDomains()
        self.__blockedDomains = other.blockedDomains()
        self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()

eric ide

mercurial