src/eric7/WebBrowser/AdBlock/AdBlockRule.py

branch
eric7
changeset 9209
b99e7fd55fd3
parent 8881
54e42bc2437a
child 9221
bf71ee032bb4
equal deleted inserted replaced
9208:3fc8dfeb6ebe 9209:b99e7fd55fd3
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2022 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock rule class.
8 """
9
10 import re
11 from enum import IntEnum
12
13 from PyQt6.QtCore import Qt
14 from PyQt6.QtWebEngineCore import QWebEngineUrlRequestInfo
15
16 from EricNetwork import EricTldExtractor
17
18
def toSecondLevelDomain(url):
    """
    Module function to reduce the host of a URL to its second level domain.

    The top level domain reported by the TLD extractor is cut off the end of
    the host; of the remaining text only the label after the last dot is
    kept and the top level domain is appended again.

    @param url URL to extract domain from
    @type QUrl
    @return name of second level domain (empty string, if the host or its
        top level domain could not be determined)
    @rtype str
    """
    tld = EricTldExtractor.instance().tld(url.host())
    host = url.host()

    if not (tld and host):
        return ""

    # NOTE(review): this arithmetic only yields a clean label if tld() returns
    # the suffix including its leading dot - confirm against EricTldExtractor.
    prefix = host[:len(host) - len(tld)]
    if "." not in prefix:
        # host consists of a single label plus the top level domain
        return host

    # keep only the right-most label of the remaining host part
    return prefix.rpartition(".")[2] + tld
42
43
class AdBlockRuleType(IntEnum):
    """
    Class defining the kinds of rules an AdBlock filter line can result in.
    """
    # element hiding rule ('##' or '#@#')
    CssRule = 0
    # rule of the form '||domain^'
    DomainMatchRule = 1
    # rule needing a regular expression
    RegExpMatchRule = 2
    # rule anchored at the end of the URL ('...|')
    StringEndsMatchRule = 3
    # plain substring rule
    StringContainsMatchRule = 4
    # rule with options but without a pattern
    MatchAllUrlsRule = 5
    # comment, empty line or rule that could not be handled
    Invalid = 6
55
56
class AdBlockRuleOption(IntEnum):
    """
    Class defining the rule options.

    Each option occupies its own bit, so that several options can be
    combined into a single integer bit mask.
    """
    NoOption = 0
    DomainRestrictedOption = 1 << 0
    ThirdPartyOption = 1 << 1
    ObjectOption = 1 << 2
    SubdocumentOption = 1 << 3
    XMLHttpRequestOption = 1 << 4
    ImageOption = 1 << 5
    ScriptOption = 1 << 6
    StyleSheetOption = 1 << 7
    ObjectSubrequestOption = 1 << 8
    PingOption = 1 << 9
    MediaOption = 1 << 10
    FontOption = 1 << 11
    OtherOption = 1 << 12

    # Exception only options
    DocumentOption = 1 << 13
    ElementHideOption = 1 << 14
79
80
class AdBlockRule:
    """
    Class implementing the AdBlock rule.

    A rule is created from one line of a filter list. The line is parsed
    once into a rule type, a set of options and the data needed to match
    URLs (a plain string, a domain or a regular expression).
    """

    def __init__(self, filterRule="", subscription=None):
        """
        Constructor

        @param filterRule filter string of the rule
        @type str
        @param subscription reference to the subscription object
        @type AdBlockSubscription
        """
        self.__subscription = subscription

        self.__regExp = None
        self.__stringMatchers = []
        # Always defined, so that matchString(), cssSelector() and copyFrom()
        # also work for rules that never assign a match string (regexp,
        # match-all and invalid rules).
        self.__matchString = ""

        self.__blockedDomains = []
        self.__allowedDomains = []

        self.__isEnabled = True
        self.__isException = False
        self.__isInternalDisabled = False
        self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive

        self.__type = AdBlockRuleType.StringContainsMatchRule
        self.__options = AdBlockRuleOption.NoOption
        self.__exceptions = AdBlockRuleOption.NoOption

        self.setFilter(filterRule)

    def subscription(self):
        """
        Public method to get the subscription this rule belongs to.

        @return subscription of the rule
        @rtype AdBlockSubscription
        """
        return self.__subscription

    def setSubscription(self, subscription):
        """
        Public method to set the subscription this rule belongs to.

        @param subscription subscription of the rule
        @type AdBlockSubscription
        """
        self.__subscription = subscription

    def filter(self):
        """
        Public method to get the rule filter string.

        @return rule filter string
        @rtype str
        """
        return self.__filter

    def setFilter(self, filterRule):
        """
        Public method to set the rule filter string.

        The filter string is stored and parsed immediately.

        @param filterRule rule filter string
        @type str
        """
        self.__filter = filterRule
        self.__parseFilter()

    def __parseFilter(self):
        """
        Private method to parse the filter pattern.

        The method determines the rule type, the rule options and the data
        used for matching. Rules that cannot be handled are marked as
        internally disabled with type 'Invalid'.
        """
        parsedLine = self.__filter

        # empty rule or just a comment
        if not parsedLine.strip() or parsedLine.startswith("!"):
            self.__isEnabled = False
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        # CSS element hiding rule
        if "##" in parsedLine or "#@#" in parsedLine:
            self.__type = AdBlockRuleType.CssRule
            pos = parsedLine.find("#")

            # domain restricted rule
            if not parsedLine.startswith("##"):
                self.__parseDomains(parsedLine[:pos], ",")

            # '#@#' marks an exception, '##' a normal hiding rule
            self.__isException = parsedLine[pos + 1] == "@"
            if self.__isException:
                self.__matchString = parsedLine[pos + 3:]
            else:
                self.__matchString = parsedLine[pos + 2:]

            # CSS rule cannot have more options -> stop parsing
            return

        # Exception always starts with @@
        if parsedLine.startswith("@@"):
            self.__isException = True
            parsedLine = parsedLine[2:]

        # Parse all options following '$' character
        optionsIndex = parsedLine.find("$")
        if optionsIndex >= 0:
            # If we don't handle all options, it's safer to just disable
            # this rule
            if not self.__parseOptions(parsedLine[optionsIndex + 1:]):
                self.__isInternalDisabled = True
                self.__type = AdBlockRuleType.Invalid
                return

            parsedLine = parsedLine[:optionsIndex]

        # Rule is classic regexp
        if parsedLine.startswith("/") and parsedLine.endswith("/"):
            # The text between the slashes is regular expression syntax, so
            # no literal substrings can be derived from it for prefiltering.
            self.__compileRegExp(parsedLine[1:-1], [])
            return

        # Remove starting / ending wildcards (*)
        if parsedLine.startswith("*"):
            parsedLine = parsedLine[1:]
        if parsedLine.endswith("*"):
            parsedLine = parsedLine[:-1]

        # Fast string matching for domain here
        if self.__filterIsOnlyDomain(parsedLine):
            # strip the leading '||' and the trailing '^'
            self.__type = AdBlockRuleType.DomainMatchRule
            self.__matchString = parsedLine[2:-1]
            return

        # If rule contains '|' only at the end, string matching can be used
        if self.__filterIsOnlyEndsMatch(parsedLine):
            self.__type = AdBlockRuleType.StringEndsMatchRule
            self.__matchString = parsedLine[:-1]
            return

        # If there is still a wildcard (*) or separator (^) or (|),
        # the rule must be modified to comply with re.
        if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
            self.__compileRegExp(
                self.__convertPatternToRegExp(parsedLine),
                self.__parseRegExpFilter(parsedLine))
            return

        # This rule matches all URLs
        if not parsedLine:
            if self.__options == AdBlockRuleOption.NoOption:
                # a rule without pattern and options is meaningless
                self.__isInternalDisabled = True
                self.__type = AdBlockRuleType.Invalid
                return

            self.__type = AdBlockRuleType.MatchAllUrlsRule
            return

        # no regexp required
        self.__type = AdBlockRuleType.StringContainsMatchRule
        self.__matchString = parsedLine

    def __parseOptions(self, optionsString):
        """
        Private method to parse the options part of a filter (the text
        following the '$' character).

        All options are processed, even if an earlier one was not
        recognized.

        @param optionsString comma separated list of options
        @type str
        @return flag indicating that all options were handled
        @rtype bool
        """
        # Options that may be inverted with a leading '~'. They are matched
        # by their ending in order to cover the inverted form as well.
        typedOptions = (
            ("third-party", AdBlockRuleOption.ThirdPartyOption),
            ("object", AdBlockRuleOption.ObjectOption),
            ("subdocument", AdBlockRuleOption.SubdocumentOption),
            ("xmlhttprequest", AdBlockRuleOption.XMLHttpRequestOption),
            ("image", AdBlockRuleOption.ImageOption),
            ("script", AdBlockRuleOption.ScriptOption),
            ("stylesheet", AdBlockRuleOption.StyleSheetOption),
            ("object-subrequest", AdBlockRuleOption.ObjectSubrequestOption),
            ("ping", AdBlockRuleOption.PingOption),
            ("media", AdBlockRuleOption.MediaOption),
            ("font", AdBlockRuleOption.FontOption),
            ("other", AdBlockRuleOption.OtherOption),
        )

        options = [opt for opt in optionsString.split(",") if opt]

        handledOptions = 0
        for option in options:
            if option.startswith("domain="):
                self.__parseDomains(option[7:], "|")
                handledOptions += 1
                continue

            if option == "match-case":
                self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive
                handledOptions += 1
                continue

            for suffix, ruleOption in typedOptions:
                if option.endswith(suffix):
                    self.setOption(ruleOption)
                    self.__setException(ruleOption, option.startswith("~"))
                    # every recognized option counts as handled; otherwise
                    # the rule would be disabled by the caller
                    handledOptions += 1
                    break
            else:
                if option == "document" and self.__isException:
                    self.setOption(AdBlockRuleOption.DocumentOption)
                    handledOptions += 1
                elif option == "elemhide" and self.__isException:
                    self.setOption(AdBlockRuleOption.ElementHideOption)
                    handledOptions += 1
                elif option == "collapse":
                    # Hiding placeholders of blocked elements is enabled by
                    # default
                    handledOptions += 1

        return handledOptions == len(options)

    def __compileRegExp(self, pattern, matchers):
        """
        Private method to turn this rule into a regular expression rule.

        The case sensitivity of the rule is honored. If the pattern is not a
        valid regular expression, the rule is disabled internally instead of
        raising an exception.

        @param pattern regular expression pattern
        @type str
        @param matchers literal strings that must be contained in a
            matching URL
        @type list of str
        """
        # The case sensitivity is compared explicitly; testing the enum
        # member for truth would not distinguish the two values.
        if self.__caseSensitivity == Qt.CaseSensitivity.CaseSensitive:
            flags = 0
        else:
            flags = re.IGNORECASE

        try:
            self.__regExp = re.compile(pattern, flags)
        except re.error:
            self.__regExp = None
            self.__stringMatchers = []
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        self.__stringMatchers = matchers
        self.__type = AdBlockRuleType.RegExpMatchRule

    def __parseDomains(self, domains, separator):
        """
        Private method to parse a string with a domain list.

        Domains starting with '~' are recorded as blocked domains, all
        others as allowed domains.

        @param domains list of domains
        @type str
        @param separator separator character used by the list
        @type str
        """
        for domain in domains.split(separator):
            if not domain:
                continue
            if domain.startswith("~"):
                self.__blockedDomains.append(domain[1:])
            else:
                self.__allowedDomains.append(domain)

        if self.__blockedDomains or self.__allowedDomains:
            self.setOption(AdBlockRuleOption.DomainRestrictedOption)

    def networkMatch(self, request, domain, encodedUrl):
        """
        Public method to check the rule for a match.

        @param request reference to the network request
        @type QWebEngineUrlRequestInfo
        @param domain domain name
        @type str
        @param encodedUrl string encoded URL to be checked
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if (
            self.__type == AdBlockRuleType.CssRule or
            not self.__isEnabled or
            self.__isInternalDisabled
        ):
            return False

        if not self.__stringMatch(domain, encodedUrl):
            return False

        # check domain restrictions
        if (
            self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
            not self.matchDomain(request.firstPartyUrl().host())
        ):
            return False

        # check all request related restrictions; each one only applies,
        # if the respective option was given in the filter
        restrictions = (
            (AdBlockRuleOption.ThirdPartyOption, self.matchThirdParty),
            (AdBlockRuleOption.ObjectOption, self.matchObject),
            (AdBlockRuleOption.SubdocumentOption, self.matchSubdocument),
            (AdBlockRuleOption.XMLHttpRequestOption,
             self.matchXmlHttpRequest),
            (AdBlockRuleOption.ImageOption, self.matchImage),
            (AdBlockRuleOption.ScriptOption, self.matchScript),
            (AdBlockRuleOption.StyleSheetOption, self.matchStyleSheet),
            (AdBlockRuleOption.ObjectSubrequestOption,
             self.matchObjectSubrequest),
            (AdBlockRuleOption.PingOption, self.matchPing),
            (AdBlockRuleOption.MediaOption, self.matchMedia),
            (AdBlockRuleOption.FontOption, self.matchFont),
            (AdBlockRuleOption.OtherOption, self.matchOther),
        )
        return all(
            matcher(request)
            for option, matcher in restrictions
            if self.__hasOption(option)
        )

    def urlMatch(self, url):
        """
        Public method to check an URL against the rule.

        Only rules with the 'document' or 'elemhide' option can match.

        @param url URL to check
        @type QUrl
        @return flag indicating a match
        @rtype bool
        """
        if (
            not self.__hasOption(AdBlockRuleOption.DocumentOption) and
            not self.__hasOption(AdBlockRuleOption.ElementHideOption)
        ):
            return False

        encodedUrl = bytes(url.toEncoded()).decode()
        domain = url.host()
        return self.__stringMatch(domain, encodedUrl)

    def __stringMatch(self, domain, encodedUrl):
        """
        Private method to match a domain and URL against the rule pattern.

        @param domain domain to match
        @type str
        @param encodedUrl URL in encoded form
        @type str
        @return flag indicating a match
        @rtype bool
        """
        ignoreCase = (
            self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive
        )

        if self.__type == AdBlockRuleType.StringContainsMatchRule:
            if ignoreCase:
                return self.__matchString.lower() in encodedUrl.lower()
            return self.__matchString in encodedUrl

        if self.__type == AdBlockRuleType.DomainMatchRule:
            return self.__isMatchingDomain(domain, self.__matchString)

        if self.__type == AdBlockRuleType.StringEndsMatchRule:
            if ignoreCase:
                return encodedUrl.lower().endswith(
                    self.__matchString.lower())
            return encodedUrl.endswith(self.__matchString)

        if self.__type == AdBlockRuleType.RegExpMatchRule:
            # cheap substring test first, regular expression only if needed
            return (
                self.__regExp is not None and
                self.__isMatchingRegExpStrings(encodedUrl) and
                self.__regExp.search(encodedUrl) is not None
            )

        return self.__type == AdBlockRuleType.MatchAllUrlsRule

    def matchDomain(self, domain):
        """
        Public method to match a domain.

        @param domain domain name to check
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if not self.__isEnabled:
            return False

        if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
            return True

        if not self.__blockedDomains:
            return any(self.__isMatchingDomain(domain, dom)
                       for dom in self.__allowedDomains)
        elif not self.__allowedDomains:
            return all(not self.__isMatchingDomain(domain, dom)
                       for dom in self.__blockedDomains)
        else:
            return (
                all(not self.__isMatchingDomain(domain, dom)
                    for dom in self.__blockedDomains) and
                any(self.__isMatchingDomain(domain, dom)
                    for dom in self.__allowedDomains)
            )

    def matchThirdParty(self, req):
        """
        Public method to match a third-party rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        # Third-party matching should be performed on second-level domains
        firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
        host = toSecondLevelDomain(req.requestUrl())

        match = firstPartyHost != host

        if self.__hasException(AdBlockRuleOption.ThirdPartyOption):
            return not match
        return match

    def __matchResourceType(self, req, resourceTypes, option):
        """
        Private method to match the resource type of a request.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @param resourceTypes resource types regarded as a match
        @type tuple of QWebEngineUrlRequestInfo.ResourceType
        @param option option the check belongs to; if it was given in its
            inverted form (~option), the result is inverted
        @type AdBlockRuleOption
        @return flag indicating a match
        @rtype bool
        """
        match = req.resourceType() in resourceTypes

        if self.__hasException(option):
            return not match
        return match

    def matchObject(self, req):
        """
        Public method to match an object rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject,),
            AdBlockRuleOption.ObjectOption)

    def matchSubdocument(self, req):
        """
        Public method to match a sub-document rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame,),
            AdBlockRuleOption.SubdocumentOption)

    def matchXmlHttpRequest(self, req):
        """
        Public method to match a XmlHttpRequest rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr,),
            AdBlockRuleOption.XMLHttpRequestOption)

    def matchImage(self, req):
        """
        Public method to match an Image rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage,),
            AdBlockRuleOption.ImageOption)

    def matchScript(self, req):
        """
        Public method to match a Script rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript,),
            AdBlockRuleOption.ScriptOption)

    def matchStyleSheet(self, req):
        """
        Public method to match a StyleSheet rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet,),
            AdBlockRuleOption.StyleSheetOption)

    def matchObjectSubrequest(self, req):
        """
        Public method to match an Object Subrequest rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (
                QWebEngineUrlRequestInfo.ResourceType
                .ResourceTypeSubResource,
                QWebEngineUrlRequestInfo.ResourceType
                .ResourceTypePluginResource,
            ),
            AdBlockRuleOption.ObjectSubrequestOption)

    def matchPing(self, req):
        """
        Public method to match a Ping rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing,),
            AdBlockRuleOption.PingOption)

    def matchMedia(self, req):
        """
        Public method to match a Media rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia,),
            AdBlockRuleOption.MediaOption)

    def matchFont(self, req):
        """
        Public method to match a Font rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource,),
            AdBlockRuleOption.FontOption)

    def matchOther(self, req):
        """
        Public method to match any other rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req,
            (
                QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource,
                QWebEngineUrlRequestInfo.ResourceType.ResourceTypeWorker,
                QWebEngineUrlRequestInfo.ResourceType
                .ResourceTypeSharedWorker,
                QWebEngineUrlRequestInfo.ResourceType
                .ResourceTypeServiceWorker,
                QWebEngineUrlRequestInfo.ResourceType.ResourceTypePrefetch,
                QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFavicon,
                QWebEngineUrlRequestInfo.ResourceType.ResourceTypeUnknown,
            ),
            AdBlockRuleOption.OtherOption)

    def isException(self):
        """
        Public method to check, if the rule defines an exception.

        @return flag indicating an exception
        @rtype bool
        """
        return self.__isException

    def setException(self, exception):
        """
        Public method to set the rule's exception flag.

        @param exception flag indicating an exception rule
        @type bool
        """
        self.__isException = exception

    def isEnabled(self):
        """
        Public method to check, if the rule is enabled.

        @return flag indicating enabled state
        @rtype bool
        """
        return self.__isEnabled

    def setEnabled(self, enabled):
        """
        Public method to set the rule's enabled state.

        @param enabled flag indicating the new enabled state
        @type bool
        """
        self.__isEnabled = enabled

    def isCSSRule(self):
        """
        Public method to check, if the rule is a CSS rule.

        @return flag indicating a CSS rule
        @rtype bool
        """
        return self.__type == AdBlockRuleType.CssRule

    def cssSelector(self):
        """
        Public method to get the CSS selector of the rule.

        @return CSS selector
        @rtype str
        """
        return self.__matchString

    def isDocument(self):
        """
        Public method to check, if this is a document rule.

        @return flag indicating a document rule
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.DocumentOption)

    def isElementHiding(self):
        """
        Public method to check, if this is an element hiding rule.

        @return flag indicating an element hiding rule
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.ElementHideOption)

    def isDomainRestricted(self):
        """
        Public method to check, if this rule is restricted by domain.

        @return flag indicating a domain restriction
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)

    def isComment(self):
        """
        Public method to check, if this is a comment.

        @return flag indicating a comment
        @rtype bool
        """
        return self.__filter.startswith("!")

    def isHeader(self):
        """
        Public method to check, if this is a header.

        @return flag indicating a header
        @rtype bool
        """
        return self.__filter.startswith("[Adblock")

    def isSlow(self):
        """
        Public method to check, if this is a slow rule.

        A rule is regarded as slow, if it needs a regular expression.

        @return flag indicating a slow rule
        @rtype bool
        """
        return self.__regExp is not None

    def isInternalDisabled(self):
        """
        Public method to check, if this rule was disabled internally.

        @return flag indicating an internally disabled rule
        @rtype bool
        """
        return self.__isInternalDisabled

    def __convertPatternToRegExp(self, wildcardPattern):
        """
        Private method to convert a wildcard pattern to a regular expression.

        @param wildcardPattern string containing the wildcard pattern
        @type str
        @return string containing a regular expression
        @rtype str
        """
        pattern = wildcardPattern

        # remove multiple wildcards
        pattern = re.sub(r"\*+", "*", pattern)
        # remove anchors following separator placeholder
        pattern = re.sub(r"\^\|$", "^", pattern)
        # remove leading wildcards
        pattern = re.sub(r"^(\*)", "", pattern)
        # remove trailing wildcards
        pattern = re.sub(r"(\*)$", "", pattern)
        # escape special symbols
        pattern = re.sub(r"(\W)", r"\\\1", pattern)
        # process extended anchor at expression start
        pattern = re.sub(
            r"^\\\|\\\|",
            r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
        # process separator placeholders
        pattern = re.sub(r"\\\^", r"(?:[^\\w\\d\-.%]|$)", pattern)
        # process anchor at expression start
        pattern = re.sub(r"^\\\|", "^", pattern)
        # process anchor at expression end
        pattern = re.sub(r"\\\|$", "$", pattern)
        # replace wildcards by .*
        pattern = re.sub(r"\\\*", ".*", pattern)

        return pattern

    def __hasOption(self, opt):
        """
        Private method to check, if the given option has been set.

        @param opt option to check for
        @type AdBlockRuleOption
        @return flag indicating the state of the option
        @rtype bool
        """
        return bool(self.__options & opt)

    def setOption(self, opt):
        """
        Public method to set the given option.

        @param opt option to be set
        @type AdBlockRuleOption
        """
        self.__options |= opt

    def __hasException(self, opt):
        """
        Private method to check, if the given option has been set as an
        exception.

        @param opt option to check for
        @type AdBlockRuleOption
        @return flag indicating the exception state of the option
        @rtype bool
        """
        return bool(self.__exceptions & opt)

    def __setException(self, opt, on):
        """
        Private method to set the given option as an exception.

        @param opt option to be set
        @type AdBlockRuleOption
        @param on flag indicating to set or unset the exception
        @type bool
        """
        if on:
            self.__exceptions |= opt
        else:
            self.__exceptions &= ~opt

    def __filterIsOnlyDomain(self, filterString):
        """
        Private method to check, if the given filter is a domain only filter.

        Such a filter has the form '||domain^' and contains none of the
        characters '/', ':', '?', '=', '&' and '*'.

        @param filterString filter string to be checked
        @type str
        @return flag indicating a domain only filter
        @rtype bool
        """
        if not (filterString.startswith("||") and filterString.endswith("^")):
            return False

        return not any(filterChar in "/:?=&*" for filterChar in filterString)

    def __filterIsOnlyEndsMatch(self, filterString):
        """
        Private method to check, if the given filter is to match against the
        end of a string.

        @param filterString filter string to be checked
        @type str
        @return flag indicating a end of string match filter
        @rtype bool
        """
        for index, filterChar in enumerate(filterString):
            if filterChar in ("^", "*"):
                return False
            if filterChar == "|":
                # the first '|' must be the very last character
                return index == len(filterString) - 1

        return False

    def __isMatchingDomain(self, domain, filterString):
        """
        Private method to check, if a given domain matches the given filter
        string.

        The domain matches, if it is equal to the filter string or if it is
        a sub-domain of it (i.e. ends with '.' followed by the filter
        string).

        @param domain domain to be checked
        @type str
        @param filterString filter string to check against
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if filterString == domain:
            return True

        if not filterString or not domain.endswith(filterString):
            return False

        # Look at the character in front of the matching suffix; searching
        # for the first occurrence would fail for hosts containing the
        # filter string more than once.
        index = len(domain) - len(filterString)

        return index > 0 and domain[index - 1] == "."

    def __isMatchingRegExpStrings(self, url):
        """
        Private method to check the given URL against the fixed parts of
        the regexp.

        The check honors the case sensitivity of the rule, so that it never
        rejects a URL the regular expression would accept.

        @param url URL to be checked
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if self.__regExp is None:
            return True

        if self.__caseSensitivity == Qt.CaseSensitivity.CaseSensitive:
            return all(matcher in url for matcher in self.__stringMatchers)

        loweredUrl = url.lower()
        return all(matcher.lower() in loweredUrl
                   for matcher in self.__stringMatchers)

    def __parseRegExpFilter(self, filterString):
        """
        Private method to split the given wildcard filter into strings that
        can be used with 'in'.

        The filter is split at the characters '|', '*' and '^'. Parts
        consisting of a single character are ignored.

        @param filterString filter string to be parsed
        @type str
        @return fixed string parts of the filter (without duplicates)
        @rtype list of str
        """
        matchers = []

        # must start at 0; a negative start would make the first slice
        # empty and drop the leading literal part
        startPos = 0
        for index, filterChar in enumerate(filterString):
            if filterChar in ("|", "*", "^"):
                sub = filterString[startPos:index]
                if len(sub) > 1:
                    matchers.append(sub)
                startPos = index + 1

        sub = filterString[startPos:]
        if len(sub) > 1:
            matchers.append(sub)

        # remove duplicates while keeping the order of first occurrence
        return list(dict.fromkeys(matchers))

    def ruleType(self):
        """
        Public method to get the rule type.

        @return rule type
        @rtype AdBlockRuleType
        """
        return self.__type

    def ruleOptions(self):
        """
        Public method to get the rule options.

        @return rule options
        @rtype AdBlockRuleOption
        """
        return self.__options

    def ruleExceptions(self):
        """
        Public method to get the rule exceptions.

        @return rule exceptions
        @rtype AdBlockRuleOption
        """
        return self.__exceptions

    def matchString(self):
        """
        Public method to get the match string.

        @return match string
        @rtype str
        """
        return self.__matchString

    def caseSensitivity(self):
        """
        Public method to get the case sensitivity.

        @return case sensitivity
        @rtype Qt.CaseSensitivity
        """
        return self.__caseSensitivity

    def allowedDomains(self):
        """
        Public method to get a copy of the list of allowed domains.

        @return list of allowed domains
        @rtype list of str
        """
        return self.__allowedDomains[:]

    def blockedDomains(self):
        """
        Public method to get a copy of the list of blocked domains.

        @return list of blocked domains
        @rtype list of str
        """
        return self.__blockedDomains[:]

    def addBlockedDomains(self, domains):
        """
        Public method to add to the list of blocked domains.

        @param domains list of domains to be added
        @type str or list of str
        """
        if isinstance(domains, list):
            self.__blockedDomains.extend(domains)
        else:
            self.__blockedDomains.append(domains)

    def getRegExpAndMatchers(self):
        """
        Public method to get the regular expression and associated string
        matchers.

        @return tuple containing the regular expression and the list of
            string matchers
        @rtype tuple of (re.Pattern, list of str)
        """
        if self.__regExp is None:
            return (None, [])

        # Compiled patterns are immutable and can be shared. Returning the
        # object itself keeps its flags (e.g. re.IGNORECASE), which would be
        # lost by recompiling the bare pattern string.
        return (self.__regExp, self.__stringMatchers[:])

    def copyFrom(self, other):
        """
        Public method to copy another AdBlock rule.

        @param other reference to the AdBlock rule to copy from
        @type AdBlockRule
        """
        self.__subscription = other.subscription()
        self.__type = other.ruleType()
        self.__options = other.ruleOptions()
        self.__exceptions = other.ruleExceptions()
        self.__filter = other.filter()
        self.__matchString = other.matchString()
        self.__caseSensitivity = other.caseSensitivity()
        self.__isEnabled = other.isEnabled()
        self.__isException = other.isException()
        self.__isInternalDisabled = other.isInternalDisabled()
        self.__allowedDomains = other.allowedDomains()
        self.__blockedDomains = other.blockedDomains()
        self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()

eric ide

mercurial