eric6/WebBrowser/AdBlock/AdBlockRule.py

changeset 6942
2602857055c5
parent 6645
ad476851d7e0
child 7201
6b42677d7043
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock rule class.
8 """
9
10 from __future__ import unicode_literals
11
12 import re
13
14 from enum import IntEnum
15
16 from PyQt5.QtCore import PYQT_VERSION, Qt, QRegExp
17 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo
18
19 from Globals import qVersionTuple
20
21
def toSecondLevelDomain(url):
    """
    Module function to reduce the host of a URL to its second level domain.

    The result is the last label in front of the top level domain joined
    with the top level domain itself (e.g. 'www.example.com' becomes
    'example.com').

    @param url URL to extract domain from
    @type QUrl
    @return name of second level domain (empty string, if the URL has no
        host or no top level domain)
    @rtype str
    """
    tld = url.topLevelDomain()
    host = url.host()

    if not (tld and host):
        return ""

    # everything in front of the top level domain
    prefix = host[:len(host) - len(tld)]
    if "." not in prefix:
        # the host already is a second level domain
        return host

    # keep only the label directly in front of the top level domain
    return prefix.rsplit(".", 1)[1] + tld
45
46
class AdBlockRuleType(IntEnum):
    """
    Class defining the kinds of rules the filter parser distinguishes.
    """
    # element hiding rule (filter contains '##' or '#@#')
    CssRule = 0
    # filter of the form '||domain^', matched against the request domain
    DomainMatchRule = 1
    # classic '/regexp/' filter or pattern needing regexp conversion
    RegExpMatchRule = 2
    # filter with a single '|' at its end, matched against the URL end
    StringEndsMatchRule = 3
    # plain text filter, matched as a substring of the URL
    StringContainsMatchRule = 4
    # empty pattern with options only
    MatchAllUrlsRule = 5
    # comment, empty line or rule with unsupported options
    Invalid = 6
58
59
class AdBlockRuleOption(IntEnum):
    """
    Class defining the rule option bits.

    Each member except NoOption occupies a single bit, so that several
    options can be combined into one integer bit mask.
    """
    NoOption = 0
    DomainRestrictedOption = 1 << 0
    ThirdPartyOption = 1 << 1
    ObjectOption = 1 << 2
    SubdocumentOption = 1 << 3
    XMLHttpRequestOption = 1 << 4
    ImageOption = 1 << 5
    ScriptOption = 1 << 6
    StyleSheetOption = 1 << 7
    ObjectSubrequestOption = 1 << 8
    PingOption = 1 << 9
    MediaOption = 1 << 10
    FontOption = 1 << 11
    OtherOption = 1 << 12

    # Exception only options
    DocumentOption = 1 << 13
    ElementHideOption = 1 << 14
82
83
class AdBlockRule(object):
    """
    Class implementing the AdBlock rule.

    A rule is created from one line of a filter list. The line is parsed
    once (see setFilter()) into a rule type, a set of options and the data
    needed for matching (match string, regular expression, domain lists).
    """
    def __init__(self, filterRule="", subscription=None):
        """
        Constructor

        @param filterRule filter string of the rule
        @type str
        @param subscription reference to the subscription object
        @type AdBlockSubscription
        """
        self.__subscription = subscription

        self.__filter = ""
        # initialized here because not every rule type assigns a match
        # string while parsing (e.g. regexp or match-all rules), but
        # matchString(), cssSelector() and copyFrom() read it for any rule
        self.__matchString = ""

        self.__regExp = None
        self.__stringMatchers = []

        self.__blockedDomains = []
        self.__allowedDomains = []

        self.__isEnabled = True
        self.__isException = False
        self.__isInternalDisabled = False
        self.__caseSensitivity = Qt.CaseInsensitive

        self.__type = AdBlockRuleType.StringContainsMatchRule
        self.__options = AdBlockRuleOption.NoOption
        self.__exceptions = AdBlockRuleOption.NoOption

        self.setFilter(filterRule)

    def subscription(self):
        """
        Public method to get the subscription this rule belongs to.

        @return subscription of the rule
        @rtype AdBlockSubscription
        """
        return self.__subscription

    def setSubscription(self, subscription):
        """
        Public method to set the subscription this rule belongs to.

        @param subscription subscription of the rule
        @type AdBlockSubscription
        """
        self.__subscription = subscription

    def filter(self):
        """
        Public method to get the rule filter string.

        @return rule filter string
        @rtype str
        """
        return self.__filter

    def setFilter(self, filterRule):
        """
        Public method to set the rule filter string.

        The filter string is stored and parsed immediately.

        @param filterRule rule filter string
        @type str
        """
        self.__filter = filterRule
        self.__parseFilter()

    def __parseFilter(self):
        """
        Private method to parse the filter pattern.

        Determines the rule type, the options and the matching data from
        the stored filter string. Rules that cannot be handled completely
        are marked as internally disabled and get the type 'Invalid'.
        """
        parsedLine = self.__filter

        # empty rule or just a comment
        if not parsedLine.strip() or parsedLine.startswith("!"):
            self.__isEnabled = False
            self.__isInternalDisabled = True
            self.__type = AdBlockRuleType.Invalid
            return

        # CSS element hiding rule
        if "##" in parsedLine or "#@#" in parsedLine:
            self.__parseCssRule(parsedLine)
            # CSS rule cannot have more options -> stop parsing
            return

        # Exception always starts with @@
        if parsedLine.startswith("@@"):
            self.__isException = True
            parsedLine = parsedLine[2:]

        # Parse all options following '$' character
        optionsIndex = parsedLine.find("$")
        if optionsIndex >= 0:
            options = [opt
                       for opt in parsedLine[optionsIndex + 1:].split(",")
                       if opt]

            handledOptions = 0
            for option in options:
                if self.__parseOption(option):
                    handledOptions += 1

            # If we don't handle all options, it's safer to just disable
            # this rule
            if handledOptions != len(options):
                self.__isInternalDisabled = True
                self.__type = AdBlockRuleType.Invalid
                return

            parsedLine = parsedLine[:optionsIndex]

        # Rule is classic regexp
        if parsedLine.startswith("/") and parsedLine.endswith("/"):
            parsedLine = parsedLine[1:-1]
            self.__type = AdBlockRuleType.RegExpMatchRule
            self.__regExp = QRegExp(parsedLine, self.__caseSensitivity,
                                    QRegExp.RegExp)
            # No literal pre-filter for real regular expressions: splitting
            # them at '|', '*' and '^' does not yield strings that must be
            # contained in a matching URL (e.g. for an alternation).
            self.__stringMatchers = []
            return

        # Remove starting / ending wildcards (*)
        if parsedLine.startswith("*"):
            parsedLine = parsedLine[1:]
        if parsedLine.endswith("*"):
            parsedLine = parsedLine[:-1]

        # Fast string matching for domain here
        if self.__filterIsOnlyDomain(parsedLine):
            # strip the leading '||' and the trailing '^'
            self.__type = AdBlockRuleType.DomainMatchRule
            self.__matchString = parsedLine[2:-1]
            return

        # If rule contains '|' only at the end, string matching can be used
        if self.__filterIsOnlyEndsMatch(parsedLine):
            self.__type = AdBlockRuleType.StringEndsMatchRule
            self.__matchString = parsedLine[:-1]
            return

        # If there is still a wildcard (*) or separator (^) or (|),
        # the rule must be modified to comply with QRegExp.
        if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
            self.__type = AdBlockRuleType.RegExpMatchRule
            pattern = self.__convertPatternToRegExp(parsedLine)
            self.__regExp = QRegExp(pattern, self.__caseSensitivity,
                                    QRegExp.RegExp)
            self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
            return

        # This rule matches all URLs
        if len(parsedLine) == 0:
            if self.__options == AdBlockRuleOption.NoOption:
                # matching everything without any restriction makes no
                # sense
                self.__isInternalDisabled = True
                self.__type = AdBlockRuleType.Invalid
                return

            self.__type = AdBlockRuleType.MatchAllUrlsRule
            return

        # no regexp required
        self.__type = AdBlockRuleType.StringContainsMatchRule
        self.__matchString = parsedLine

    def __parseCssRule(self, parsedLine):
        """
        Private method to parse an element hiding (CSS) rule.

        @param parsedLine filter line containing '##' or '#@#'
        @type str
        """
        self.__type = AdBlockRuleType.CssRule
        pos = parsedLine.find("#")

        # domain restricted rule
        if not parsedLine.startswith("##"):
            self.__parseDomains(parsedLine[:pos], ",")

        # '#@#' introduces an exception, '##' a normal hiding rule
        self.__isException = parsedLine[pos + 1] == "@"
        if self.__isException:
            self.__matchString = parsedLine[pos + 3:]
        else:
            self.__matchString = parsedLine[pos + 2:]

    def __parseOption(self, option):
        """
        Private method to parse a single option of the '$' option list.

        Option names are compared exactly; a leading '~' marks the inverse
        of a request type option.

        @param option option string to be parsed
        @type str
        @return flag indicating that the option was handled
        @rtype bool
        """
        if option.startswith("domain="):
            self.__parseDomains(option[7:], "|")
            return True

        if option == "match-case":
            self.__caseSensitivity = Qt.CaseSensitive
            return True

        if option == "collapse":
            # Hiding placeholders of blocked elements is enabled by default
            return True

        # options that are valid for exception rules only
        if option == "document":
            if self.__isException:
                self.setOption(AdBlockRuleOption.DocumentOption)
                return True
            return False
        if option == "elemhide":
            if self.__isException:
                self.setOption(AdBlockRuleOption.ElementHideOption)
                return True
            return False

        # options that may be inverted with a leading '~'
        invertibleOptions = {
            "third-party": AdBlockRuleOption.ThirdPartyOption,
            "object": AdBlockRuleOption.ObjectOption,
            "subdocument": AdBlockRuleOption.SubdocumentOption,
            "xmlhttprequest": AdBlockRuleOption.XMLHttpRequestOption,
            "image": AdBlockRuleOption.ImageOption,
            "script": AdBlockRuleOption.ScriptOption,
            "stylesheet": AdBlockRuleOption.StyleSheetOption,
            "object-subrequest": AdBlockRuleOption.ObjectSubrequestOption,
            "ping": AdBlockRuleOption.PingOption,
            "media": AdBlockRuleOption.MediaOption,
            "font": AdBlockRuleOption.FontOption,
            "other": AdBlockRuleOption.OtherOption,
        }
        inverted = option.startswith("~")
        name = option[1:] if inverted else option
        ruleOption = invertibleOptions.get(name)
        if ruleOption is None:
            return False

        self.setOption(ruleOption)
        self.__setException(ruleOption, inverted)
        return True

    def __parseDomains(self, domains, separator):
        """
        Private method to parse a string with a domain list.

        Domains prefixed with '~' are added to the blocked domains, all
        others to the allowed domains.

        @param domains list of domains
        @type str
        @param separator separator character used by the list
        @type str
        """
        for domain in domains.split(separator):
            if not domain:
                continue
            if domain.startswith("~"):
                self.__blockedDomains.append(domain[1:])
            else:
                self.__allowedDomains.append(domain)

        if self.__blockedDomains or self.__allowedDomains:
            self.setOption(AdBlockRuleOption.DomainRestrictedOption)

    def networkMatch(self, request, domain, encodedUrl):
        """
        Public method to check the rule for a match.

        The URL has to match the rule pattern and the request has to pass
        every restriction (domain, third-party, request type) set for the
        rule.

        @param request reference to the network request
        @type QWebEngineUrlRequestInfo
        @param domain domain name
        @type str
        @param encodedUrl string encoded URL to be checked
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if (
            self.__type == AdBlockRuleType.CssRule or
            not self.__isEnabled or
            self.__isInternalDisabled
        ):
            return False

        if not self.__stringMatch(domain, encodedUrl):
            return False

        # check domain restrictions
        if (
            self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
            not self.matchDomain(request.firstPartyUrl().host())
        ):
            return False

        # check third-party and request type restrictions
        restrictions = (
            (AdBlockRuleOption.ThirdPartyOption, self.matchThirdParty),
            (AdBlockRuleOption.ObjectOption, self.matchObject),
            (AdBlockRuleOption.SubdocumentOption, self.matchSubdocument),
            (AdBlockRuleOption.XMLHttpRequestOption,
             self.matchXmlHttpRequest),
            (AdBlockRuleOption.ImageOption, self.matchImage),
            (AdBlockRuleOption.ScriptOption, self.matchScript),
            (AdBlockRuleOption.StyleSheetOption, self.matchStyleSheet),
            (AdBlockRuleOption.ObjectSubrequestOption,
             self.matchObjectSubrequest),
            (AdBlockRuleOption.PingOption, self.matchPing),
            (AdBlockRuleOption.MediaOption, self.matchMedia),
            (AdBlockRuleOption.FontOption, self.matchFont),
            (AdBlockRuleOption.OtherOption, self.matchOther),
        )
        for option, matcher in restrictions:
            if self.__hasOption(option) and not matcher(request):
                return False

        return True

    def urlMatch(self, url):
        """
        Public method to check an URL against the rule.

        Only rules with the 'document' or 'elemhide' option can match.

        @param url URL to check
        @type QUrl
        @return flag indicating a match
        @rtype bool
        """
        if (
            not self.__hasOption(AdBlockRuleOption.DocumentOption) and
            not self.__hasOption(AdBlockRuleOption.ElementHideOption)
        ):
            return False

        encodedUrl = bytes(url.toEncoded()).decode()
        domain = url.host()
        return self.__stringMatch(domain, encodedUrl)

    def __stringMatch(self, domain, encodedUrl):
        """
        Private method to match the rule pattern against a URL.

        @param domain domain to match
        @type str
        @param encodedUrl URL in encoded form
        @type str
        @return flag indicating a match
        @rtype bool
        """
        ignoreCase = self.__caseSensitivity == Qt.CaseInsensitive

        if self.__type == AdBlockRuleType.StringContainsMatchRule:
            if ignoreCase:
                return self.__matchString.lower() in encodedUrl.lower()
            return self.__matchString in encodedUrl

        if self.__type == AdBlockRuleType.DomainMatchRule:
            return self.__isMatchingDomain(domain, self.__matchString)

        if self.__type == AdBlockRuleType.StringEndsMatchRule:
            if ignoreCase:
                return encodedUrl.lower().endswith(
                    self.__matchString.lower())
            return encodedUrl.endswith(self.__matchString)

        if self.__type == AdBlockRuleType.RegExpMatchRule:
            # cheap substring test first, regular expression only if needed
            if not self.__isMatchingRegExpStrings(encodedUrl):
                return False
            return self.__regExp.indexIn(encodedUrl) != -1

        if self.__type == AdBlockRuleType.MatchAllUrlsRule:
            return True

        return False

    def matchDomain(self, domain):
        """
        Public method to match a domain.

        A blocked domain always wins. If allowed domains are given, the
        domain has to match one of them.

        @param domain domain name to check
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if not self.__isEnabled:
            return False

        if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
            return True

        if any(self.__isMatchingDomain(domain, dom)
               for dom in self.__blockedDomains):
            return False

        if self.__allowedDomains:
            return any(self.__isMatchingDomain(domain, dom)
                       for dom in self.__allowedDomains)

        # only blocked domains were given and none of them matched
        return bool(self.__blockedDomains)

    def __matchResourceType(self, req, option, resourceTypes):
        """
        Private method to match the resource type of a request.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @param option rule option the check is performed for
        @type AdBlockRuleOption
        @param resourceTypes resource types belonging to the option
        @type tuple of QWebEngineUrlRequestInfo.ResourceType
        @return flag indicating a match (inverted, if the option was given
            with a leading '~')
        @rtype bool
        """
        match = req.resourceType() in resourceTypes
        return match != self.__hasException(option)

    def matchThirdParty(self, req):
        """
        Public method to match a third-party rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype boolean
        """
        # Third-party matching should be performed on second-level domains
        firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
        host = toSecondLevelDomain(req.requestUrl())

        match = firstPartyHost != host
        return match != self.__hasException(
            AdBlockRuleOption.ThirdPartyOption)

    def matchObject(self, req):
        """
        Public method to match an object rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ObjectOption,
            (QWebEngineUrlRequestInfo.ResourceTypeObject,))

    def matchSubdocument(self, req):
        """
        Public method to match a sub-document rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype boolean
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.SubdocumentOption,
            (QWebEngineUrlRequestInfo.ResourceTypeSubFrame,))

    def matchXmlHttpRequest(self, req):
        """
        Public method to match a XmlHttpRequest rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.XMLHttpRequestOption,
            (QWebEngineUrlRequestInfo.ResourceTypeXhr,))

    def matchImage(self, req):
        """
        Public method to match an Image rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ImageOption,
            (QWebEngineUrlRequestInfo.ResourceTypeImage,))

    def matchScript(self, req):
        """
        Public method to match a Script rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.ScriptOption,
            (QWebEngineUrlRequestInfo.ResourceTypeScript,))

    def matchStyleSheet(self, req):
        """
        Public method to match a StyleSheet rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.StyleSheetOption,
            (QWebEngineUrlRequestInfo.ResourceTypeStylesheet,))

    def matchObjectSubrequest(self, req):
        """
        Public method to match an Object Subrequest rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype boolean
        """
        resourceTypes = (QWebEngineUrlRequestInfo.ResourceTypeSubResource,)
        # the plug-in resource type is only used with Qt/PyQt 5.7.0 or
        # newer
        if qVersionTuple() >= (5, 7, 0) and PYQT_VERSION >= 0x50700:
            resourceTypes += (
                QWebEngineUrlRequestInfo.ResourceTypePluginResource,)

        return self.__matchResourceType(
            req, AdBlockRuleOption.ObjectSubrequestOption, resourceTypes)

    def matchPing(self, req):
        """
        Public method to match a Ping rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.PingOption,
            (QWebEngineUrlRequestInfo.ResourceTypePing,))

    def matchMedia(self, req):
        """
        Public method to match a Media rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.MediaOption,
            (QWebEngineUrlRequestInfo.ResourceTypeMedia,))

    def matchFont(self, req):
        """
        Public method to match a Font rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.FontOption,
            (QWebEngineUrlRequestInfo.ResourceTypeFontResource,))

    def matchOther(self, req):
        """
        Public method to match any other rule.

        @param req request object to check
        @type QWebEngineUrlRequestInfo
        @return flag indicating a match
        @rtype bool
        """
        return self.__matchResourceType(
            req, AdBlockRuleOption.OtherOption,
            (
                QWebEngineUrlRequestInfo.ResourceTypeSubResource,
                QWebEngineUrlRequestInfo.ResourceTypeWorker,
                QWebEngineUrlRequestInfo.ResourceTypeSharedWorker,
                QWebEngineUrlRequestInfo.ResourceTypeServiceWorker,
                QWebEngineUrlRequestInfo.ResourceTypePrefetch,
                QWebEngineUrlRequestInfo.ResourceTypeFavicon,
                QWebEngineUrlRequestInfo.ResourceTypeUnknown,
            ))

    def isException(self):
        """
        Public method to check, if the rule defines an exception.

        @return flag indicating an exception
        @rtype bool
        """
        return self.__isException

    def setException(self, exception):
        """
        Public method to set the rule's exception flag.

        @param exception flag indicating an exception rule
        @type bool
        """
        self.__isException = exception

    def isEnabled(self):
        """
        Public method to check, if the rule is enabled.

        @return flag indicating enabled state
        @rtype bool
        """
        return self.__isEnabled

    def setEnabled(self, enabled):
        """
        Public method to set the rule's enabled state.

        @param enabled flag indicating the new enabled state
        @type bool
        """
        self.__isEnabled = enabled

    def isCSSRule(self):
        """
        Public method to check, if the rule is a CSS rule.

        @return flag indicating a CSS rule
        @rtype bool
        """
        return self.__type == AdBlockRuleType.CssRule

    def cssSelector(self):
        """
        Public method to get the CSS selector of the rule.

        @return CSS selector
        @rtype str
        """
        return self.__matchString

    def isDocument(self):
        """
        Public method to check, if this is a document rule.

        @return flag indicating a document rule
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.DocumentOption)

    def isElementHiding(self):
        """
        Public method to check, if this is an element hiding rule.

        @return flag indicating an element hiding rule
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.ElementHideOption)

    def isDomainRestricted(self):
        """
        Public method to check, if this rule is restricted by domain.

        @return flag indicating a domain restriction
        @rtype bool
        """
        return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)

    def isComment(self):
        """
        Public method to check, if this is a comment.

        @return flag indicating a comment
        @rtype bool
        """
        return self.__filter.startswith("!")

    def isHeader(self):
        """
        Public method to check, if this is a header.

        @return flag indicating a header
        @rtype bool
        """
        return self.__filter.startswith("[Adblock")

    def isSlow(self):
        """
        Public method to check, if this is a slow rule.

        A rule is considered slow, if it needs a regular expression.

        @return flag indicating a slow rule
        @rtype bool
        """
        return self.__regExp is not None

    def isInternalDisabled(self):
        """
        Public method to check, if this rule was disabled internally.

        @return flag indicating an internally disabled rule
        @rtype bool
        """
        return self.__isInternalDisabled

    def __convertPatternToRegExp(self, wildcardPattern):
        """
        Private method to convert a wildcard pattern to a regular expression.

        @param wildcardPattern string containing the wildcard pattern
        @type str
        @return string containing a regular expression
        @rtype string
        """
        # These replacement texts contain regexp escapes like '\w' and
        # '\d'. They are inserted through a function because re.sub()
        # treats a string replacement as a template and rejects such
        # escapes as 'bad escape' (Python 3.7 and newer).
        extendedAnchor = r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?"
        separator = r"(?:[^\w\d\-.%]|$)"

        # remove multiple wildcards
        pattern = re.sub(r"\*+", "*", wildcardPattern)
        # remove anchors following separator placeholder
        pattern = re.sub(r"\^\|$", "^", pattern)
        # remove leading wildcards
        pattern = re.sub(r"^(\*)", "", pattern)
        # remove trailing wildcards
        pattern = re.sub(r"(\*)$", "", pattern)
        # escape special symbols
        pattern = re.sub(r"(\W)", r"\\\1", pattern)
        # process extended anchor at expression start
        pattern = re.sub(r"^\\\|\\\|", lambda _match: extendedAnchor,
                         pattern)
        # process separator placeholders
        pattern = re.sub(r"\\\^", lambda _match: separator, pattern)
        # process anchor at expression start
        pattern = re.sub(r"^\\\|", "^", pattern)
        # process anchor at expression end
        pattern = re.sub(r"\\\|$", "$", pattern)
        # replace wildcards by .*
        pattern = re.sub(r"\\\*", ".*", pattern)

        return pattern

    def __hasOption(self, opt):
        """
        Private method to check, if the given option has been set.

        @param opt option to check for
        @type AdBlockRuleOption
        @return flag indicating the state of the option
        @rtype bool
        """
        return bool(self.__options & opt)

    def setOption(self, opt):
        """
        Public method to set the given option.

        @param opt option to be set
        @type AdBlockRuleOption
        """
        self.__options |= opt

    def __hasException(self, opt):
        """
        Private method to check, if the given option has been set as an
        exception.

        @param opt option to check for
        @type AdBlockRuleOption
        @return flag indicating the exception state of the option
        @rtype bool
        """
        return bool(self.__exceptions & opt)

    def __setException(self, opt, on):
        """
        Private method to set the given option as an exception.

        @param opt option to be set
        @type AdBlockRuleOption
        @param on flag indicating to set or unset the exception
        @type bool
        """
        if on:
            self.__exceptions |= opt
        else:
            self.__exceptions &= ~opt

    def __filterIsOnlyDomain(self, filterString):
        """
        Private method to check, if the given filter is a domain only filter.

        Such a filter starts with '||', ends with '^' and contains none of
        the characters '/', ':', '?', '=', '&' and '*'.

        @param filterString filter string to be checked
        @type str
        @return flag indicating a domain only filter
        @rtype bool
        """
        if not (filterString.startswith("||") and
                filterString.endswith("^")):
            return False

        return not any(filterChar in "/:?=&*" for filterChar in filterString)

    def __filterIsOnlyEndsMatch(self, filterString):
        """
        Private method to check, if the given filter is to match against the
        end of a string.

        This is the case, if the only special character of the filter is a
        '|' at its very end.

        @param filterString filter string to be checked
        @type str
        @return flag indicating a end of string match filter
        @rtype bool
        """
        for index, filterChar in enumerate(filterString):
            if filterChar in "^*":
                return False
            if filterChar == "|":
                # the first '|' decides: it has to be the last character
                return index == len(filterString) - 1

        return False

    def __isMatchingDomain(self, domain, filterString):
        """
        Private method to check, if a given domain matches the given filter
        string.

        The domain matches, if it is equal to the filter string or if it is
        a sub-domain of it.

        @param domain domain to be checked
        @type str
        @param filterString filter string to check against
        @type str
        @return flag indicating a match
        @rtype bool
        """
        if filterString == domain:
            return True

        if not filterString:
            return False

        # The filter has to be a suffix starting at a label boundary. The
        # suffix itself is tested because the filter string may occur
        # earlier in the domain as well.
        return domain.endswith("." + filterString)

    def __isMatchingRegExpStrings(self, url):
        """
        Private method to check the given URL against the fixed parts of
        the regexp.

        The comparison honors the case sensitivity of the rule.

        @param url URL to be checked
        @type str
        @return flag indicating a match
        @rtype bool
        """
        assert self.__regExp is not None

        if self.__caseSensitivity == Qt.CaseInsensitive:
            url = url.lower()
            return all(matcher.lower() in url
                       for matcher in self.__stringMatchers)

        return all(matcher in url for matcher in self.__stringMatchers)

    def __parseRegExpFilter(self, filterString):
        """
        Private method to split the given wildcard filter into strings that
        can be used with 'in'.

        The filter is split at the characters '|', '*' and '^'; parts with
        less than two characters are dropped.

        @param filterString filter string to be parsed
        @type str
        @return fixed string parts of the filter (without duplicates, in
            no particular order)
        @rtype list of str
        """
        parts = re.split(r"[|*^]", filterString)
        return list({part for part in parts if len(part) > 1})

    def ruleType(self):
        """
        Public method to get the rule type.

        @return rule type
        @rtype AdBlockRuleType
        """
        return self.__type

    def ruleOptions(self):
        """
        Public method to get the rule options.

        @return rule options
        @rtype AdBlockRuleOption
        """
        return self.__options

    def ruleExceptions(self):
        """
        Public method to get the rule exceptions.

        @return rule exceptions
        @rtype AdBlockRuleOption
        """
        return self.__exceptions

    def matchString(self):
        """
        Public method to get the match string.

        @return match string (empty, if the rule type does not use one)
        @rtype str
        """
        return self.__matchString

    def caseSensitivity(self):
        """
        Public method to get the case sensitivity.

        @return case sensitivity
        @rtype Qt.CaseSensitivity
        """
        return self.__caseSensitivity

    def allowedDomains(self):
        """
        Public method to get a copy of the list of allowed domains.

        @return list of allowed domains
        @rtype list of str
        """
        return self.__allowedDomains[:]

    def blockedDomains(self):
        """
        Public method to get a copy of the list of blocked domains.

        @return list of blocked domains
        @rtype list of str
        """
        return self.__blockedDomains[:]

    def addBlockedDomains(self, domains):
        """
        Public method to add to the list of blocked domains.

        @param domains list of domains to be added
        @type str or list of str
        """
        if isinstance(domains, list):
            self.__blockedDomains.extend(domains)
        else:
            self.__blockedDomains.append(domains)

    def getRegExpAndMatchers(self):
        """
        Public method to get the regular expression and associated string
        matchers.

        @return tuple containing a copy of the regular expression and of the
            list of string matchers ((None, []), if the rule has no regular
            expression)
        @rtype tuple of (QRegExp, list of str)
        """
        if self.__regExp is not None:
            return (QRegExp(self.__regExp), self.__stringMatchers[:])
        else:
            return (None, [])

    def copyFrom(self, other):
        """
        Public method to copy another AdBlock rule.

        @param other reference to the AdBlock rule to copy from
        @type AdBlockRule
        """
        self.__subscription = other.subscription()
        self.__type = other.ruleType()
        self.__options = other.ruleOptions()
        self.__exceptions = other.ruleExceptions()
        self.__filter = other.filter()
        self.__matchString = other.matchString()
        self.__caseSensitivity = other.caseSensitivity()
        self.__isEnabled = other.isEnabled()
        self.__isException = other.isException()
        self.__isInternalDisabled = other.isInternalDisabled()
        self.__allowedDomains = other.allowedDomains()
        self.__blockedDomains = other.blockedDomains()
        self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()

eric ide

mercurial