WebBrowser/AdBlock/AdBlockRule.py

changeset 6028
859f6894eed9
parent 5921
d4797da58218
child 6048
82ad8ec9548c
diff -r d056a536670e -r 859f6894eed9 WebBrowser/AdBlock/AdBlockRule.py
--- a/WebBrowser/AdBlock/AdBlockRule.py	Thu Dec 14 19:25:34 2017 +0100
+++ b/WebBrowser/AdBlock/AdBlockRule.py	Mon Dec 18 18:09:39 2017 +0100
@@ -11,6 +11,8 @@
 
 import re
 
+from enum import IntEnum, IntFlag
+
 from PyQt5.QtCore import PYQT_VERSION, Qt, QRegExp
 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo
 
@@ -21,8 +23,10 @@
     """
     Module function to get a second level domain from the given URL.
     
-    @param url URL to extract domain from (QUrl)
-    @return name of second level domain (string)
+    @param url URL to extract domain from
+    @type QUrl
+    @return name of second level domain
+    @rtype str
     """
     topLevelDomain = url.topLevelDomain()
     urlHost = url.host()
@@ -40,6 +44,43 @@
     return domain + topLevelDomain
 
 
+class AdBlockRuleType(IntEnum):
+    """
+    Class implementing the rule type enum (exclusive values, no bit flags).
+    """
+    CssRule = 0                     # element hiding rule ("##" or "#@#")
+    DomainMatchRule = 1             # request domain is compared
+    RegExpMatchRule = 2             # URL is checked with a QRegExp
+    StringEndsMatchRule = 3         # URL must end with the match string
+    StringContainsMatchRule = 4     # URL must contain the match string
+    MatchAllUrlsRule = 5            # empty pattern with options only
+    Invalid = 6                     # comment, empty or unsupported rule
+
+
+class AdBlockRuleOption(IntFlag):
+    """
+    Class defining the combinable option bits of an AdBlock rule.
+    """
+    NoOption = 0
+    DomainRestrictedOption = 1 << 0
+    ThirdPartyOption = 1 << 1
+    ObjectOption = 1 << 2
+    SubdocumentOption = 1 << 3
+    XMLHttpRequestOption = 1 << 4
+    ImageOption = 1 << 5
+    ScriptOption = 1 << 6
+    StyleSheetOption = 1 << 7
+    ObjectSubrequestOption = 1 << 8
+    PingOption = 1 << 9
+    MediaOption = 1 << 10
+    FontOption = 1 << 11
+    OtherOption = 1 << 12
+
+    # Options that are only accepted on exception rules
+    DocumentOption = 1 << 13
+    ElementHideOption = 1 << 14
+
+
 class AdBlockRule(object):
     """
     Class implementing the AdBlock rule.
@@ -48,45 +89,27 @@
         """
         Constructor
         
-        @param filterRule filter string of the rule (string)
+        @param filterRule filter string of the rule
+        @type str
         @param subscription reference to the subscription object
-            (AdBlockSubscription)
+        @type AdBlockSubscription
         """
         self.__subscription = subscription
         
-        self.__regExp = QRegExp()
-        self.__options = []
+        self.__regExp = None
+        self.__stringMatchers = []
+        
         self.__blockedDomains = []
         self.__allowedDomains = []
         
-        self.__enabled = True
-        self.__cssRule = False
-        self.__exception = False
-        self.__internalDisabled = False
-        self.__domainRestricted = False
-        self.__useRegExp = False
-        self.__useDomainMatch = False
-        self.__useEndsMatch = False
-        self.__thirdParty = False
-        self.__thirdPartyException = False
-        self.__object = False
-        self.__objectException = False
-        self.__subdocument = False
-        self.__subdocumentException = False
-        self.__xmlhttprequest = False
-        self.__xmlhttprequestException = False
-        self.__document = False
-        self.__elemhide = False
+        self.__isEnabled = True
+        self.__isException = False
+        self.__isInternalDisabled = False
         self.__caseSensitivity = Qt.CaseInsensitive
-        self.__image = False
-        self.__imageException = False
-        self.__script = False
-        self.__scriptException = False
-        self.__stylesheet = False
-        self.__stylesheetException = False
-        self.__objectSubrequest = False
-        self.__objectSubrequestException = False
-        self.__stringMatchRule = False
+        
+        self.__type = AdBlockRuleType.StringContainsMatchRule
+        self.__options = AdBlockRuleOption.NoOption
+        self.__exceptions = AdBlockRuleOption.NoOption
         
         self.setFilter(filterRule)
     
@@ -94,15 +117,26 @@
         """
         Public method to get the subscription this rule belongs to.
         
-        @return subscription of the rule (AdBlockSubscription)
+        @return subscription of the rule
+        @rtype AdBlockSubscription
         """
         return self.__subscription
     
+    def setSubscription(self, subscription):
+        """
+        Public method to set the subscription this rule belongs to.
+        
+        @param subscription subscription object to be stored for the rule
+        @type AdBlockSubscription
+        """
+        self.__subscription = subscription
+    
     def filter(self):
         """
         Public method to get the rule filter string.
         
-        @return rule filter string (string)
+        @return rule filter string
+        @rtype str
         """
         return self.__filter
     
@@ -110,7 +144,8 @@
         """
         Public method to set the rule filter string.
         
-        @param filterRule rule filter string (string)
+        @param filterRule rule filter string
+        @type str
         """
         self.__filter = filterRule
         self.__parseFilter()
@@ -122,13 +157,15 @@
         parsedLine = self.__filter
         
         # empty rule or just a comment
-        if not parsedLine.strip() or parsedLine.startswith(("!", "[Adblock")):
-            self.__enabled = False
+        if not parsedLine.strip() or parsedLine.startswith("!"):
+            self.__isEnabled = False
+            self.__isInternalDisabled = True
+            self.__type = AdBlockRuleType.Invalid
             return
         
         # CSS element hiding rule
         if "##" in parsedLine or "#@#" in parsedLine:
-            self.__cssRule = True
+            self.__type = AdBlockRuleType.CssRule
             pos = parsedLine.find("#")
             
             # domain restricted rule
@@ -136,24 +173,26 @@
                 domains = parsedLine[:pos]
                 self.__parseDomains(domains, ",")
             
-            self.__exception = parsedLine[pos + 1] == "@"
+            self.__isException = parsedLine[pos + 1] == "@"
+            if self.__isException:
+                self.__matchString = parsedLine[pos + 3:]
+            else:
+                self.__matchString = parsedLine[pos + 2:]
             
-            if self.__exception:
-                self.__cssSelector = parsedLine[pos + 3:]
-            else:
-                self.__cssSelector = parsedLine[pos + 2:]
             # CSS rule cannot have more options -> stop parsing
             return
         
         # Exception always starts with @@
         if parsedLine.startswith("@@"):
-            self.__exception = True
+            self.__isException = True
             parsedLine = parsedLine[2:]
         
         # Parse all options following '$' character
         optionsIndex = parsedLine.find("$")
         if optionsIndex >= 0:
-            options = parsedLine[optionsIndex + 1:].split(",")
+            options = [opt
+                       for opt in parsedLine[optionsIndex + 1:].split(",")
+                       if opt]
             
             handledOptions = 0
             for option in options:
@@ -164,47 +203,74 @@
                     self.__caseSensitivity = Qt.CaseSensitive
                     handledOptions += 1
                 elif option.endswith("third-party"):
-                    self.__thirdParty = True
-                    self.__thirdPartyException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.ThirdPartyOption)
+                    self.__setException(AdBlockRuleOption.ThirdPartyOption,
+                                        option.startswith("~"))
                     handledOptions += 1
                 elif option.endswith("object"):
-                    self.__object = True
-                    self.__objectException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.ObjectOption)
+                    self.__setException(AdBlockRuleOption.ObjectOption,
+                                        option.startswith("~"))
                     handledOptions += 1
                 elif option.endswith("subdocument"):
-                    self.__subdocument = True
-                    self.__subdocumentException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.SubdocumentOption)
+                    self.__setException(AdBlockRuleOption.SubdocumentOption,
+                                        option.startswith("~"))
                     handledOptions += 1
                 elif option.endswith("xmlhttprequest"):
-                    self.__xmlhttprequest = True
-                    self.__xmlhttprequestException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.XMLHttpRequestOption)
+                    self.__setException(AdBlockRuleOption.XMLHttpRequestOption,
+                                        option.startswith("~"))
                     handledOptions += 1
                 elif option.endswith("image"):
-                    self.__image = True
-                    self.__imageException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.ImageOption)
+                    self.__setException(AdBlockRuleOption.ImageOption,
+                                        option.startswith("~"))
                 elif option.endswith("script"):
-                    self.__script = True
-                    self.__scriptException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.ScriptOption)
+                    self.__setException(AdBlockRuleOption.ScriptOption,
+                                        option.startswith("~"))
                 elif option.endswith("stylesheet"):
-                    self.__stylesheet = True
-                    self.__stylesheetException = option.startswith("~")
+                    self.setOption(AdBlockRuleOption.StyleSheetOption)
+                    self.__setException(AdBlockRuleOption.StyleSheetOption,
+                                        option.startswith("~"))
                 elif option.endswith("object-subrequest"):
-                    self.__objectSubrequest = True
-                    self.__objectSubrequestException = option.startswith("~")
-                elif option == "document" and self.__exception:
-                    self.__document = True
+                    self.setOption(AdBlockRuleOption.ObjectSubrequestOption)
+                    self.__setException(
+                        AdBlockRuleOption.ObjectSubrequestOption,
+                        option.startswith("~"))
+                elif option.endswith("ping"):
+                    self.setOption(AdBlockRuleOption.PingOption)
+                    self.__setException(AdBlockRuleOption.PingOption,
+                                        option.startswith("~"))
+                elif option.endswith("media"):
+                    self.setOption(AdBlockRuleOption.MediaOption)
+                    self.__setException(AdBlockRuleOption.MediaOption,
+                                        option.startswith("~"))
+                elif option.endswith("font"):
+                    self.setOption(AdBlockRuleOption.FontOption)
+                    self.__setException(AdBlockRuleOption.FontOption,
+                                        option.startswith("~"))
+                elif option.endswith("other"):
+                    self.setOption(AdBlockRuleOption.OtherOption)
+                    self.__setException(AdBlockRuleOption.OtherOption,
+                                        option.startswith("~"))
+                elif option == "document" and self.__isException:
+                    self.setOption(AdBlockRuleOption.DocumentOption)
                     handledOptions += 1
-                elif option == "elemhide" and self.__exception:
-                    self.__elemhide = True
+                elif option == "elemhide" and self.__isException:
+                    self.setOption(AdBlockRuleOption.ElementHideOption)
                     handledOptions += 1
                 elif option == "collapse":
-                    # Hiding placeholders of blocked elements
+                    # Hiding placeholders of blocked elements is enabled by
+                    # default
                     handledOptions += 1
             
             # If we don't handle all options, it's safer to just disable
             # this rule
             if handledOptions != len(options):
-                self.__internalDisabled = True
+                self.__isInternalDisabled = True
+                self.__type = AdBlockRuleType.Invalid
                 return
             
             parsedLine = parsedLine[:optionsIndex]
@@ -212,57 +278,66 @@
         # Rule is classic regexp
         if parsedLine.startswith("/") and parsedLine.endswith("/"):
             parsedLine = parsedLine[1:-1]
-            self.__useRegExp = True
+            self.__type = AdBlockRuleType.RegExpMatchRule
             self.__regExp = QRegExp(parsedLine, self.__caseSensitivity,
                                     QRegExp.RegExp)
+            self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
             return
         
-        # Remove starting / ending wildcards
+        # Remove starting / ending wildcards (*)
         if parsedLine.startswith("*"):
             parsedLine = parsedLine[1:]
         if parsedLine.endswith("*"):
             parsedLine = parsedLine[:-1]
         
-        # Fast string matching for domain can be used
-        if parsedLine.startswith("||") and \
-           parsedLine.endswith("^") and \
-           QRegExp("[/:?=&\\*]").indexIn(parsedLine) == -1:
+        # Fast string matching can be used for pure domain rules
+        if self.__filterIsOnlyDomain(parsedLine):
             parsedLine = parsedLine[2:-1]
-            self.__useDomainMatch = True
+            self.__type = AdBlockRuleType.DomainMatchRule
             self.__matchString = parsedLine
             return
         
         # If rule contains '|' only at the end, string matching can be used
-        if parsedLine.endswith("|") and \
-           QRegExp("[\\^\\*]").indexIn(parsedLine) == -1 and \
-           parsedLine.count("|") == 1:
+        if self.__filterIsOnlyEndsMatch(parsedLine):
             parsedLine = parsedLine[:-1]
-            self.__useEndsMatch = True
+            self.__type = AdBlockRuleType.StringEndsMatchRule
             self.__matchString = parsedLine
             return
         
         # If there is still a wildcard (*) or separator (^) or (|),
         # the rule must be modified to comply with QRegExp.
         if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
+            self.__type = AdBlockRuleType.RegExpMatchRule
             pattern = self.__convertPatternToRegExp(parsedLine)
-            self.__useRegExp = True
             self.__regExp = QRegExp(pattern, self.__caseSensitivity,
                                     QRegExp.RegExp)
+            self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
+            return
+        
+        # This rule matches all URLs
+        if len(parsedLine) == 0:
+            if self.__options == AdBlockRuleOption.NoOption:
+                self.__isInternalDisabled = True
+                self.__type = AdBlockRuleType.Invalid
+                return
+            
+            self.__type = AdBlockRuleType.MatchAllUrlsRule
             return
         
         # no regexp required
-        self.__useRegExp = False
+        self.__type = AdBlockRuleType.StringContainsMatchRule
         self.__matchString = parsedLine
-        self.__stringMatchRule = True
     
     def __parseDomains(self, domains, separator):
         """
         Private method to parse a string with a domain list.
         
-        @param domains list of domains (string)
-        @param separator separator character used by the list (string)
+        @param domains list of domains
+        @type str
+        @param separator separator character used by the list
+        @type str
         """
-        domainsList = domains.split(separator)
+        domainsList = [d for d in domains.split(separator) if d]
         
         for domain in domainsList:
             if not domain:
@@ -272,8 +347,8 @@
             else:
                 self.__allowedDomains.append(domain)
         
-        self.__domainRestricted = \
-            bool(self.__blockedDomains) or bool(self.__allowedDomains)
+        if bool(self.__blockedDomains) or bool(self.__allowedDomains):
+            self.setOption(AdBlockRuleOption.DomainRestrictedOption)
     
     def networkMatch(self, request, domain, encodedUrl):
         """
@@ -288,48 +363,72 @@
         @return flag indicating a match
         @rtype bool
         """
-        if self.__cssRule or not self.__enabled or self.__internalDisabled:
+        if self.__type == AdBlockRuleType.CssRule or \
+           not self.__isEnabled or \
+           self.__isInternalDisabled:
             return False
         
         matched = self.__stringMatch(domain, encodedUrl)
         
         if matched:
             # check domain restrictions
-            if self.__domainRestricted and \
-                    not self.matchDomain(request.firstPartyUrl().host()):
+            if self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and \
+               not self.matchDomain(request.firstPartyUrl().host()):
                 return False
             
             # check third-party restrictions
-            if self.__thirdParty and not self.matchThirdParty(request):
+            if self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and \
+               not self.matchThirdParty(request):
                 return False
             
             # check object restrictions
-            if self.__object and not self.matchObject(request):
+            if self.__hasOption(AdBlockRuleOption.ObjectOption) and \
+               not self.matchObject(request):
                 return False
             
             # check subdocument restrictions
-            if self.__subdocument and not self.matchSubdocument(request):
+            if self.__hasOption(AdBlockRuleOption.SubdocumentOption) and \
+               not self.matchSubdocument(request):
                 return False
             
             # check xmlhttprequest restriction
-            if self.__xmlhttprequest and not self.matchXmlHttpRequest(request):
+            if self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and \
+               not self.matchXmlHttpRequest(request):
                 return False
             
             # check image restriction
-            if self.__image and not self.matchImage(request):
+            if self.__hasOption(AdBlockRuleOption.ImageOption) and \
+               not self.matchImage(request):
                 return False
             
             # check script restriction
-            if self.__script and not self.matchScript(request):
+            if self.__hasOption(AdBlockRuleOption.ScriptOption) and \
+               not self.matchScript(request):
                 return False
             
             # check stylesheet restriction
-            if self.__stylesheet and not self.matchStyleSheet(request):
+            if self.__hasOption(AdBlockRuleOption.StyleSheetOption) and \
+               not self.matchStyleSheet(request):
                 return False
             
             # check object-subrequest restriction
-            if self.__objectSubrequest and \
-                    not self.matchObjectSubrequest(request):
+            if self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and \
+               not self.matchObjectSubrequest(request):
+                return False
+            
+            # check ping restriction
+            if self.__hasOption(AdBlockRuleOption.PingOption) and \
+               not self.matchPing(request):
+                return False
+            
+            # check media restriction
+            if self.__hasOption(AdBlockRuleOption.MediaOption) and \
+               not self.matchMedia(request):
+                return False
+            
+            # check font restriction
+            if self.__hasOption(AdBlockRuleOption.FontOption) and \
+               not self.matchFont(request):
                 return False
         
         return matched
@@ -338,10 +437,13 @@
         """
         Public method to check an URL against the rule.
         
-        @param url URL to check (QUrl)
-        @return flag indicating a match (boolean)
+        @param url URL to check
+        @type QUrl
+        @return flag indicating a match
+        @rtype bool
         """
-        if not self.__document and not self.__elemhide:
+        if not self.__hasOption(AdBlockRuleOption.DocumentOption) and \
+           not self.__hasOption(AdBlockRuleOption.ElementHideOption):
             return False
         
         encodedUrl = bytes(url.toEncoded()).decode()
@@ -359,26 +461,28 @@
         @return flag indicating a match
         @rtype bool
         """
-        if self.__cssRule or not self.__enabled or self.__internalDisabled:
-            return False
-        
         matched = False
         
-        if self.__useRegExp:
-            matched = self.__regExp.indexIn(encodedUrl) != -1
-        elif self.__useDomainMatch:
-            matched = domain.endswith(self.__matchString)
-        elif self.__useEndsMatch:
+        if self.__type == AdBlockRuleType.StringContainsMatchRule:
+            if self.__caseSensitivity == Qt.CaseInsensitive:
+                matched = self.__matchString.lower() in encodedUrl.lower()
+            else:
+                matched = self.__matchString in encodedUrl
+        elif self.__type == AdBlockRuleType.DomainMatchRule:
+            matched = self.__isMatchingDomain(domain, self.__matchString)
+        elif self.__type == AdBlockRuleType.StringEndsMatchRule:
             if self.__caseSensitivity == Qt.CaseInsensitive:
                 matched = encodedUrl.lower().endswith(
                     self.__matchString.lower())
             else:
                 matched = encodedUrl.endswith(self.__matchString)
-        else:
-            if self.__caseSensitivity == Qt.CaseInsensitive:
-                matched = self.__matchString.lower() in encodedUrl.lower()
+        elif self.__type == AdBlockRuleType.RegExpMatchRule:
+            if not self.__isMatchingRegExpStrings(encodedUrl):
+                matched = False
             else:
-                matched = self.__matchString in encodedUrl
+                matched = self.__regExp.indexIn(encodedUrl) != -1
+        elif self.__type == AdBlockRuleType.MatchAllUrlsRule:
+            matched = True
         
         return matched
     
@@ -386,40 +490,44 @@
         """
         Public method to match a domain.
         
-        @param domain domain name to check (string)
-        @return flag indicating a match (boolean)
+        @param domain domain name to check
+        @type str
+        @return flag indicating a match
+        @rtype bool
         """
-        if not self.__enabled:
+        if not self.__isEnabled:
             return False
         
-        if not self.__domainRestricted:
+        if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
             return True
         
         if len(self.__blockedDomains) == 0:
             for dom in self.__allowedDomains:
-                if domain.endswith(dom):
+                if self.__isMatchingDomain(domain, dom):
                     return True
         elif len(self.__allowedDomains) == 0:
             for dom in self.__blockedDomains:
-                if domain.endswith(dom):
+                if self.__isMatchingDomain(domain, dom):
                     return False
             return True
         else:
             for dom in self.__blockedDomains:
-                if domain.endswith(dom):
+                if self.__isMatchingDomain(domain, dom):
                     return False
             for dom in self.__allowedDomains:
-                if domain.endswith(dom):
+                if self.__isMatchingDomain(domain, dom):
                     return True
         
         return False
     
     def matchThirdParty(self, req):
         """
-        Public slot to match a third-party rule.
+        Public method to match a third-party rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         # Third-party matching should be performed on second-level domains
         firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
@@ -427,109 +535,123 @@
         
         match = firstPartyHost != host
         
-        if self.__thirdPartyException:
+        if self.__hasException(AdBlockRuleOption.ThirdPartyOption):
             return not match
         else:
             return match
     
     def matchObject(self, req):
         """
-        Public slot to match an object rule.
+        Public method to match an object rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeObject)
         
-        if self.__objectException:
+        if self.__hasException(AdBlockRuleOption.ObjectOption):
             return not match
         else:
             return match
     
     def matchSubdocument(self, req):
         """
-        Public slot to match a sub-document rule.
+        Public method to match a sub-document rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() ==
             QWebEngineUrlRequestInfo.ResourceTypeSubFrame)
         
-        if self.__subdocumentException:
+        if self.__hasException(AdBlockRuleOption.SubdocumentOption):
             return not match
         else:
             return match
     
     def matchXmlHttpRequest(self, req):
         """
-        Public slot to match a XmlHttpRequest rule.
+        Public method to match a XmlHttpRequest rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeXhr)
         
-        if self.__xmlhttprequestException:
+        if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption):
             return not match
         else:
             return match
     
     def matchImage(self, req):
         """
-        Public slot to match an Image rule.
+        Public method to match an Image rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeImage)
         
-        if self.__imageException:
+        if self.__hasException(AdBlockRuleOption.ImageOption):
             return not match
         else:
             return match
     
     def matchScript(self, req):
         """
-        Public slot to match a Script rule.
+        Public method to match a Script rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeScript)
         
-        if self.__scriptException:
+        if self.__hasException(AdBlockRuleOption.ScriptOption):
             return not match
         else:
             return match
     
     def matchStyleSheet(self, req):
         """
-        Public slot to match a StyleSheet rule.
+        Public method to match a StyleSheet rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() ==
             QWebEngineUrlRequestInfo.ResourceTypeStylesheet)
         
-        if self.__stylesheetException:
+        if self.__hasException(AdBlockRuleOption.StyleSheetOption):
             return not match
         else:
             return match
     
     def matchObjectSubrequest(self, req):
         """
-        Public slot to match an Object Subrequest rule.
+        Public method to match an Object Subrequest rule.
         
-        @param req request object to check (QWebEngineUrlRequestInfo)
-        @return flag indicating a match (boolean)
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
         """
         match = (
             req.resourceType() ==
@@ -544,87 +666,169 @@
         else:
             return match
     
+    def matchPing(self, req):
+        """
+        Public method to match a Ping rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypePing)
+        
+        if self.__hasException(AdBlockRuleOption.PingOption):
+            return not match
+        else:
+            return match
+    
+    def matchMedia(self, req):
+        """
+        Public method to match a Media rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeMedia)
+        
+        if self.__hasException(AdBlockRuleOption.MediaOption):
+            return not match
+        else:
+            return match
+    
+    def matchFont(self, req):
+        """
+        Public method to match a Font rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() ==
+            QWebEngineUrlRequestInfo.ResourceTypeFontResource)
+        
+        if self.__hasException(AdBlockRuleOption.FontOption):
+            return not match
+        else:
+            return match
+    
+    def matchOther(self, req):
+        """
+        Public method to match any other rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = req.resourceType() in [
+            QWebEngineUrlRequestInfo.ResourceTypeSubResource,
+            QWebEngineUrlRequestInfo.ResourceTypeWorker,
+            QWebEngineUrlRequestInfo.ResourceTypeSharedWorker,
+            QWebEngineUrlRequestInfo.ResourceTypeServiceWorker,
+            QWebEngineUrlRequestInfo.ResourceTypePrefetch,
+            QWebEngineUrlRequestInfo.ResourceTypeFavicon,
+            QWebEngineUrlRequestInfo.ResourceTypeUnknown,
+        ]
+        
+        if self.__hasException(AdBlockRuleOption.OtherOption):
+            return not match
+        else:
+            return match
+    
     def isException(self):
         """
         Public method to check, if the rule defines an exception.
         
-        @return flag indicating an exception (boolean)
+        @return flag indicating an exception
+        @rtype bool
         """
-        return self.__exception
+        return self.__isException
     
     def setException(self, exception):
         """
         Public method to set the rule's exception flag.
         
-        @param exception flag indicating an exception rule (boolean)
+        @param exception flag indicating an exception rule
+        @type bool
         """
-        self.__exception = exception
+        self.__isException = exception
     
     def isEnabled(self):
         """
         Public method to check, if the rule is enabled.
         
-        @return flag indicating enabled state (boolean)
+        @return flag indicating enabled state
+        @rtype bool
         """
-        return self.__enabled
+        return self.__isEnabled
     
     def setEnabled(self, enabled):
         """
         Public method to set the rule's enabled state.
         
-        @param enabled flag indicating the new enabled state (boolean)
+        @param enabled flag indicating the new enabled state
+        @type bool
         """
-        self.__enabled = enabled
-        if not enabled:
-            self.__filter = "!" + self.__filter
-        else:
-            self.__filter = self.__filter[1:]
+        self.__isEnabled = enabled
     
     def isCSSRule(self):
         """
         Public method to check, if the rule is a CSS rule.
         
-        @return flag indicating a CSS rule (boolean)
+        @return flag indicating a CSS rule
+        @rtype bool
         """
-        return self.__cssRule
+        return self.__type == AdBlockRuleType.CssRule
     
     def cssSelector(self):
         """
         Public method to get the CSS selector of the rule.
         
-        @return CSS selector (string)
+        @return CSS selector
+        @rtype str
         """
-        return self.__cssSelector
+        return self.__matchString
     
     def isDocument(self):
         """
         Public method to check, if this is a document rule.
         
-        @return flag indicating a document rule (boolean)
+        @return flag indicating a document rule
+        @rtype bool
         """
-        return self.__document
+        return self.__hasOption(AdBlockRuleOption.DocumentOption)
     
     def isElementHiding(self):
         """
         Public method to check, if this is an element hiding rule.
         
-        @return flag indicating an element hiding rule (boolean)
+        @return flag indicating an element hiding rule
+        @rtype bool
         """
-        return self.__elemhide
+        return self.__hasOption(AdBlockRuleOption.ElementHideOption)
     
     def isDomainRestricted(self):
         """
         Public method to check, if this rule is restricted by domain.
         
-        @return flag indicating a domain restriction (boolean)
+        @return flag indicating a domain restriction
+        @rtype bool
         """
-        return self.__domainRestricted
+        return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)
     
     def isComment(self):
         """
         Public method to check, if this is a comment.
         
-        @return flag indicating a comment (boolean)
+        @return flag indicating a comment
+        @rtype bool
         """
         return self.__filter.startswith("!")
     
@@ -632,7 +836,8 @@
         """
         Public method to check, if this is a header.
         
-        @return flag indicating a header (boolean)
+        @return flag indicating a header
+        @rtype bool
         """
         return self.__filter.startswith("[Adblock")
     
@@ -640,24 +845,28 @@
         """
         Public method to check, if this is a slow rule.
         
-        @return flag indicating a slow rule (boolean)
+        @return flag indicating a slow rule
+        @rtype bool
         """
-        return self.__useRegExp
+        return self.__regExp is not None
     
     def isInternalDisabled(self):
         """
         Public method to check, if this rule was disabled internally.
         
-        @return flag indicating an internally disabled rule (boolean)
+        @return flag indicating an internally disabled rule
+        @rtype bool
         """
-        return self.__internalDisabled
+        return self.__isInternalDisabled
     
     def __convertPatternToRegExp(self, wildcardPattern):
         """
         Private method to convert a wildcard pattern to a regular expression.
         
-        @param wildcardPattern string containing the wildcard pattern (string)
-        @return string containing a regular expression (string)
+        @param wildcardPattern string containing the wildcard pattern
+        @type str
+        @return string containing a regular expression
+        @rtype str
         """
         pattern = wildcardPattern
         
@@ -685,3 +894,264 @@
         pattern = re.sub(r"\\\*", ".*", pattern)
         
         return pattern
+    
+    def __hasOption(self, opt):
+        """
+        Private method to check, if the given option has been set.
+        
+        @param opt option to check for
+        @type AdBlockRuleOption
+        @return flag indicating the state of the option
+        @rtype bool
+        """
+        return bool(self.__options & opt)
+    
+    def setOption(self, opt):
+        """
+        Public method to set the given option.
+        
+        @param opt option to be set
+        @type AdBlockRuleOption
+        """
+        self.__options |= opt
+    
+    def __hasException(self, opt):
+        """
+        Private method to check, if the given option has been set as an
+        exception.
+        
+        @param opt option to check for
+        @type AdBlockRuleOption
+        @return flag indicating the exception state of the option
+        @rtype bool
+        """
+        return bool(self.__exceptions & opt)
+    
+    def __setException(self, opt, on):
+        """
+        Private method to set the given option as an exception.
+        
+        @param opt option to be set
+        @type AdBlockRuleOption
+        @param on flag indicating to set or unset the exception
+        @type bool
+        """
+        if on:
+            self.__exceptions |= opt
+        else:
+            self.__exceptions &= ~opt
+    
+    def __filterIsOnlyDomain(self, filterString):
+        """
+        Private method to check, if the given filter is a domain only filter.
+        
+        @param filterString filter string to be checked
+        @type str
+        @return flag indicating a domain only filter
+        @rtype bool
+        """
+        if not filterString.endswith("^") or not filterString.startswith("||"):
+            return False
+        
+        for filterChar in filterString:
+            if filterChar in ["/", ":", "?", "=", "&", "*"]:
+                return False
+        
+        return True
+    
+    def __filterIsOnlyEndsMatch(self, filterString):
+        """
+        Private method to check, if the given filter is to match against the
+        end of a string.
+        
+        @param filterString filter string to be checked
+        @type str
+        @return flag indicating an end of string match filter
+        @rtype bool
+        """
+        index = 0
+        for filterChar in filterString:
+            if filterChar in ["^", "*"]:
+                return False
+            elif filterChar == "|":
+                return bool(index == len(filterString) - 1)
+            index += 1
+        
+        return False
+    
+    def __isMatchingDomain(self, domain, filterString):
+        """
+        Private method to check, if a given domain matches the given filter
+        string.
+        
+        @param domain domain to be checked
+        @type str
+        @param filterString filter string to check against
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        if filterString == domain:
+            return True
+        
+        if not domain.endswith(filterString):
+            return False
+        
+        index = domain.find(filterString)
+        
+        return bool(index > 0 and domain[index - 1] == ".")
+    
+    def __isMatchingRegExpStrings(self, url):
+        """
+        Private method to check the given URL against the fixed parts of
+        the regexp.
+        
+        @param url URL to be checked
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        assert self.__regExp is not None
+        
+        for matcher in self.__stringMatchers:
+            if matcher not in url:
+                return False
+        
+        return True
+    
+    def __parseRegExpFilter(self, filterString):
+        """
+        Private method to split the given regular expression into strings that
+        can be used with 'in'.
+        
+        @param filterString regexp filter string to be parsed
+        @type str
+        @return fixed string parts of the filter
+        @rtype list of str
+        """
+        matchers = []
+        
+        startPos = -1
+        for index in range(len(filterString)):
+            filterChar = filterString[index]
+            if filterChar in ["|", "*", "^"]:
+                sub = filterString[startPos:index]
+                if len(sub) > 1:
+                    matchers.append(sub)
+                startPos = index + 1
+        
+        sub = filterString[startPos:]
+        if len(sub) > 1:
+            matchers.append(sub)
+        
+        return list(set(matchers))
+    
+    def ruleType(self):
+        """
+        Public method to get the rule type.
+        
+        @return rule type
+        @rtype AdBlockRuleType
+        """
+        return self.__type
+    
+    def ruleOptions(self):
+        """
+        Public method to get the rule options.
+        
+        @return rule options
+        @rtype AdBlockRuleOption
+        """
+        return self.__options
+    
+    def ruleExceptions(self):
+        """
+        Public method to get the rule exceptions.
+        
+        @return rule exceptions
+        @rtype AdBlockRuleOption
+        """
+        return self.__exceptions
+    
+    def matchString(self):
+        """
+        Public method to get the match string.
+        
+        @return match string
+        @rtype str
+        """
+        return self.__matchString
+    
+    def caseSensitivity(self):
+        """
+        Public method to get the case sensitivity.
+        
+        @return case sensitivity
+        @rtype Qt.CaseSensitivity
+        """
+        return self.__caseSensitivity
+    
+    def allowedDomains(self):
+        """
+        Public method to get a copy of the list of allowed domains.
+        
+        @return list of allowed domains
+        @rtype list of str
+        """
+        return self.__allowedDomains[:]
+    
+    def blockedDomains(self):
+        """
+        Public method to get a copy of the list of blocked domains.
+        
+        @return list of blocked domains
+        @rtype list of str
+        """
+        return self.__blockedDomains[:]
+    
+    def addBlockedDomains(self, domains):
+        """
+        Public method to add to the list of blocked domains.
+        
+        @param domains domain or list of domains to be added
+        @type str or list of str
+        """
+        if isinstance(domains, list):
+            self.__blockedDomains.extend(domains)
+        else:
+            self.__blockedDomains.append(domains)
+    
+    def getRegExpAndMatchers(self):
+        """
+        Public method to get the regular expression and associated string
+        matchers.
+        
+        @return tuple containing the regular expression and the list of
+            string matchers
+        @rtype tuple of (QRegExp, list of str)
+        """
+        if self.__regExp is not None:
+            return (QRegExp(self.__regExp), self.__stringMatchers[:])
+        else:
+            return (None, [])
+    
+    def copyFrom(self, other):
+        """
+        Public method to copy another AdBlock rule.
+        
+        @param other reference to the AdBlock rule to copy from
+        @type AdBlockRule
+        """
+        self.__subscription = other.subscription()
+        self.__type = other.ruleType()
+        self.__options = other.ruleOptions()
+        self.__exceptions = other.ruleExceptions()
+        self.__filter = other.filter()
+        self.__matchString = other.matchString()
+        self.__caseSensitivity = other.caseSensitivity()
+        self.__isEnabled = other.isEnabled()
+        self.__isException = other.isException()
+        self.__isInternalDisabled = other.isInternalDisabled()
+        self.__allowedDomains = other.allowedDomains()
+        self.__blockedDomains = other.blockedDomains()
+        self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()

eric ide

mercurial