eric6/WebBrowser/AdBlock/AdBlockRule.py

changeset 6942
2602857055c5
parent 6645
ad476851d7e0
child 7201
6b42677d7043
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eric6/WebBrowser/AdBlock/AdBlockRule.py	Sun Apr 14 15:09:21 2019 +0200
@@ -0,0 +1,1157 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+
+"""
+Module implementing the AdBlock rule class.
+"""
+
+from __future__ import unicode_literals
+
+import re
+
+from enum import IntEnum
+
+from PyQt5.QtCore import PYQT_VERSION, Qt, QRegExp
+from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo
+
+from Globals import qVersionTuple
+
+
+def toSecondLevelDomain(url):
+    """
+    Module function to get a second level domain from the given URL.
+    
+    @param url URL to extract domain from
+    @type QUrl
+    @return name of second level domain
+    @rtype str
+    """
+    topLevelDomain = url.topLevelDomain()
+    urlHost = url.host()
+    
+    if not topLevelDomain or not urlHost:
+        return ""
+    
+    domain = urlHost[:len(urlHost) - len(topLevelDomain)]
+    if domain.count(".") == 0:
+        return urlHost
+    
+    while domain.count(".") != 0:
+        domain = domain[domain.find(".") + 1:]
+    
+    return domain + topLevelDomain
+
+
+class AdBlockRuleType(IntEnum):
+    """
+    Class implementing the rule type enum.
+    """
+    CssRule = 0
+    DomainMatchRule = 1
+    RegExpMatchRule = 2
+    StringEndsMatchRule = 3
+    StringContainsMatchRule = 4
+    MatchAllUrlsRule = 5
+    Invalid = 6
+
+
+class AdBlockRuleOption(IntEnum):
+    """
+    Class implementing the rule option enum.
+    """
+    NoOption = 0
+    DomainRestrictedOption = 1
+    ThirdPartyOption = 2
+    ObjectOption = 4
+    SubdocumentOption = 8
+    XMLHttpRequestOption = 16
+    ImageOption = 32
+    ScriptOption = 64
+    StyleSheetOption = 128
+    ObjectSubrequestOption = 256
+    PingOption = 512
+    MediaOption = 1024
+    FontOption = 2048
+    OtherOption = 4096
+    
+    # Exception only options
+    DocumentOption = 8192
+    ElementHideOption = 16384
+
+
+class AdBlockRule(object):
+    """
+    Class implementing the AdBlock rule.
+    """
+    def __init__(self, filterRule="", subscription=None):
+        """
+        Constructor
+        
+        @param filterRule filter string of the rule
+        @type str
+        @param subscription reference to the subscription object
+        @type AdBlockSubscription
+        """
+        self.__subscription = subscription
+        
+        self.__regExp = None
+        self.__stringMatchers = []
+        
+        self.__blockedDomains = []
+        self.__allowedDomains = []
+        
+        self.__isEnabled = True
+        self.__isException = False
+        self.__isInternalDisabled = False
+        self.__caseSensitivity = Qt.CaseInsensitive
+        
+        self.__type = AdBlockRuleType.StringContainsMatchRule
+        self.__options = AdBlockRuleOption.NoOption
+        self.__exceptions = AdBlockRuleOption.NoOption
+        
+        self.setFilter(filterRule)
+    
+    def subscription(self):
+        """
+        Public method to get the subscription this rule belongs to.
+        
+        @return subscription of the rule
+        @rtype AdBlockSubscription
+        """
+        return self.__subscription
+    
+    def setSubscription(self, subscription):
+        """
+        Public method to set the subscription this rule belongs to.
+        
+        @param subscription subscription of the rule
+        @type AdBlockSubscription
+        """
+        self.__subscription = subscription
+    
+    def filter(self):
+        """
+        Public method to get the rule filter string.
+        
+        @return rule filter string
+        @rtype str
+        """
+        return self.__filter
+    
+    def setFilter(self, filterRule):
+        """
+        Public method to set the rule filter string.
+        
+        @param filterRule rule filter string
+        @type str
+        """
+        self.__filter = filterRule
+        self.__parseFilter()
+    
+    def __parseFilter(self):
+        """
+        Private method to parse the filter pattern.
+        """
+        parsedLine = self.__filter
+        
+        # empty rule or just a comment
+        if not parsedLine.strip() or parsedLine.startswith("!"):
+            self.__isEnabled = False
+            self.__isInternalDisabled = True
+            self.__type = AdBlockRuleType.Invalid
+            return
+        
+        # CSS element hiding rule
+        if "##" in parsedLine or "#@#" in parsedLine:
+            self.__type = AdBlockRuleType.CssRule
+            pos = parsedLine.find("#")
+            
+            # domain restricted rule
+            if not parsedLine.startswith("##"):
+                domains = parsedLine[:pos]
+                self.__parseDomains(domains, ",")
+            
+            self.__isException = parsedLine[pos + 1] == "@"
+            if self.__isException:
+                self.__matchString = parsedLine[pos + 3:]
+            else:
+                self.__matchString = parsedLine[pos + 2:]
+            
+            # CSS rule cannot have more options -> stop parsing
+            return
+        
+        # Exception always starts with @@
+        if parsedLine.startswith("@@"):
+            self.__isException = True
+            parsedLine = parsedLine[2:]
+        
+        # Parse all options following '$' character
+        optionsIndex = parsedLine.find("$")
+        if optionsIndex >= 0:
+            options = [opt
+                       for opt in parsedLine[optionsIndex + 1:].split(",")
+                       if opt]
+            
+            handledOptions = 0
+            for option in options:
+                if option.startswith("domain="):
+                    self.__parseDomains(option[7:], "|")
+                    handledOptions += 1
+                elif option == "match-case":
+                    self.__caseSensitivity = Qt.CaseSensitive
+                    handledOptions += 1
+                elif option.endswith("third-party"):
+                    self.setOption(AdBlockRuleOption.ThirdPartyOption)
+                    self.__setException(AdBlockRuleOption.ThirdPartyOption,
+                                        option.startswith("~"))
+                    handledOptions += 1
+                elif option.endswith("object"):
+                    self.setOption(AdBlockRuleOption.ObjectOption)
+                    self.__setException(AdBlockRuleOption.ObjectOption,
+                                        option.startswith("~"))
+                    handledOptions += 1
+                elif option.endswith("subdocument"):
+                    self.setOption(AdBlockRuleOption.SubdocumentOption)
+                    self.__setException(AdBlockRuleOption.SubdocumentOption,
+                                        option.startswith("~"))
+                    handledOptions += 1
+                elif option.endswith("xmlhttprequest"):
+                    self.setOption(AdBlockRuleOption.XMLHttpRequestOption)
+                    self.__setException(AdBlockRuleOption.XMLHttpRequestOption,
+                                        option.startswith("~"))
+                    handledOptions += 1
+                elif option.endswith("image"):
+                    self.setOption(AdBlockRuleOption.ImageOption)
+                    self.__setException(AdBlockRuleOption.ImageOption,
+                                        option.startswith("~"))
+                elif option.endswith("script"):
+                    self.setOption(AdBlockRuleOption.ScriptOption)
+                    self.__setException(AdBlockRuleOption.ScriptOption,
+                                        option.startswith("~"))
+                elif option.endswith("stylesheet"):
+                    self.setOption(AdBlockRuleOption.StyleSheetOption)
+                    self.__setException(AdBlockRuleOption.StyleSheetOption,
+                                        option.startswith("~"))
+                elif option.endswith("object-subrequest"):
+                    self.setOption(AdBlockRuleOption.ObjectSubrequestOption)
+                    self.__setException(
+                        AdBlockRuleOption.ObjectSubrequestOption,
+                        option.startswith("~"))
+                elif option.endswith("ping"):
+                    self.setOption(AdBlockRuleOption.PingOption)
+                    self.__setException(AdBlockRuleOption.PingOption,
+                                        option.startswith("~"))
+                elif option.endswith("media"):
+                    self.setOption(AdBlockRuleOption.MediaOption)
+                    self.__setException(AdBlockRuleOption.MediaOption,
+                                        option.startswith("~"))
+                elif option.endswith("font"):
+                    self.setOption(AdBlockRuleOption.FontOption)
+                    self.__setException(AdBlockRuleOption.FontOption,
+                                        option.startswith("~"))
+                elif option.endswith("other"):
+                    self.setOption(AdBlockRuleOption.OtherOption)
+                    self.__setException(AdBlockRuleOption.OtherOption,
+                                        option.startswith("~"))
+                elif option == "document" and self.__isException:
+                    self.setOption(AdBlockRuleOption.DocumentOption)
+                    handledOptions += 1
+                elif option == "elemhide" and self.__isException:
+                    self.setOption(AdBlockRuleOption.ElementHideOption)
+                    handledOptions += 1
+                elif option == "collapse":
+                    # Hiding placeholders of blocked elements is enabled by
+                    # default
+                    handledOptions += 1
+            
+            # If we don't handle all options, it's safer to just disable
+            # this rule
+            if handledOptions != len(options):
+                self.__isInternalDisabled = True
+                self.__type = AdBlockRuleType.Invalid
+                return
+            
+            parsedLine = parsedLine[:optionsIndex]
+        
+        # Rule is classic regexp
+        if parsedLine.startswith("/") and parsedLine.endswith("/"):
+            parsedLine = parsedLine[1:-1]
+            self.__type = AdBlockRuleType.RegExpMatchRule
+            self.__regExp = QRegExp(parsedLine, self.__caseSensitivity,
+                                    QRegExp.RegExp)
+            self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
+            return
+        
+        # Remove starting / ending wildcards (*)
+        if parsedLine.startswith("*"):
+            parsedLine = parsedLine[1:]
+        if parsedLine.endswith("*"):
+            parsedLine = parsedLine[:-1]
+        
+        # Fast string matching for domain here
+        if self.__filterIsOnlyDomain(parsedLine):
+            parsedLine = parsedLine[2:-1]
+            self.__type = AdBlockRuleType.DomainMatchRule
+            self.__matchString = parsedLine
+            return
+        
+        # If rule contains '|' only at the end, string matching can be used
+        if self.__filterIsOnlyEndsMatch(parsedLine):
+            parsedLine = parsedLine[:-1]
+            self.__type = AdBlockRuleType.StringEndsMatchRule
+            self.__matchString = parsedLine
+            return
+        
+        # If there is still a wildcard (*) or separator (^) or (|),
+        # the rule must be modified to comply with QRegExp.
+        if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
+            self.__type = AdBlockRuleType.RegExpMatchRule
+            pattern = self.__convertPatternToRegExp(parsedLine)
+            self.__regExp = QRegExp(pattern, self.__caseSensitivity,
+                                    QRegExp.RegExp)
+            self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
+            return
+        
+        # This rule matches all URLs
+        if len(parsedLine) == 0:
+            if self.__options == AdBlockRuleOption.NoOption:
+                self.__isInternalDisabled = True
+                self.__type = AdBlockRuleType.Invalid
+                return
+            
+            self.__type = AdBlockRuleType.MatchAllUrlsRule
+            return
+        
+        # no regexp required
+        self.__type = AdBlockRuleType.StringContainsMatchRule
+        self.__matchString = parsedLine
+    
+    def __parseDomains(self, domains, separator):
+        """
+        Private method to parse a string with a domain list.
+        
+        @param domains list of domains
+        @type str
+        @param separator separator character used by the list
+        @type str
+        """
+        domainsList = [d for d in domains.split(separator) if d]
+        
+        for domain in domainsList:
+            if not domain:
+                continue
+            if domain.startswith("~"):
+                self.__blockedDomains.append(domain[1:])
+            else:
+                self.__allowedDomains.append(domain)
+        
+        if bool(self.__blockedDomains) or bool(self.__allowedDomains):
+            self.setOption(AdBlockRuleOption.DomainRestrictedOption)
+    
+    def networkMatch(self, request, domain, encodedUrl):
+        """
+        Public method to check the rule for a match.
+        
+        @param request reference to the network request
+        @type QWebEngineUrlRequestInfo
+        @param domain domain name
+        @type str
+        @param encodedUrl string encoded URL to be checked
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        if self.__type == AdBlockRuleType.CssRule or \
+           not self.__isEnabled or \
+           self.__isInternalDisabled:
+            return False
+        
+        matched = self.__stringMatch(domain, encodedUrl)
+        
+        if matched:
+            # check domain restrictions
+            if self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and \
+               not self.matchDomain(request.firstPartyUrl().host()):
+                return False
+            
+            # check third-party restrictions
+            if self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and \
+               not self.matchThirdParty(request):
+                return False
+            
+            # check object restrictions
+            if self.__hasOption(AdBlockRuleOption.ObjectOption) and \
+               not self.matchObject(request):
+                return False
+            
+            # check subdocument restrictions
+            if self.__hasOption(AdBlockRuleOption.SubdocumentOption) and \
+               not self.matchSubdocument(request):
+                return False
+            
+            # check xmlhttprequest restriction
+            if self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and \
+               not self.matchXmlHttpRequest(request):
+                return False
+            
+            # check image restriction
+            if self.__hasOption(AdBlockRuleOption.ImageOption) and \
+               not self.matchImage(request):
+                return False
+            
+            # check script restriction
+            if self.__hasOption(AdBlockRuleOption.ScriptOption) and \
+               not self.matchScript(request):
+                return False
+            
+            # check stylesheet restriction
+            if self.__hasOption(AdBlockRuleOption.StyleSheetOption) and \
+               not self.matchStyleSheet(request):
+                return False
+            
+            # check object-subrequest restriction
+            if self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and \
+               not self.matchObjectSubrequest(request):
+                return False
+            
+            # check ping restriction
+            if self.__hasOption(AdBlockRuleOption.PingOption) and \
+               not self.matchPing(request):
+                return False
+            
+            # check media restriction
+            if self.__hasOption(AdBlockRuleOption.MediaOption) and \
+               not self.matchMedia(request):
+                return False
+            
+            # check font restriction
+            if self.__hasOption(AdBlockRuleOption.FontOption) and \
+               not self.matchFont(request):
+                return False
+        
+        return matched
+    
+    def urlMatch(self, url):
+        """
+        Public method to check an URL against the rule.
+        
+        @param url URL to check
+        @type QUrl
+        @return flag indicating a match
+        @rtype bool
+        """
+        if not self.__hasOption(AdBlockRuleOption.DocumentOption) and \
+           not self.__hasOption(AdBlockRuleOption.ElementHideOption):
+            return False
+        
+        encodedUrl = bytes(url.toEncoded()).decode()
+        domain = url.host()
+        return self.__stringMatch(domain, encodedUrl)
+    
+    def __stringMatch(self, domain, encodedUrl):
+        """
+        Private method to match a domain string.
+        
+        @param domain domain to match
+        @type str
+        @param encodedUrl URL in encoded form
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        matched = False
+        
+        if self.__type == AdBlockRuleType.StringContainsMatchRule:
+            if self.__caseSensitivity == Qt.CaseInsensitive:
+                matched = self.__matchString.lower() in encodedUrl.lower()
+            else:
+                matched = self.__matchString in encodedUrl
+        elif self.__type == AdBlockRuleType.DomainMatchRule:
+            matched = self.__isMatchingDomain(domain, self.__matchString)
+        elif self.__type == AdBlockRuleType.StringEndsMatchRule:
+            if self.__caseSensitivity == Qt.CaseInsensitive:
+                matched = encodedUrl.lower().endswith(
+                    self.__matchString.lower())
+            else:
+                matched = encodedUrl.endswith(self.__matchString)
+        elif self.__type == AdBlockRuleType.RegExpMatchRule:
+            if not self.__isMatchingRegExpStrings(encodedUrl):
+                matched = False
+            else:
+                matched = self.__regExp.indexIn(encodedUrl) != -1
+        elif self.__type == AdBlockRuleType.MatchAllUrlsRule:
+            matched = True
+        
+        return matched
+    
+    def matchDomain(self, domain):
+        """
+        Public method to match a domain.
+        
+        @param domain domain name to check
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        if not self.__isEnabled:
+            return False
+        
+        if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
+            return True
+        
+        if len(self.__blockedDomains) == 0:
+            for dom in self.__allowedDomains:
+                if self.__isMatchingDomain(domain, dom):
+                    return True
+        elif len(self.__allowedDomains) == 0:
+            for dom in self.__blockedDomains:
+                if self.__isMatchingDomain(domain, dom):
+                    return False
+            return True
+        else:
+            for dom in self.__blockedDomains:
+                if self.__isMatchingDomain(domain, dom):
+                    return False
+            for dom in self.__allowedDomains:
+                if self.__isMatchingDomain(domain, dom):
+                    return True
+        
+        return False
+    
+    def matchThirdParty(self, req):
+        """
+        Public method to match a third-party rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype boolean
+        """
+        # Third-party matching should be performed on second-level domains
+        firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
+        host = toSecondLevelDomain(req.requestUrl())
+        
+        match = firstPartyHost != host
+        
+        if self.__hasException(AdBlockRuleOption.ThirdPartyOption):
+            return not match
+        else:
+            return match
+    
+    def matchObject(self, req):
+        """
+        Public method to match an object rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeObject)
+        
+        if self.__hasException(AdBlockRuleOption.ObjectOption):
+            return not match
+        else:
+            return match
+    
+    def matchSubdocument(self, req):
+        """
+        Public method to match a sub-document rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype boolean
+        """
+        match = (
+            req.resourceType() ==
+            QWebEngineUrlRequestInfo.ResourceTypeSubFrame)
+        
+        if self.__hasException(AdBlockRuleOption.SubdocumentOption):
+            return not match
+        else:
+            return match
+    
+    def matchXmlHttpRequest(self, req):
+        """
+        Public method to match a XmlHttpRequest rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeXhr)
+        
+        if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption):
+            return not match
+        else:
+            return match
+    
+    def matchImage(self, req):
+        """
+        Public method to match an Image rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeImage)
+        
+        if self.__hasException(AdBlockRuleOption.ImageOption):
+            return not match
+        else:
+            return match
+    
+    def matchScript(self, req):
+        """
+        Public method to match a Script rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeScript)
+        
+        if self.__hasException(AdBlockRuleOption.ScriptOption):
+            return not match
+        else:
+            return match
+    
+    def matchStyleSheet(self, req):
+        """
+        Public method to match a StyleSheet rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() ==
+            QWebEngineUrlRequestInfo.ResourceTypeStylesheet)
+        
+        if self.__hasException(AdBlockRuleOption.StyleSheetOption):
+            return not match
+        else:
+            return match
+    
+    def matchObjectSubrequest(self, req):
+        """
+        Public method to match an Object Subrequest rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype boolean
+        """
+        match = (
+            req.resourceType() ==
+            QWebEngineUrlRequestInfo.ResourceTypeSubResource)
+        if qVersionTuple() >= (5, 7, 0) and PYQT_VERSION >= 0x50700:
+            match = match or (
+                req.resourceType() ==
+                QWebEngineUrlRequestInfo.ResourceTypePluginResource)
+        
+        if self.__objectSubrequestException:
+            return not match
+        else:
+            return match
+    
+    def matchPing(self, req):
+        """
+        Public method to match a Ping rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypePing)
+        
+        if self.__hasException(AdBlockRuleOption.PingOption):
+            return not match
+        else:
+            return match
+    
+    def matchMedia(self, req):
+        """
+        Public method to match a Media rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeMedia)
+        
+        if self.__hasException(AdBlockRuleOption.MediaOption):
+            return not match
+        else:
+            return match
+    
+    def matchFont(self, req):
+        """
+        Public method to match a Font rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = (
+            req.resourceType() ==
+            QWebEngineUrlRequestInfo.ResourceTypeFontResource)
+        
+        if self.__hasException(AdBlockRuleOption.FontOption):
+            return not match
+        else:
+            return match
+    
+    def matchOther(self, req):
+        """
+        Public method to match any other rule.
+        
+        @param req request object to check
+        @type QWebEngineUrlRequestInfo
+        @return flag indicating a match
+        @rtype bool
+        """
+        match = req.resourceType() in [
+            QWebEngineUrlRequestInfo.ResourceTypeSubResource,
+            QWebEngineUrlRequestInfo.ResourceTypeWorker,
+            QWebEngineUrlRequestInfo.ResourceTypeSharedWorker,
+            QWebEngineUrlRequestInfo.ResourceTypeServiceWorker,
+            QWebEngineUrlRequestInfo.ResourceTypePrefetch,
+            QWebEngineUrlRequestInfo.ResourceTypeFavicon,
+            QWebEngineUrlRequestInfo.ResourceTypeUnknown,
+        ]
+        
+        if self.__hasException(AdBlockRuleOption.OtherOption):
+            return not match
+        else:
+            return match
+    
+    def isException(self):
+        """
+        Public method to check, if the rule defines an exception.
+        
+        @return flag indicating an exception
+        @rtype bool
+        """
+        return self.__isException
+    
+    def setException(self, exception):
+        """
+        Public method to set the rule's exception flag.
+        
+        @param exception flag indicating an exception rule
+        @type bool
+        """
+        self.__isException = exception
+    
+    def isEnabled(self):
+        """
+        Public method to check, if the rule is enabled.
+        
+        @return flag indicating enabled state
+        @rtype bool
+        """
+        return self.__isEnabled
+    
+    def setEnabled(self, enabled):
+        """
+        Public method to set the rule's enabled state.
+        
+        @param enabled flag indicating the new enabled state
+        @type bool
+        """
+        self.__isEnabled = enabled
+    
+    def isCSSRule(self):
+        """
+        Public method to check, if the rule is a CSS rule.
+        
+        @return flag indicating a CSS rule
+        @rtype bool
+        """
+        return self.__type == AdBlockRuleType.CssRule
+    
+    def cssSelector(self):
+        """
+        Public method to get the CSS selector of the rule.
+        
+        @return CSS selector
+        @rtype str
+        """
+        return self.__matchString
+    
+    def isDocument(self):
+        """
+        Public method to check, if this is a document rule.
+        
+        @return flag indicating a document rule
+        @rtype bool
+        """
+        return self.__hasOption(AdBlockRuleOption.DocumentOption)
+    
+    def isElementHiding(self):
+        """
+        Public method to check, if this is an element hiding rule.
+        
+        @return flag indicating an element hiding rule
+        @rtype bool
+        """
+        return self.__hasOption(AdBlockRuleOption.ElementHideOption)
+    
+    def isDomainRestricted(self):
+        """
+        Public method to check, if this rule is restricted by domain.
+        
+        @return flag indicating a domain restriction
+        @rtype bool
+        """
+        return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)
+    
+    def isComment(self):
+        """
+        Public method to check, if this is a comment.
+        
+        @return flag indicating a comment
+        @rtype bool
+        """
+        return self.__filter.startswith("!")
+    
+    def isHeader(self):
+        """
+        Public method to check, if this is a header.
+        
+        @return flag indicating a header
+        @rtype bool
+        """
+        return self.__filter.startswith("[Adblock")
+    
+    def isSlow(self):
+        """
+        Public method to check, if this is a slow rule.
+        
+        @return flag indicating a slow rule
+        @rtype bool
+        """
+        return self.__regExp is not None
+    
+    def isInternalDisabled(self):
+        """
+        Public method to check, if this rule was disabled internally.
+        
+        @return flag indicating an internally disabled rule
+        @rtype bool
+        """
+        return self.__isInternalDisabled
+    
+    def __convertPatternToRegExp(self, wildcardPattern):
+        """
+        Private method to convert a wildcard pattern to a regular expression.
+        
+        @param wildcardPattern string containing the wildcard pattern
+        @type str
+        @return string containing a regular expression
+        @rtype string
+        """
+        pattern = wildcardPattern
+        
+        # remove multiple wildcards
+        pattern = re.sub(r"\*+", "*", pattern)
+        # remove anchors following separator placeholder
+        pattern = re.sub(r"\^\|$", "^", pattern)
+        # remove leading wildcards
+        pattern = re.sub(r"^(\*)", "", pattern)
+        # remove trailing wildcards
+        pattern = re.sub(r"(\*)$", "", pattern)
+        # escape special symbols
+        pattern = re.sub(r"(\W)", r"\\\1", pattern)
+        # process extended anchor at expression start
+        pattern = re.sub(
+            r"^\\\|\\\|",
+            r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
+        # process separator placeholders
+        pattern = re.sub(r"\\\^", r"(?:[^\w\d\-.%]|$)", pattern)
+        # process anchor at expression start
+        pattern = re.sub(r"^\\\|", "^", pattern)
+        # process anchor at expression end
+        pattern = re.sub(r"\\\|$", "$", pattern)
+        # replace wildcards by .*
+        pattern = re.sub(r"\\\*", ".*", pattern)
+        
+        return pattern
+    
+    def __hasOption(self, opt):
+        """
+        Private method to check, if the given option has been set.
+        
+        @param opt option to check for
+        @type AdBlockRuleOption
+        @return flag indicating the state of the option
+        @rtype bool
+        """
+        return bool(self.__options & opt)
+    
+    def setOption(self, opt):
+        """
+        Public method to set the given option.
+        
+        @param opt option to be set
+        @type AdBlockRuleOption
+        """
+        self.__options |= opt
+    
+    def __hasException(self, opt):
+        """
+        Private method to check, if the given option has been set as an
+        exception.
+        
+        @param opt option to check for
+        @type AdBlockRuleOption
+        @return flag indicating the exception state of the option
+        @rtype bool
+        """
+        return bool(self.__exceptions & opt)
+    
+    def __setException(self, opt, on):
+        """
+        Private method to set the given option as an exception.
+        
+        @param opt option to be set
+        @type AdBlockRuleOption
+        @param on flag indicating to set or unset the exception
+        @type bool
+        """
+        if on:
+            self.__exceptions |= opt
+        else:
+            self.__exceptions &= ~opt
+    
+    def __filterIsOnlyDomain(self, filterString):
+        """
+        Private method to check, if the given filter is a domain only filter.
+        
+        @param filterString filter string to be checked
+        @type str
+        @return flag indicating a domain only filter
+        @rtype bool
+        """
+        if not filterString.endswith("^") or not filterString.startswith("||"):
+            return False
+        
+        for filterChar in filterString:
+            if filterChar in ["/", ":", "?", "=", "&", "*"]:
+                return False
+        
+        return True
+    
+    def __filterIsOnlyEndsMatch(self, filterString):
+        """
+        Private method to check, if the given filter is to match against the
+        end of a string.
+        
+        @param filterString filter string to be checked
+        @type str
+        @return flag indicating a end of string match filter
+        @rtype bool
+        """
+        index = 0
+        for filterChar in filterString:
+            if filterChar in ["^", "*"]:
+                return False
+            elif filterChar == "|":
+                return bool(index == len(filterString) - 1)
+            index += 1
+        
+        return False
+    
+    def __isMatchingDomain(self, domain, filterString):
+        """
+        Private method to check, if a given domain matches the given filter
+        string.
+        
+        @param domain domain to be checked
+        @type str
+        @param filterString filter string to check against
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        if filterString == domain:
+            return True
+        
+        if not domain.endswith(filterString):
+            return False
+        
+        index = domain.find(filterString)
+        
+        return bool(index > 0 and domain[index - 1] == ".")
+    
+    def __isMatchingRegExpStrings(self, url):
+        """
+        Private method to check the given URL against the fixed parts of
+        the regexp.
+        
+        @param url URL to be checked
+        @type str
+        @return flag indicating a match
+        @rtype bool
+        """
+        assert self.__regExp is not None
+        
+        for matcher in self.__stringMatchers:
+            if matcher not in url:
+                return False
+        
+        return True
+    
+    def __parseRegExpFilter(self, filterString):
+        """
+        Private method to split the given regular expression into strings that
+        can be used with 'in'.
+        
+        @param filterString regexp filter string to be parsed
+        @type str
+        @return fixed string parts of the filter
+        @rtype list of str
+        """
+        matchers = []
+        
+        startPos = -1
+        for index in range(len(filterString)):
+            filterChar = filterString[index]
+            if filterChar in ["|", "*", "^"]:
+                sub = filterString[startPos:index]
+                if len(sub) > 1:
+                    matchers.append(sub)
+                startPos = index + 1
+        
+        sub = filterString[startPos:]
+        if len(sub) > 1:
+            matchers.append(sub)
+        
+        return list(set(matchers))
+    
+    def ruleType(self):
+        """
+        Public method to get the rule type.
+        
+        @return rule type
+        @rtype AdBlockRuleType
+        """
+        return self.__type
+    
+    def ruleOptions(self):
+        """
+        Public method to get the rule options.
+        
+        @return rule options
+        @rtype AdBlockRuleOption
+        """
+        return self.__options
+    
+    def ruleExceptions(self):
+        """
+        Public method to get the rule exceptions.
+        
+        @return rule exceptions
+        @rtype AdBlockRuleOption
+        """
+        return self.__exceptions
+    
+    def matchString(self):
+        """
+        Public method to get the match string.
+        
+        @return match string
+        @rtype str
+        """
+        return self.__matchString
+    
+    def caseSensitivity(self):
+        """
+        Public method to get the case sensitivity.
+        
+        @return case sensitivity
+        @rtype Qt.CaseSensitivity
+        """
+        return self.__caseSensitivity
+    
+    def allowedDomains(self):
+        """
+        Public method to get a copy of the list of allowed domains.
+        
+        @return list of allowed domains
+        @rtype list of str
+        """
+        return self.__allowedDomains[:]
+    
+    def blockedDomains(self):
+        """
+        Public method to get a copy of the list of blocked domains.
+        
+        @return list of blocked domains
+        @rtype list of str
+        """
+        return self.__blockedDomains[:]
+    
+    def addBlockedDomains(self, domains):
+        """
+        Public method to add to the list of blocked domains.
+        
+        @param domains list of domains to be added
+        @type str or list of str
+        """
+        if isinstance(domains, list):
+            self.__blockedDomains.extend(domains)
+        else:
+            self.__blockedDomains.append(domains)
+    
+    def getRegExpAndMatchers(self):
+        """
+        Public method to get the regular expression and associated string
+        matchers.
+        
+        @return tuple containing the regular expression and the list of
+            string matchers
+        @rtype tuple of (QRegExp, list of str)
+        """
+        if self.__regExp is not None:
+            return (QRegExp(self.__regExp), self.__stringMatchers[:])
+        else:
+            return (None, [])
+    
+    def copyFrom(self, other):
+        """
+        Public method to copy another AdBlock rule.
+        
+        @param other reference to the AdBlock rule to copy from
+        @type AdBlockRule
+        """
+        self.__subscription = other.subscription()
+        self.__type = other.ruleType()
+        self.__options = other.ruleOptions()
+        self.__exceptions = other.ruleExceptions()
+        self.__filter = other.filter()
+        self.__matchString = other.matchString()
+        self.__caseSensitivity = other.caseSensitivity()
+        self.__isEnabled = other.isEnabled()
+        self.__isException = other.isException()
+        self.__isInternalDisabled = other.isInternalDisabled()
+        self.__allowedDomains = other.allowedDomains()
+        self.__blockedDomains = other.blockedDomains()
+        self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()

eric ide

mercurial