--- a/WebBrowser/AdBlock/AdBlockRule.py Thu Dec 14 19:25:34 2017 +0100 +++ b/WebBrowser/AdBlock/AdBlockRule.py Mon Dec 18 18:09:39 2017 +0100 @@ -11,6 +11,8 @@ import re +from enum import IntEnum, IntFlag + from PyQt5.QtCore import PYQT_VERSION, Qt, QRegExp from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo @@ -21,8 +23,10 @@ """ Module function to get a second level domain from the given URL. - @param url URL to extract domain from (QUrl) - @return name of second level domain (string) + @param url URL to extract domain from + @type QUrl + @return name of second level domain + @rtype str """ topLevelDomain = url.topLevelDomain() urlHost = url.host() @@ -40,6 +44,43 @@ return domain + topLevelDomain +class AdBlockRuleType(IntEnum): + """ + Class implementing the rule type enum (mutually exclusive values). + """ + CssRule = 0 + DomainMatchRule = 1 + RegExpMatchRule = 2 + StringEndsMatchRule = 3 + StringContainsMatchRule = 4 + MatchAllUrlsRule = 5 + Invalid = 6 + + +class AdBlockRuleOption(IntFlag): + """ + Class implementing the rule option enum (combinable bit flags). + """ + NoOption = 0 + DomainRestrictedOption = 1 + ThirdPartyOption = 2 + ObjectOption = 4 + SubdocumentOption = 8 + XMLHttpRequestOption = 16 + ImageOption = 32 + ScriptOption = 64 + StyleSheetOption = 128 + ObjectSubrequestOption = 256 + PingOption = 512 + MediaOption = 1024 + FontOption = 2048 + OtherOption = 4096 + + # Exception only options + DocumentOption = 8192 + ElementHideOption = 16384 + + class AdBlockRule(object): """ Class implementing the AdBlock rule. 
@@ -48,45 +89,27 @@ """ Constructor - @param filterRule filter string of the rule (string) + @param filterRule filter string of the rule + @type str @param subscription reference to the subscription object - (AdBlockSubscription) + @type AdBlockSubscription """ self.__subscription = subscription - self.__regExp = QRegExp() - self.__options = [] + self.__regExp = None + self.__stringMatchers = [] + self.__blockedDomains = [] self.__allowedDomains = [] - self.__enabled = True - self.__cssRule = False - self.__exception = False - self.__internalDisabled = False - self.__domainRestricted = False - self.__useRegExp = False - self.__useDomainMatch = False - self.__useEndsMatch = False - self.__thirdParty = False - self.__thirdPartyException = False - self.__object = False - self.__objectException = False - self.__subdocument = False - self.__subdocumentException = False - self.__xmlhttprequest = False - self.__xmlhttprequestException = False - self.__document = False - self.__elemhide = False + self.__isEnabled = True + self.__isException = False + self.__isInternalDisabled = False self.__caseSensitivity = Qt.CaseInsensitive - self.__image = False - self.__imageException = False - self.__script = False - self.__scriptException = False - self.__stylesheet = False - self.__stylesheetException = False - self.__objectSubrequest = False - self.__objectSubrequestException = False - self.__stringMatchRule = False + + self.__type = AdBlockRuleType.StringContainsMatchRule + self.__options = AdBlockRuleOption.NoOption + self.__exceptions = AdBlockRuleOption.NoOption self.setFilter(filterRule) @@ -94,15 +117,26 @@ """ Public method to get the subscription this rule belongs to. - @return subscription of the rule (AdBlockSubscription) + @return subscription of the rule + @rtype AdBlockSubscription """ return self.__subscription + def setSubscription(self, subscription): + """ + Public method to set the subscription this rule belongs to. 
+ + @param subscription subscription of the rule + @type AdBlockSubscription + """ + self.__subscription = subscription + def filter(self): """ Public method to get the rule filter string. - @return rule filter string (string) + @return rule filter string + @rtype str """ return self.__filter @@ -110,7 +144,8 @@ """ Public method to set the rule filter string. - @param filterRule rule filter string (string) + @param filterRule rule filter string + @type str """ self.__filter = filterRule self.__parseFilter() @@ -122,13 +157,15 @@ parsedLine = self.__filter # empty rule or just a comment - if not parsedLine.strip() or parsedLine.startswith(("!", "[Adblock")): - self.__enabled = False + if not parsedLine.strip() or parsedLine.startswith("!"): + self.__isEnabled = False + self.__isInternalDisabled = True + self.__type = AdBlockRuleType.Invalid return # CSS element hiding rule if "##" in parsedLine or "#@#" in parsedLine: - self.__cssRule = True + self.__type = AdBlockRuleType.CssRule pos = parsedLine.find("#") # domain restricted rule @@ -136,24 +173,26 @@ domains = parsedLine[:pos] self.__parseDomains(domains, ",") - self.__exception = parsedLine[pos + 1] == "@" + self.__isException = parsedLine[pos + 1] == "@" + if self.__isException: + self.__matchString = parsedLine[pos + 3:] + else: + self.__matchString = parsedLine[pos + 2:] - if self.__exception: - self.__cssSelector = parsedLine[pos + 3:] - else: - self.__cssSelector = parsedLine[pos + 2:] # CSS rule cannot have more options -> stop parsing return # Exception always starts with @@ if parsedLine.startswith("@@"): - self.__exception = True + self.__isException = True parsedLine = parsedLine[2:] # Parse all options following '$' character optionsIndex = parsedLine.find("$") if optionsIndex >= 0: - options = parsedLine[optionsIndex + 1:].split(",") + options = [opt + for opt in parsedLine[optionsIndex + 1:].split(",") + if opt] handledOptions = 0 for option in options: @@ -164,47 +203,74 @@ self.__caseSensitivity 
= Qt.CaseSensitive handledOptions += 1 elif option.endswith("third-party"): - self.__thirdParty = True - self.__thirdPartyException = option.startswith("~") + self.setOption(AdBlockRuleOption.ThirdPartyOption) + self.__setException(AdBlockRuleOption.ThirdPartyOption, + option.startswith("~")) handledOptions += 1 elif option.endswith("object"): - self.__object = True - self.__objectException = option.startswith("~") + self.setOption(AdBlockRuleOption.ObjectOption) + self.__setException(AdBlockRuleOption.ObjectOption, + option.startswith("~")) handledOptions += 1 elif option.endswith("subdocument"): - self.__subdocument = True - self.__subdocumentException = option.startswith("~") + self.setOption(AdBlockRuleOption.SubdocumentOption) + self.__setException(AdBlockRuleOption.SubdocumentOption, + option.startswith("~")) handledOptions += 1 elif option.endswith("xmlhttprequest"): - self.__xmlhttprequest = True - self.__xmlhttprequestException = option.startswith("~") + self.setOption(AdBlockRuleOption.XMLHttpRequestOption) + self.__setException(AdBlockRuleOption.XMLHttpRequestOption, + option.startswith("~")) handledOptions += 1 elif option.endswith("image"): - self.__image = True - self.__imageException = option.startswith("~") + self.setOption(AdBlockRuleOption.ImageOption) + self.__setException(AdBlockRuleOption.ImageOption, + option.startswith("~")) elif option.endswith("script"): - self.__script = True - self.__scriptException = option.startswith("~") + self.setOption(AdBlockRuleOption.ScriptOption) + self.__setException(AdBlockRuleOption.ScriptOption, + option.startswith("~")) elif option.endswith("stylesheet"): - self.__stylesheet = True - self.__stylesheetException = option.startswith("~") + self.setOption(AdBlockRuleOption.StyleSheetOption) + self.__setException(AdBlockRuleOption.StyleSheetOption, + option.startswith("~")) elif option.endswith("object-subrequest"): - self.__objectSubrequest = True - self.__objectSubrequestException = option.startswith("~") - 
elif option == "document" and self.__exception: - self.__document = True + self.setOption(AdBlockRuleOption.ObjectSubrequestOption) + self.__setException( + AdBlockRuleOption.ObjectSubrequestOption, + option.startswith("~")) + elif option.endswith("ping"): + self.setOption(AdBlockRuleOption.PingOption) + self.__setException(AdBlockRuleOption.PingOption, + option.startswith("~")) + elif option.endswith("media"): + self.setOption(AdBlockRuleOption.MediaOption) + self.__setException(AdBlockRuleOption.MediaOption, + option.startswith("~")) + elif option.endswith("font"): + self.setOption(AdBlockRuleOption.FontOption) + self.__setException(AdBlockRuleOption.FontOption, + option.startswith("~")) + elif option.endswith("other"): + self.setOption(AdBlockRuleOption.OtherOption) + self.__setException(AdBlockRuleOption.OtherOption, + option.startswith("~")) + elif option == "document" and self.__isException: + self.setOption(AdBlockRuleOption.DocumentOption) handledOptions += 1 - elif option == "elemhide" and self.__exception: - self.__elemhide = True + elif option == "elemhide" and self.__isException: + self.setOption(AdBlockRuleOption.ElementHideOption) handledOptions += 1 elif option == "collapse": - # Hiding placeholders of blocked elements + # Hiding placeholders of blocked elements is enabled by + # default handledOptions += 1 # If we don't handle all options, it's safer to just disable # this rule if handledOptions != len(options): - self.__internalDisabled = True + self.__isInternalDisabled = True + self.__type = AdBlockRuleType.Invalid return parsedLine = parsedLine[:optionsIndex] @@ -212,57 +278,66 @@ # Rule is classic regexp if parsedLine.startswith("/") and parsedLine.endswith("/"): parsedLine = parsedLine[1:-1] - self.__useRegExp = True + self.__type = AdBlockRuleType.RegExpMatchRule self.__regExp = QRegExp(parsedLine, self.__caseSensitivity, QRegExp.RegExp) + self.__stringMatchers = self.__parseRegExpFilter(parsedLine) return - # Remove starting / ending 
wildcards + # Remove starting / ending wildcards (*) if parsedLine.startswith("*"): parsedLine = parsedLine[1:] if parsedLine.endswith("*"): parsedLine = parsedLine[:-1] - # Fast string matching for domain can be used - if parsedLine.startswith("||") and \ - parsedLine.endswith("^") and \ - QRegExp("[/:?=&\\*]").indexIn(parsedLine) == -1: + # Fast string matching for domain here + if self.__filterIsOnlyDomain(parsedLine): parsedLine = parsedLine[2:-1] - self.__useDomainMatch = True + self.__type = AdBlockRuleType.DomainMatchRule self.__matchString = parsedLine return # If rule contains '|' only at the end, string matching can be used - if parsedLine.endswith("|") and \ - QRegExp("[\\^\\*]").indexIn(parsedLine) == -1 and \ - parsedLine.count("|") == 1: + if self.__filterIsOnlyEndsMatch(parsedLine): parsedLine = parsedLine[:-1] - self.__useEndsMatch = True + self.__type = AdBlockRuleType.StringEndsMatchRule self.__matchString = parsedLine return # If there is still a wildcard (*) or separator (^) or (|), # the rule must be modified to comply with QRegExp. if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: + self.__type = AdBlockRuleType.RegExpMatchRule pattern = self.__convertPatternToRegExp(parsedLine) - self.__useRegExp = True self.__regExp = QRegExp(pattern, self.__caseSensitivity, QRegExp.RegExp) + self.__stringMatchers = self.__parseRegExpFilter(parsedLine) + return + + # This rule matches all URLs + if len(parsedLine) == 0: + if self.__options == AdBlockRuleOption.NoOption: + self.__isInternalDisabled = True + self.__type = AdBlockRuleType.Invalid + return + + self.__type = AdBlockRuleType.MatchAllUrlsRule return # no regexp required - self.__useRegExp = False + self.__type = AdBlockRuleType.StringContainsMatchRule self.__matchString = parsedLine - self.__stringMatchRule = True def __parseDomains(self, domains, separator): """ Private method to parse a string with a domain list. 
- @param domains list of domains (string) - @param separator separator character used by the list (string) + @param domains list of domains + @type str + @param separator separator character used by the list + @type str """ - domainsList = domains.split(separator) + domainsList = [d for d in domains.split(separator) if d] for domain in domainsList: if not domain: @@ -272,8 +347,8 @@ else: self.__allowedDomains.append(domain) - self.__domainRestricted = \ - bool(self.__blockedDomains) or bool(self.__allowedDomains) + if bool(self.__blockedDomains) or bool(self.__allowedDomains): + self.setOption(AdBlockRuleOption.DomainRestrictedOption) def networkMatch(self, request, domain, encodedUrl): """ @@ -288,48 +363,72 @@ @return flag indicating a match @rtype bool """ - if self.__cssRule or not self.__enabled or self.__internalDisabled: + if self.__type == AdBlockRuleType.CssRule or \ + not self.__isEnabled or \ + self.__isInternalDisabled: return False matched = self.__stringMatch(domain, encodedUrl) if matched: # check domain restrictions - if self.__domainRestricted and \ - not self.matchDomain(request.firstPartyUrl().host()): + if self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and \ + not self.matchDomain(request.firstPartyUrl().host()): return False # check third-party restrictions - if self.__thirdParty and not self.matchThirdParty(request): + if self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and \ + not self.matchThirdParty(request): return False # check object restrictions - if self.__object and not self.matchObject(request): + if self.__hasOption(AdBlockRuleOption.ObjectOption) and \ + not self.matchObject(request): return False # check subdocument restrictions - if self.__subdocument and not self.matchSubdocument(request): + if self.__hasOption(AdBlockRuleOption.SubdocumentOption) and \ + not self.matchSubdocument(request): return False # check xmlhttprequest restriction - if self.__xmlhttprequest and not self.matchXmlHttpRequest(request): + if 
self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and \ + not self.matchXmlHttpRequest(request): return False # check image restriction - if self.__image and not self.matchImage(request): + if self.__hasOption(AdBlockRuleOption.ImageOption) and \ + not self.matchImage(request): return False # check script restriction - if self.__script and not self.matchScript(request): + if self.__hasOption(AdBlockRuleOption.ScriptOption) and \ + not self.matchScript(request): return False # check stylesheet restriction - if self.__stylesheet and not self.matchStyleSheet(request): + if self.__hasOption(AdBlockRuleOption.StyleSheetOption) and \ + not self.matchStyleSheet(request): return False # check object-subrequest restriction - if self.__objectSubrequest and \ - not self.matchObjectSubrequest(request): + if self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and \ + not self.matchObjectSubrequest(request): + return False + + # check ping restriction + if self.__hasOption(AdBlockRuleOption.PingOption) and \ + not self.matchPing(request): + return False + + # check media restriction + if self.__hasOption(AdBlockRuleOption.MediaOption) and \ + not self.matchMedia(request): + return False + + # check font restriction + if self.__hasOption(AdBlockRuleOption.FontOption) and \ + not self.matchFont(request): return False return matched @@ -338,10 +437,13 @@ """ Public method to check an URL against the rule. 
- @param url URL to check (QUrl) - @return flag indicating a match (boolean) + @param url URL to check + @type QUrl + @return flag indicating a match + @rtype bool """ - if not self.__document and not self.__elemhide: + if not self.__hasOption(AdBlockRuleOption.DocumentOption) and \ + not self.__hasOption(AdBlockRuleOption.ElementHideOption): return False encodedUrl = bytes(url.toEncoded()).decode() @@ -359,26 +461,28 @@ @return flag indicating a match @rtype bool """ - if self.__cssRule or not self.__enabled or self.__internalDisabled: - return False - matched = False - if self.__useRegExp: - matched = self.__regExp.indexIn(encodedUrl) != -1 - elif self.__useDomainMatch: - matched = domain.endswith(self.__matchString) - elif self.__useEndsMatch: + if self.__type == AdBlockRuleType.StringContainsMatchRule: + if self.__caseSensitivity == Qt.CaseInsensitive: + matched = self.__matchString.lower() in encodedUrl.lower() + else: + matched = self.__matchString in encodedUrl + elif self.__type == AdBlockRuleType.DomainMatchRule: + matched = self.__isMatchingDomain(domain, self.__matchString) + elif self.__type == AdBlockRuleType.StringEndsMatchRule: if self.__caseSensitivity == Qt.CaseInsensitive: matched = encodedUrl.lower().endswith( self.__matchString.lower()) else: matched = encodedUrl.endswith(self.__matchString) - else: - if self.__caseSensitivity == Qt.CaseInsensitive: - matched = self.__matchString.lower() in encodedUrl.lower() + elif self.__type == AdBlockRuleType.RegExpMatchRule: + if not self.__isMatchingRegExpStrings(encodedUrl): + matched = False else: - matched = self.__matchString in encodedUrl + matched = self.__regExp.indexIn(encodedUrl) != -1 + elif self.__type == AdBlockRuleType.MatchAllUrlsRule: + matched = True return matched @@ -386,40 +490,44 @@ """ Public method to match a domain. 
- @param domain domain name to check (string) - @return flag indicating a match (boolean) + @param domain domain name to check + @type str + @return flag indicating a match + @rtype bool """ - if not self.__enabled: + if not self.__isEnabled: return False - if not self.__domainRestricted: + if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption): return True if len(self.__blockedDomains) == 0: for dom in self.__allowedDomains: - if domain.endswith(dom): + if self.__isMatchingDomain(domain, dom): return True elif len(self.__allowedDomains) == 0: for dom in self.__blockedDomains: - if domain.endswith(dom): + if self.__isMatchingDomain(domain, dom): return False return True else: for dom in self.__blockedDomains: - if domain.endswith(dom): + if self.__isMatchingDomain(domain, dom): return False for dom in self.__allowedDomains: - if domain.endswith(dom): + if self.__isMatchingDomain(domain, dom): return True return False def matchThirdParty(self, req): """ - Public slot to match a third-party rule. + Public method to match a third-party rule. - @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype boolean """ # Third-party matching should be performed on second-level domains firstPartyHost = toSecondLevelDomain(req.firstPartyUrl()) @@ -427,109 +535,123 @@ match = firstPartyHost != host - if self.__thirdPartyException: + if self.__hasException(AdBlockRuleOption.ThirdPartyOption): return not match else: return match def matchObject(self, req): """ - Public slot to match an object rule. + Public method to match an object rule. 
- @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool """ match = ( req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeObject) - if self.__objectException: + if self.__hasException(AdBlockRuleOption.ObjectOption): return not match else: return match def matchSubdocument(self, req): """ - Public slot to match a sub-document rule. + Public method to match a sub-document rule. - @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype boolean """ match = ( req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeSubFrame) - if self.__subdocumentException: + if self.__hasException(AdBlockRuleOption.SubdocumentOption): return not match else: return match def matchXmlHttpRequest(self, req): """ - Public slot to match a XmlHttpRequest rule. + Public method to match a XmlHttpRequest rule. - @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool """ match = ( req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeXhr) - if self.__xmlhttprequestException: + if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption): return not match else: return match def matchImage(self, req): """ - Public slot to match an Image rule. + Public method to match an Image rule. 
- @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool """ match = ( req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeImage) - if self.__imageException: + if self.__hasException(AdBlockRuleOption.ImageOption): return not match else: return match def matchScript(self, req): """ - Public slot to match a Script rule. + Public method to match a Script rule. - @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool """ match = ( req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeScript) - if self.__scriptException: + if self.__hasException(AdBlockRuleOption.ScriptOption): return not match else: return match def matchStyleSheet(self, req): """ - Public slot to match a StyleSheet rule. + Public method to match a StyleSheet rule. - @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool """ match = ( req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeStylesheet) - if self.__stylesheetException: + if self.__hasException(AdBlockRuleOption.StyleSheetOption): return not match else: return match def matchObjectSubrequest(self, req): """ - Public slot to match an Object Subrequest rule. + Public method to match an Object Subrequest rule. 
- @param req request object to check (QWebEngineUrlRequestInfo) - @return flag indicating a match (boolean) + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype boolean """ match = ( req.resourceType() == @@ -544,87 +666,169 @@ else: return match + def matchPing(self, req): + """ + Public method to match a Ping rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypePing) + + if self.__hasException(AdBlockRuleOption.PingOption): + return not match + else: + return match + + def matchMedia(self, req): + """ + Public method to match a Media rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeMedia) + + if self.__hasException(AdBlockRuleOption.MediaOption): + return not match + else: + return match + + def matchFont(self, req): + """ + Public method to match a Font rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == + QWebEngineUrlRequestInfo.ResourceTypeFontResource) + + if self.__hasException(AdBlockRuleOption.FontOption): + return not match + else: + return match + + def matchOther(self, req): + """ + Public method to match any other rule. 
+ + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = req.resourceType() in [ + QWebEngineUrlRequestInfo.ResourceTypeSubResource, + QWebEngineUrlRequestInfo.ResourceTypeWorker, + QWebEngineUrlRequestInfo.ResourceTypeSharedWorker, + QWebEngineUrlRequestInfo.ResourceTypeServiceWorker, + QWebEngineUrlRequestInfo.ResourceTypePrefetch, + QWebEngineUrlRequestInfo.ResourceTypeFavicon, + QWebEngineUrlRequestInfo.ResourceTypeUnknown, + ] + + if self.__hasException(AdBlockRuleOption.OtherOption): + return not match + else: + return match + def isException(self): """ Public method to check, if the rule defines an exception. - @return flag indicating an exception (boolean) + @return flag indicating an exception + @rtype bool """ - return self.__exception + return self.__isException def setException(self, exception): """ Public method to set the rule's exception flag. - @param exception flag indicating an exception rule (boolean) + @param exception flag indicating an exception rule + @type bool """ - self.__exception = exception + self.__isException = exception def isEnabled(self): """ Public method to check, if the rule is enabled. - @return flag indicating enabled state (boolean) + @return flag indicating enabled state + @rtype bool """ - return self.__enabled + return self.__isEnabled def setEnabled(self, enabled): """ Public method to set the rule's enabled state. - @param enabled flag indicating the new enabled state (boolean) + @param enabled flag indicating the new enabled state + @type bool """ - self.__enabled = enabled - if not enabled: - self.__filter = "!" + self.__filter - else: - self.__filter = self.__filter[1:] + self.__isEnabled = enabled def isCSSRule(self): """ Public method to check, if the rule is a CSS rule. 
- @return flag indicating a CSS rule (boolean) + @return flag indicating a CSS rule + @rtype bool """ - return self.__cssRule + return self.__type == AdBlockRuleType.CssRule def cssSelector(self): """ Public method to get the CSS selector of the rule. - @return CSS selector (string) + @return CSS selector + @rtype str """ - return self.__cssSelector + return self.__matchString def isDocument(self): """ Public method to check, if this is a document rule. - @return flag indicating a document rule (boolean) + @return flag indicating a document rule + @rtype bool """ - return self.__document + return self.__hasOption(AdBlockRuleOption.DocumentOption) def isElementHiding(self): """ Public method to check, if this is an element hiding rule. - @return flag indicating an element hiding rule (boolean) + @return flag indicating an element hiding rule + @rtype bool """ - return self.__elemhide + return self.__hasOption(AdBlockRuleOption.ElementHideOption) def isDomainRestricted(self): """ Public method to check, if this rule is restricted by domain. - @return flag indicating a domain restriction (boolean) + @return flag indicating a domain restriction + @rtype bool """ - return self.__domainRestricted + return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) def isComment(self): """ Public method to check, if this is a comment. - @return flag indicating a comment (boolean) + @return flag indicating a comment + @rtype bool """ return self.__filter.startswith("!") @@ -632,7 +836,8 @@ """ Public method to check, if this is a header. - @return flag indicating a header (boolean) + @return flag indicating a header + @rtype bool """ return self.__filter.startswith("[Adblock") @@ -640,24 +845,28 @@ """ Public method to check, if this is a slow rule. 
- @return flag indicating a slow rule (boolean) + @return flag indicating a slow rule + @rtype bool """ - return self.__useRegExp + return self.__regExp is not None def isInternalDisabled(self): """ Public method to check, if this rule was disabled internally. - @return flag indicating an internally disabled rule (boolean) + @return flag indicating an internally disabled rule + @rtype bool """ - return self.__internalDisabled + return self.__isInternalDisabled def __convertPatternToRegExp(self, wildcardPattern): """ Private method to convert a wildcard pattern to a regular expression. - @param wildcardPattern string containing the wildcard pattern (string) - @return string containing a regular expression (string) + @param wildcardPattern string containing the wildcard pattern + @type str + @return string containing a regular expression + @rtype string """ pattern = wildcardPattern @@ -685,3 +894,264 @@ pattern = re.sub(r"\\\*", ".*", pattern) return pattern + + def __hasOption(self, opt): + """ + Private method to check, if the given option has been set. + + @param opt option to check for + @type AdBlockRuleOption + @return flag indicating the state of the option + @rtype bool + """ + return bool(self.__options & opt) + + def setOption(self, opt): + """ + Public method to set the given option. + + @param opt option to be set + @type AdBlockRuleOption + """ + self.__options |= opt + + def __hasException(self, opt): + """ + Private method to check, if the given option has been set as an + exception. + + @param opt option to check for + @type AdBlockRuleOption + @return flag indicating the exception state of the option + @rtype bool + """ + return bool(self.__exceptions & opt) + + def __setException(self, opt, on): + """ + Private method to set the given option as an exception. 
+ + @param opt option to be set + @type AdBlockRuleOption + @param on flag indicating to set or unset the exception + @type bool + """ + if on: + self.__exceptions |= opt + else: + self.__exceptions &= ~opt + + def __filterIsOnlyDomain(self, filterString): + """ + Private method to check, if the given filter is a domain only filter. + + @param filterString filter string to be checked + @type str + @return flag indicating a domain only filter + @rtype bool + """ + if not filterString.endswith("^") or not filterString.startswith("||"): + return False + + for filterChar in filterString: + if filterChar in ["/", ":", "?", "=", "&", "*"]: + return False + + return True + + def __filterIsOnlyEndsMatch(self, filterString): + """ + Private method to check, if the given filter is to match against the + end of a string. + + @param filterString filter string to be checked + @type str + @return flag indicating an end of string match filter + @rtype bool + """ + index = 0 + for filterChar in filterString: + if filterChar in ["^", "*"]: + return False + elif filterChar == "|": + return bool(index == len(filterString) - 1) + index += 1 + + return False + + def __isMatchingDomain(self, domain, filterString): + """ + Private method to check, if a given domain matches the given filter + string. + + @param domain domain to be checked + @type str + @param filterString filter string to check against + @type str + @return flag indicating a match + @rtype bool + """ + if filterString == domain: + return True + + if not domain.endswith(filterString): + return False + # endswith() matched the LAST occurrence, so search from the right + index = domain.rfind(filterString) + + return bool(index > 0 and domain[index - 1] == ".") + + def __isMatchingRegExpStrings(self, url): + """ + Private method to check the given URL against the fixed parts of + the regexp. 
+ + @param url URL to be checked + @type str + @return flag indicating a match + @rtype bool + """ + assert self.__regExp is not None + # pre-filter only; the regexp itself enforces case sensitivity + for matcher in self.__stringMatchers: + if matcher.lower() not in url.lower(): + return False + + return True + + def __parseRegExpFilter(self, filterString): + """ + Private method to split the given regular expression into strings that + can be used with 'in'. + + @param filterString regexp filter string to be parsed + @type str + @return fixed string parts of the filter + @rtype list of str + """ + matchers = [] + + startPos = -1 + for index in range(len(filterString)): + filterChar = filterString[index] + if filterChar in ["|", "*", "^"]: + sub = filterString[startPos:index] + if len(sub) > 1: + matchers.append(sub) + startPos = index + 1 + + sub = filterString[startPos:] + if len(sub) > 1: + matchers.append(sub) + + return list(set(matchers)) + + def ruleType(self): + """ + Public method to get the rule type. + + @return rule type + @rtype AdBlockRuleType + """ + return self.__type + + def ruleOptions(self): + """ + Public method to get the rule options. + + @return rule options + @rtype AdBlockRuleOption + """ + return self.__options + + def ruleExceptions(self): + """ + Public method to get the rule exceptions. + + @return rule exceptions + @rtype AdBlockRuleOption + """ + return self.__exceptions + + def matchString(self): + """ + Public method to get the match string. + + @return match string + @rtype str + """ + return self.__matchString + + def caseSensitivity(self): + """ + Public method to get the case sensitivity. + + @return case sensitivity + @rtype Qt.CaseSensitivity + """ + return self.__caseSensitivity + + def allowedDomains(self): + """ + Public method to get a copy of the list of allowed domains. + + @return list of allowed domains + @rtype list of str + """ + return self.__allowedDomains[:] + + def blockedDomains(self): + """ + Public method to get a copy of the list of blocked domains. 
+ + @return list of blocked domains + @rtype list of str + """ + return self.__blockedDomains[:] + + def addBlockedDomains(self, domains): + """ + Public method to add to the list of blocked domains. + + @param domains list of domains to be added + @type str or list of str + """ + if isinstance(domains, list): + self.__blockedDomains.extend(domains) + else: + self.__blockedDomains.append(domains) + + def getRegExpAndMatchers(self): + """ + Public method to get the regular expression and associated string + matchers. + + @return tuple containing the regular expression and the list of + string matchers + @rtype tuple of (QRegExp, list of str) + """ + if self.__regExp is not None: + return (QRegExp(self.__regExp), self.__stringMatchers[:]) + else: + return (None, []) + + def copyFrom(self, other): + """ + Public method to copy another AdBlock rule. + + @param other reference to the AdBlock rule to copy from + @type AdBlockRule + """ + self.__subscription = other.subscription() + self.__type = other.ruleType() + self.__options = other.ruleOptions() + self.__exceptions = other.ruleExceptions() + self.__filter = other.filter() + self.__matchString = other.matchString() + self.__caseSensitivity = other.caseSensitivity() + self.__isEnabled = other.isEnabled() + self.__isException = other.isException() + self.__isInternalDisabled = other.isInternalDisabled() + self.__allowedDomains = other.allowedDomains() + self.__blockedDomains = other.blockedDomains() + self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()