--- a/src/eric7/WebBrowser/AdBlock/AdBlockRule.py Wed Jul 13 11:16:20 2022 +0200 +++ b/src/eric7/WebBrowser/AdBlock/AdBlockRule.py Wed Jul 13 14:55:47 2022 +0200 @@ -19,7 +19,7 @@ def toSecondLevelDomain(url): """ Module function to get a second level domain from the given URL. - + @param url URL to extract domain from @type QUrl @return name of second level domain @@ -27,17 +27,17 @@ """ topLevelDomain = EricTldExtractor.instance().tld(url.host()) urlHost = url.host() - + if not topLevelDomain or not urlHost: return "" - - domain = urlHost[:len(urlHost) - len(topLevelDomain)] + + domain = urlHost[: len(urlHost) - len(topLevelDomain)] if domain.count(".") == 0: return urlHost - + while domain.count(".") != 0: - domain = domain[domain.find(".") + 1:] - + domain = domain[domain.find(".") + 1 :] + return domain + topLevelDomain @@ -45,6 +45,7 @@ """ Class implementing the rule type enum. """ + CssRule = 0 DomainMatchRule = 1 RegExpMatchRule = 2 @@ -58,6 +59,7 @@ """ Class implementing the rule option enum. """ + NoOption = 0 DomainRestrictedOption = 1 ThirdPartyOption = 2 @@ -72,7 +74,7 @@ MediaOption = 1024 FontOption = 2048 OtherOption = 4096 - + # Exception only options DocumentOption = 8192 ElementHideOption = 16384 @@ -82,115 +84,114 @@ """ Class implementing the AdBlock rule. """ + def __init__(self, filterRule="", subscription=None): """ Constructor - + @param filterRule filter string of the rule @type str @param subscription reference to the subscription object @type AdBlockSubscription """ self.__subscription = subscription - + self.__regExp = None self.__stringMatchers = [] - + self.__blockedDomains = [] self.__allowedDomains = [] - + self.__isEnabled = True self.__isException = False self.__isInternalDisabled = False self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive - + self.__type = AdBlockRuleType.StringContainsMatchRule self.__options = AdBlockRuleOption.NoOption self.__exceptions = AdBlockRuleOption.NoOption - + self.setFilter(filterRule) - + def subscription(self): """ Public method to get the subscription this rule belongs to. - + @return subscription of the rule @rtype AdBlockSubscription """ return self.__subscription - + def setSubscription(self, subscription): """ Public method to set the subscription this rule belongs to. - + @param subscription subscription of the rule @type AdBlockSubscription """ self.__subscription = subscription - + def filter(self): """ Public method to get the rule filter string. - + @return rule filter string @rtype str """ return self.__filter - + def setFilter(self, filterRule): """ Public method to set the rule filter string. - + @param filterRule rule filter string @type str """ self.__filter = filterRule self.__parseFilter() - + def __parseFilter(self): """ Private method to parse the filter pattern. """ parsedLine = self.__filter - + # empty rule or just a comment if not parsedLine.strip() or parsedLine.startswith("!"): self.__isEnabled = False self.__isInternalDisabled = True self.__type = AdBlockRuleType.Invalid return - + # CSS element hiding rule if "##" in parsedLine or "#@#" in parsedLine: self.__type = AdBlockRuleType.CssRule pos = parsedLine.find("#") - + # domain restricted rule if not parsedLine.startswith("##"): domains = parsedLine[:pos] self.__parseDomains(domains, ",") - + self.__isException = parsedLine[pos + 1] == "@" if self.__isException: - self.__matchString = parsedLine[pos + 3:] + self.__matchString = parsedLine[pos + 3 :] else: - self.__matchString = parsedLine[pos + 2:] - + self.__matchString = parsedLine[pos + 2 :] + # CSS rule cannot have more options -> stop parsing return - + # Exception always starts with @@ if parsedLine.startswith("@@"): self.__isException = True parsedLine = parsedLine[2:] - + # Parse all options following '$' character optionsIndex = parsedLine.find("$") if optionsIndex >= 0: - options = [opt - for opt in parsedLine[optionsIndex + 1:].split(",") - if opt] - + options = [opt for opt in parsedLine[optionsIndex + 1 :].split(",") if opt] + handledOptions = 0 for option in options: if option.startswith("domain="): @@ -201,57 +202,68 @@ handledOptions += 1 elif option.endswith("third-party"): self.setOption(AdBlockRuleOption.ThirdPartyOption) - self.__setException(AdBlockRuleOption.ThirdPartyOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.ThirdPartyOption, option.startswith("~") + ) handledOptions += 1 elif option.endswith("object"): self.setOption(AdBlockRuleOption.ObjectOption) - self.__setException(AdBlockRuleOption.ObjectOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.ObjectOption, option.startswith("~") + ) handledOptions += 1 elif option.endswith("subdocument"): self.setOption(AdBlockRuleOption.SubdocumentOption) - self.__setException(AdBlockRuleOption.SubdocumentOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.SubdocumentOption, option.startswith("~") + ) handledOptions += 1 elif option.endswith("xmlhttprequest"): self.setOption(AdBlockRuleOption.XMLHttpRequestOption) - self.__setException(AdBlockRuleOption.XMLHttpRequestOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.XMLHttpRequestOption, option.startswith("~") + ) handledOptions += 1 elif option.endswith("image"): self.setOption(AdBlockRuleOption.ImageOption) - self.__setException(AdBlockRuleOption.ImageOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.ImageOption, option.startswith("~") + ) elif option.endswith("script"): self.setOption(AdBlockRuleOption.ScriptOption) - self.__setException(AdBlockRuleOption.ScriptOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.ScriptOption, option.startswith("~") + ) elif option.endswith("stylesheet"): self.setOption(AdBlockRuleOption.StyleSheetOption) - self.__setException(AdBlockRuleOption.StyleSheetOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.StyleSheetOption, option.startswith("~") + ) elif option.endswith("object-subrequest"): self.setOption(AdBlockRuleOption.ObjectSubrequestOption) self.__setException( - AdBlockRuleOption.ObjectSubrequestOption, - option.startswith("~")) + AdBlockRuleOption.ObjectSubrequestOption, option.startswith("~") + ) elif option.endswith("ping"): self.setOption(AdBlockRuleOption.PingOption) - self.__setException(AdBlockRuleOption.PingOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.PingOption, option.startswith("~") + ) elif option.endswith("media"): self.setOption(AdBlockRuleOption.MediaOption) - self.__setException(AdBlockRuleOption.MediaOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.MediaOption, option.startswith("~") + ) elif option.endswith("font"): self.setOption(AdBlockRuleOption.FontOption) - self.__setException(AdBlockRuleOption.FontOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.FontOption, option.startswith("~") + ) elif option.endswith("other"): self.setOption(AdBlockRuleOption.OtherOption) - self.__setException(AdBlockRuleOption.OtherOption, - option.startswith("~")) + self.__setException( + AdBlockRuleOption.OtherOption, option.startswith("~") + ) elif option == "document" and self.__isException: self.setOption(AdBlockRuleOption.DocumentOption) handledOptions += 1 @@ -262,16 +274,16 @@ # Hiding placeholders of blocked elements is enabled by # default handledOptions += 1 - + # If we don't handle all options, it's safer to just disable # this rule if handledOptions != len(options): self.__isInternalDisabled = True self.__type = AdBlockRuleType.Invalid return - + parsedLine = parsedLine[:optionsIndex] - + # Rule is classic regexp if parsedLine.startswith("/") and parsedLine.endswith("/"): parsedLine = parsedLine[1:-1] @@ -282,27 +294,27 @@ self.__regExp = re.compile(parsedLine, re.IGNORECASE) self.__stringMatchers = self.__parseRegExpFilter(parsedLine) return - + # Remove starting / ending wildcards (*) if parsedLine.startswith("*"): parsedLine = parsedLine[1:] if parsedLine.endswith("*"): parsedLine = parsedLine[:-1] - + # Fast string matching for domain here if self.__filterIsOnlyDomain(parsedLine): parsedLine = parsedLine[2:-1] self.__type = AdBlockRuleType.DomainMatchRule self.__matchString = parsedLine return - + # If rule contains '|' only at the end, string matching can be used if self.__filterIsOnlyEndsMatch(parsedLine): parsedLine = parsedLine[:-1] self.__type = AdBlockRuleType.StringEndsMatchRule self.__matchString = parsedLine return - + # If there is still a wildcard (*) or separator (^) or (|), # the rule must be modified to comply with re. if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: @@ -314,32 +326,32 @@ self.__regExp = re.compile(pattern, re.IGNORECASE) self.__stringMatchers = self.__parseRegExpFilter(parsedLine) return - + # This rule matches all URLs if len(parsedLine) == 0: if self.__options == AdBlockRuleOption.NoOption: self.__isInternalDisabled = True self.__type = AdBlockRuleType.Invalid return - + self.__type = AdBlockRuleType.MatchAllUrlsRule return - + # no regexp required self.__type = AdBlockRuleType.StringContainsMatchRule self.__matchString = parsedLine - + def __parseDomains(self, domains, separator): """ Private method to parse a string with a domain list. - + @param domains list of domains @type str @param separator separator character used by the list @type str """ domainsList = [d for d in domains.split(separator) if d] - + for domain in domainsList: if not domain: continue @@ -347,14 +359,14 @@ self.__blockedDomains.append(domain[1:]) else: self.__allowedDomains.append(domain) - + if bool(self.__blockedDomains) or bool(self.__allowedDomains): self.setOption(AdBlockRuleOption.DomainRestrictedOption) - + def networkMatch(self, request, domain, encodedUrl): """ Public method to check the rule for a match. - + @param request reference to the network request @type QWebEngineUrlRequestInfo @param domain domain name @@ -365,124 +377,111 @@ @rtype bool """ if ( - self.__type == AdBlockRuleType.CssRule or - not self.__isEnabled or - self.__isInternalDisabled + self.__type == AdBlockRuleType.CssRule + or not self.__isEnabled + or self.__isInternalDisabled ): return False - + matched = self.__stringMatch(domain, encodedUrl) - + if matched: # check domain restrictions - if ( - self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and - not self.matchDomain(request.firstPartyUrl().host()) - ): + if self.__hasOption( + AdBlockRuleOption.DomainRestrictedOption + ) and not self.matchDomain(request.firstPartyUrl().host()): return False - + # check third-party restrictions - if ( - self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and - not self.matchThirdParty(request) - ): + if self.__hasOption( + AdBlockRuleOption.ThirdPartyOption + ) and not self.matchThirdParty(request): return False - + # check object restrictions - if ( - self.__hasOption(AdBlockRuleOption.ObjectOption) and - not self.matchObject(request) - ): + if self.__hasOption( + AdBlockRuleOption.ObjectOption + ) and not self.matchObject(request): return False - + # check subdocument restrictions - if ( - self.__hasOption(AdBlockRuleOption.SubdocumentOption) and - not self.matchSubdocument(request) - ): + if self.__hasOption( + AdBlockRuleOption.SubdocumentOption + ) and not self.matchSubdocument(request): return False - + # check xmlhttprequest restriction - if ( - self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and - not self.matchXmlHttpRequest(request) + if self.__hasOption( + AdBlockRuleOption.XMLHttpRequestOption + ) and not self.matchXmlHttpRequest(request): + return False + + # check image restriction + if self.__hasOption(AdBlockRuleOption.ImageOption) and not self.matchImage( + request ): return False - - # check image restriction - if ( - self.__hasOption(AdBlockRuleOption.ImageOption) and - not self.matchImage(request) - ): - return False - + # check script restriction - if ( - self.__hasOption(AdBlockRuleOption.ScriptOption) and - not self.matchScript(request) - ): + if self.__hasOption( + AdBlockRuleOption.ScriptOption + ) and not self.matchScript(request): + return False + + # check stylesheet restriction + if self.__hasOption( + AdBlockRuleOption.StyleSheetOption + ) and not self.matchStyleSheet(request): return False - - # check stylesheet restriction - if ( - self.__hasOption(AdBlockRuleOption.StyleSheetOption) and - not self.matchStyleSheet(request) + + # check object-subrequest restriction + if self.__hasOption( + AdBlockRuleOption.ObjectSubrequestOption + ) and not self.matchObjectSubrequest(request): + return False + + # check ping restriction + if self.__hasOption(AdBlockRuleOption.PingOption) and not self.matchPing( + request ): return False - - # check object-subrequest restriction - if ( - self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and - not self.matchObjectSubrequest(request) - ): - return False - - # check ping restriction - if ( - self.__hasOption(AdBlockRuleOption.PingOption) and - not self.matchPing(request) + + # check media restriction + if self.__hasOption(AdBlockRuleOption.MediaOption) and not self.matchMedia( + request ): return False - - # check media restriction - if ( - self.__hasOption(AdBlockRuleOption.MediaOption) and - not self.matchMedia(request) + + # check font restriction + if self.__hasOption(AdBlockRuleOption.FontOption) and not self.matchFont( + request ): return False - - # check font restriction - if ( - self.__hasOption(AdBlockRuleOption.FontOption) and - not self.matchFont(request) - ): - return False - + return matched - + def urlMatch(self, url): """ Public method to check an URL against the rule. - + @param url URL to check @type QUrl @return flag indicating a match @rtype bool """ - if ( - not self.__hasOption(AdBlockRuleOption.DocumentOption) and - not self.__hasOption(AdBlockRuleOption.ElementHideOption) - ): + if not self.__hasOption( + AdBlockRuleOption.DocumentOption + ) and not self.__hasOption(AdBlockRuleOption.ElementHideOption): return False - + encodedUrl = bytes(url.toEncoded()).decode() domain = url.host() return self.__stringMatch(domain, encodedUrl) - + def __stringMatch(self, domain, encodedUrl): """ Private method to match a domain string. - + @param domain domain to match @type str @param encodedUrl URL in encoded form @@ -491,7 +490,7 @@ @rtype bool """ matched = False - + if self.__type == AdBlockRuleType.StringContainsMatchRule: if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive: matched = self.__matchString.lower() in encodedUrl.lower() @@ -501,8 +500,7 @@ matched = self.__isMatchingDomain(domain, self.__matchString) elif self.__type == AdBlockRuleType.StringEndsMatchRule: if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive: - matched = encodedUrl.lower().endswith( - self.__matchString.lower()) + matched = encodedUrl.lower().endswith(self.__matchString.lower()) else: matched = encodedUrl.endswith(self.__matchString) elif self.__type == AdBlockRuleType.RegExpMatchRule: @@ -512,13 +510,13 @@ matched = self.__regExp.search(encodedUrl) is not None elif self.__type == AdBlockRuleType.MatchAllUrlsRule: matched = True - + return matched - + def matchDomain(self, domain): """ Public method to match a domain. - + @param domain domain name to check @type str @return flag indicating a match @@ -526,28 +524,31 @@ """ if not self.__isEnabled: return False - + if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption): return True - + if len(self.__blockedDomains) == 0: - return any(self.__isMatchingDomain(domain, dom) - for dom in self.__allowedDomains) + return any( + self.__isMatchingDomain(domain, dom) for dom in self.__allowedDomains + ) elif len(self.__allowedDomains) == 0: - return all(not self.__isMatchingDomain(domain, dom) - for dom in self.__blockedDomains) + return all( + not self.__isMatchingDomain(domain, dom) + for dom in self.__blockedDomains + ) else: - return ( - all(not self.__isMatchingDomain(domain, dom) - for dom in self.__blockedDomains) and - any(self.__isMatchingDomain(domain, dom) - for dom in self.__allowedDomains) + return all( + not self.__isMatchingDomain(domain, dom) + for dom in self.__blockedDomains + ) and any( + self.__isMatchingDomain(domain, dom) for dom in self.__allowedDomains ) - + def matchThirdParty(self, req): """ Public method to match a third-party rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @@ -556,203 +557,210 @@ # Third-party matching should be performed on second-level domains firstPartyHost = toSecondLevelDomain(req.firstPartyUrl()) host = toSecondLevelDomain(req.requestUrl()) - + match = firstPartyHost != host - + if self.__hasException(AdBlockRuleOption.ThirdPartyOption): return not match else: return match - + def matchObject(self, req): """ Public method to match an object rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject + ) + if self.__hasException(AdBlockRuleOption.ObjectOption): return not match else: return match - + def matchSubdocument(self, req): """ Public method to match a sub-document rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype boolean """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame + ) + if self.__hasException(AdBlockRuleOption.SubdocumentOption): return not match else: return match - + def matchXmlHttpRequest(self, req): """ Public method to match a XmlHttpRequest rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr) - + req.resourceType() == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr + ) + if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption): return not match else: return match - + def matchImage(self, req): """ Public method to match an Image rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage + ) + if self.__hasException(AdBlockRuleOption.ImageOption): return not match else: return match - + def matchScript(self, req): """ Public method to match a Script rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript + ) + if self.__hasException(AdBlockRuleOption.ScriptOption): return not match else: return match - + def matchStyleSheet(self, req): """ Public method to match a StyleSheet rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet + ) + if self.__hasException(AdBlockRuleOption.StyleSheetOption): return not match else: return match - + def matchObjectSubrequest(self, req): """ Public method to match an Object Subrequest rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype boolean """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource ) match = match or ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypePluginResource + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypePluginResource ) - + if self.__objectSubrequestException: return not match else: return match - + def matchPing(self, req): """ Public method to match a Ping rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing) - + req.resourceType() == QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing + ) + if self.__hasException(AdBlockRuleOption.PingOption): return not match else: return match - + def matchMedia(self, req): """ Public method to match a Media rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia + ) + if self.__hasException(AdBlockRuleOption.MediaOption): return not match else: return match - + def matchFont(self, req): """ Public method to match a Font rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @rtype bool """ match = ( - req.resourceType() == - QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource) - + req.resourceType() + == QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource + ) + if self.__hasException(AdBlockRuleOption.FontOption): return not match else: return match - + def matchOther(self, req): """ Public method to match any other rule. - + @param req request object to check @type QWebEngineUrlRequestInfo @return flag indicating a match @@ -767,140 +775,140 @@ QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFavicon, QWebEngineUrlRequestInfo.ResourceType.ResourceTypeUnknown, ] - + if self.__hasException(AdBlockRuleOption.OtherOption): return not match else: return match - + def isException(self): """ Public method to check, if the rule defines an exception. - + @return flag indicating an exception @rtype bool """ return self.__isException - + def setException(self, exception): """ Public method to set the rule's exception flag. - + @param exception flag indicating an exception rule @type bool """ self.__isException = exception - + def isEnabled(self): """ Public method to check, if the rule is enabled. - + @return flag indicating enabled state @rtype bool """ return self.__isEnabled - + def setEnabled(self, enabled): """ Public method to set the rule's enabled state. - + @param enabled flag indicating the new enabled state @type bool """ self.__isEnabled = enabled - + def isCSSRule(self): """ Public method to check, if the rule is a CSS rule. - + @return flag indicating a CSS rule @rtype bool """ return self.__type == AdBlockRuleType.CssRule - + def cssSelector(self): """ Public method to get the CSS selector of the rule. - + @return CSS selector @rtype str """ return self.__matchString - + def isDocument(self): """ Public method to check, if this is a document rule. - + @return flag indicating a document rule @rtype bool """ return self.__hasOption(AdBlockRuleOption.DocumentOption) - + def isElementHiding(self): """ Public method to check, if this is an element hiding rule. - + @return flag indicating an element hiding rule @rtype bool """ return self.__hasOption(AdBlockRuleOption.ElementHideOption) - + def isDomainRestricted(self): """ Public method to check, if this rule is restricted by domain. - + @return flag indicating a domain restriction @rtype bool """ return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) - + def isComment(self): """ Public method to check, if this is a comment. - + @return flag indicating a comment @rtype bool """ return self.__filter.startswith("!") - + def isHeader(self): """ Public method to check, if this is a header. - + @return flag indicating a header @rtype bool """ return self.__filter.startswith("[Adblock") - + def isSlow(self): """ Public method to check, if this is a slow rule. - + @return flag indicating a slow rule @rtype bool """ return self.__regExp is not None - + def isInternalDisabled(self): """ Public method to check, if this rule was disabled internally. - + @return flag indicating an internally disabled rule @rtype bool """ return self.__isInternalDisabled - + def __convertPatternToRegExp(self, wildcardPattern): """ Private method to convert a wildcard pattern to a regular expression. - + @param wildcardPattern string containing the wildcard pattern @type str @return string containing a regular expression @rtype string """ pattern = wildcardPattern - + # remove multiple wildcards pattern = re.sub(r"\*+", "*", pattern) # remove anchors following separator placeholder @@ -912,9 +920,7 @@ # escape special symbols pattern = re.sub(r"(\W)", r"\\\1", pattern) # process extended anchor at expression start - pattern = re.sub( - r"^\\\|\\\|", - r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern) + pattern = re.sub(r"^\\\|\\\|", r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern) # process separator placeholders pattern = re.sub(r"\\\^", r"(?:[^\\w\\d\-.%]|$)", pattern) # process anchor at expression start @@ -923,45 +929,45 @@ pattern = re.sub(r"\\\|$", "$", pattern) # replace wildcards by .* pattern = re.sub(r"\\\*", ".*", pattern) - + return pattern - + def __hasOption(self, opt): """ Private method to check, if the given option has been set. - + @param opt option to check for @type AdBlockRuleOption @return flag indicating the state of the option @rtype bool """ return bool(self.__options & opt) - + def setOption(self, opt): """ Public method to set the given option. - + @param opt option to be set @type AdBlockRuleOption """ self.__options |= opt - + def __hasException(self, opt): """ Private method to check, if the given option has been set as an exception. - + @param opt option to check for @type AdBlockRuleOption @return flag indicating the exception state of the option @rtype bool """ return bool(self.__exceptions & opt) - + def __setException(self, opt, on): """ Private method to set the given option as an exception. - + @param opt option to be set @type AdBlockRuleOption @param on flag indicating to set or unset the exception @@ -971,11 +977,11 @@ self.__exceptions |= opt else: self.__exceptions &= ~opt - + def __filterIsOnlyDomain(self, filterString): """ Private method to check, if the given filter is a domain only filter. - + @param filterString filter string to be checked @type str @return flag indicating a domain only filter @@ -983,15 +989,17 @@ """ if not filterString.endswith("^") or not filterString.startswith("||"): return False - - return all(filterChar not in ["/", ":", "?", "=", "&", "*"] - for filterChar in filterString) - + + return all( + filterChar not in ["/", ":", "?", "=", "&", "*"] + for filterChar in filterString + ) + def __filterIsOnlyEndsMatch(self, filterString): """ Private method to check, if the given filter is to match against the end of a string. - + @param filterString filter string to be checked @type str @return flag indicating a end of string match filter @@ -1003,14 +1011,14 @@ return False elif filterChar == "|": return index == len(filterString) - 1 - + return False - + def __isMatchingDomain(self, domain, filterString): """ Private method to check, if a given domain matches the given filter string. - + @param domain domain to be checked @type str @param filterString filter string to check against @@ -1020,19 +1028,19 @@ """ if filterString == domain: return True - + if not domain.endswith(filterString): return False - + index = domain.find(filterString) - + return bool(index > 0 and domain[index - 1] == ".") - + def __isMatchingRegExpStrings(self, url): """ Private method to check the given URL against the fixed parts of the regexp. - + @param url URL to be checked @type str @return flag indicating a match @@ -1040,21 +1048,21 @@ """ if self.__regExp is not None: return all(matcher in url for matcher in self.__stringMatchers) - + return True - + def __parseRegExpFilter(self, filterString): """ Private method to split the given regular expression into strings that can be used with 'in'. - + @param filterString regexp filter string to be parsed @type str @return fixed string parts of the filter @rtype list of str """ matchers = [] - + startPos = -1 for index in range(len(filterString)): filterChar = filterString[index] @@ -1063,80 +1071,80 @@ if len(sub) > 1: matchers.append(sub) startPos = index + 1 - + sub = filterString[startPos:] if len(sub) > 1: matchers.append(sub) - + return list(set(matchers)) - + def ruleType(self): """ Public method to get the rule type. - + @return rule type @rtype AdBlockRuleType """ return self.__type - + def ruleOptions(self): """ Public method to get the rule options. - + @return rule options @rtype AdBlockRuleOption """ return self.__options - + def ruleExceptions(self): """ Public method to get the rule exceptions. - + @return rule exceptions @rtype AdBlockRuleOption """ return self.__exceptions - + def matchString(self): """ Public method to get the match string. - + @return match string @rtype str """ return self.__matchString - + def caseSensitivity(self): """ Public method to get the case sensitivity. - + @return case sensitivity @rtype Qt.CaseSensitivity """ return self.__caseSensitivity - + def allowedDomains(self): """ Public method to get a copy of the list of allowed domains. - + @return list of allowed domains @rtype list of str """ return self.__allowedDomains[:] - + def blockedDomains(self): """ Public method to get a copy of the list of blocked domains. - + @return list of blocked domains @rtype list of str """ return self.__blockedDomains[:] - + def addBlockedDomains(self, domains): """ Public method to add to the list of blocked domains. - + @param domains list of domains to be added @type str or list of str """ @@ -1144,26 +1152,25 @@ self.__blockedDomains.extend(domains) else: self.__blockedDomains.append(domains) - + def getRegExpAndMatchers(self): """ Public method to get the regular expression and associated string matchers. - + @return tuple containing the regular expression and the list of string matchers @rtype tuple of (re.Pattern, list of str) """ if self.__regExp is not None: - return (re.compile(self.__regExp.pattern), - self.__stringMatchers[:]) + return (re.compile(self.__regExp.pattern), self.__stringMatchers[:]) else: return (None, []) - + def copyFrom(self, other): """ Public method to copy another AdBlock rule. - + @param other reference to the AdBlock rule to copy from @type AdBlockRule """