--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/eric6/WebBrowser/AdBlock/AdBlockRule.py Sun Apr 14 15:09:21 2019 +0200 @@ -0,0 +1,1157 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2009 - 2019 Detlev Offenbach <detlev@die-offenbachs.de> +# + +""" +Module implementing the AdBlock rule class. +""" + +from __future__ import unicode_literals + +import re + +from enum import IntEnum + +from PyQt5.QtCore import PYQT_VERSION, Qt, QRegExp +from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo + +from Globals import qVersionTuple + + +def toSecondLevelDomain(url): + """ + Module function to get a second level domain from the given URL. + + @param url URL to extract domain from + @type QUrl + @return name of second level domain + @rtype str + """ + topLevelDomain = url.topLevelDomain() + urlHost = url.host() + + if not topLevelDomain or not urlHost: + return "" + + domain = urlHost[:len(urlHost) - len(topLevelDomain)] + if domain.count(".") == 0: + return urlHost + + while domain.count(".") != 0: + domain = domain[domain.find(".") + 1:] + + return domain + topLevelDomain + + +class AdBlockRuleType(IntEnum): + """ + Class implementing the rule type enum. + """ + CssRule = 0 + DomainMatchRule = 1 + RegExpMatchRule = 2 + StringEndsMatchRule = 3 + StringContainsMatchRule = 4 + MatchAllUrlsRule = 5 + Invalid = 6 + + +class AdBlockRuleOption(IntEnum): + """ + Class implementing the rule option enum. + """ + NoOption = 0 + DomainRestrictedOption = 1 + ThirdPartyOption = 2 + ObjectOption = 4 + SubdocumentOption = 8 + XMLHttpRequestOption = 16 + ImageOption = 32 + ScriptOption = 64 + StyleSheetOption = 128 + ObjectSubrequestOption = 256 + PingOption = 512 + MediaOption = 1024 + FontOption = 2048 + OtherOption = 4096 + + # Exception only options + DocumentOption = 8192 + ElementHideOption = 16384 + + +class AdBlockRule(object): + """ + Class implementing the AdBlock rule. + """ + def __init__(self, filterRule="", subscription=None): + """ + Constructor + + @param filterRule filter string of the rule + @type str + @param subscription reference to the subscription object + @type AdBlockSubscription + """ + self.__subscription = subscription + + self.__regExp = None + self.__stringMatchers = [] + + self.__blockedDomains = [] + self.__allowedDomains = [] + + self.__isEnabled = True + self.__isException = False + self.__isInternalDisabled = False + self.__caseSensitivity = Qt.CaseInsensitive + + self.__type = AdBlockRuleType.StringContainsMatchRule + self.__options = AdBlockRuleOption.NoOption + self.__exceptions = AdBlockRuleOption.NoOption + + self.setFilter(filterRule) + + def subscription(self): + """ + Public method to get the subscription this rule belongs to. + + @return subscription of the rule + @rtype AdBlockSubscription + """ + return self.__subscription + + def setSubscription(self, subscription): + """ + Public method to set the subscription this rule belongs to. + + @param subscription subscription of the rule + @type AdBlockSubscription + """ + self.__subscription = subscription + + def filter(self): + """ + Public method to get the rule filter string. + + @return rule filter string + @rtype str + """ + return self.__filter + + def setFilter(self, filterRule): + """ + Public method to set the rule filter string. + + @param filterRule rule filter string + @type str + """ + self.__filter = filterRule + self.__parseFilter() + + def __parseFilter(self): + """ + Private method to parse the filter pattern. + """ + parsedLine = self.__filter + + # empty rule or just a comment + if not parsedLine.strip() or parsedLine.startswith("!"): + self.__isEnabled = False + self.__isInternalDisabled = True + self.__type = AdBlockRuleType.Invalid + return + + # CSS element hiding rule + if "##" in parsedLine or "#@#" in parsedLine: + self.__type = AdBlockRuleType.CssRule + pos = parsedLine.find("#") + + # domain restricted rule + if not parsedLine.startswith("##"): + domains = parsedLine[:pos] + self.__parseDomains(domains, ",") + + self.__isException = parsedLine[pos + 1] == "@" + if self.__isException: + self.__matchString = parsedLine[pos + 3:] + else: + self.__matchString = parsedLine[pos + 2:] + + # CSS rule cannot have more options -> stop parsing + return + + # Exception always starts with @@ + if parsedLine.startswith("@@"): + self.__isException = True + parsedLine = parsedLine[2:] + + # Parse all options following '$' character + optionsIndex = parsedLine.find("$") + if optionsIndex >= 0: + options = [opt + for opt in parsedLine[optionsIndex + 1:].split(",") + if opt] + + handledOptions = 0 + for option in options: + if option.startswith("domain="): + self.__parseDomains(option[7:], "|") + handledOptions += 1 + elif option == "match-case": + self.__caseSensitivity = Qt.CaseSensitive + handledOptions += 1 + elif option.endswith("third-party"): + self.setOption(AdBlockRuleOption.ThirdPartyOption) + self.__setException(AdBlockRuleOption.ThirdPartyOption, + option.startswith("~")) + handledOptions += 1 + elif option.endswith("object"): + self.setOption(AdBlockRuleOption.ObjectOption) + self.__setException(AdBlockRuleOption.ObjectOption, + option.startswith("~")) + handledOptions += 1 + elif option.endswith("subdocument"): + self.setOption(AdBlockRuleOption.SubdocumentOption) + self.__setException(AdBlockRuleOption.SubdocumentOption, + option.startswith("~")) + handledOptions += 1 + elif option.endswith("xmlhttprequest"): + self.setOption(AdBlockRuleOption.XMLHttpRequestOption) + self.__setException(AdBlockRuleOption.XMLHttpRequestOption, + option.startswith("~")) + handledOptions += 1 + elif option.endswith("image"): + self.setOption(AdBlockRuleOption.ImageOption) + self.__setException(AdBlockRuleOption.ImageOption, + option.startswith("~")) + elif option.endswith("script"): + self.setOption(AdBlockRuleOption.ScriptOption) + self.__setException(AdBlockRuleOption.ScriptOption, + option.startswith("~")) + elif option.endswith("stylesheet"): + self.setOption(AdBlockRuleOption.StyleSheetOption) + self.__setException(AdBlockRuleOption.StyleSheetOption, + option.startswith("~")) + elif option.endswith("object-subrequest"): + self.setOption(AdBlockRuleOption.ObjectSubrequestOption) + self.__setException( + AdBlockRuleOption.ObjectSubrequestOption, + option.startswith("~")) + elif option.endswith("ping"): + self.setOption(AdBlockRuleOption.PingOption) + self.__setException(AdBlockRuleOption.PingOption, + option.startswith("~")) + elif option.endswith("media"): + self.setOption(AdBlockRuleOption.MediaOption) + self.__setException(AdBlockRuleOption.MediaOption, + option.startswith("~")) + elif option.endswith("font"): + self.setOption(AdBlockRuleOption.FontOption) + self.__setException(AdBlockRuleOption.FontOption, + option.startswith("~")) + elif option.endswith("other"): + self.setOption(AdBlockRuleOption.OtherOption) + self.__setException(AdBlockRuleOption.OtherOption, + option.startswith("~")) + elif option == "document" and self.__isException: + self.setOption(AdBlockRuleOption.DocumentOption) + handledOptions += 1 + elif option == "elemhide" and self.__isException: + self.setOption(AdBlockRuleOption.ElementHideOption) + handledOptions += 1 + elif option == "collapse": + # Hiding placeholders of blocked elements is enabled by + # default + handledOptions += 1 + + # If we don't handle all options, it's safer to just disable + # this rule + if handledOptions != len(options): + self.__isInternalDisabled = True + self.__type = AdBlockRuleType.Invalid + return + + parsedLine = parsedLine[:optionsIndex] + + # Rule is classic regexp + if parsedLine.startswith("/") and parsedLine.endswith("/"): + parsedLine = parsedLine[1:-1] + self.__type = AdBlockRuleType.RegExpMatchRule + self.__regExp = QRegExp(parsedLine, self.__caseSensitivity, + QRegExp.RegExp) + self.__stringMatchers = self.__parseRegExpFilter(parsedLine) + return + + # Remove starting / ending wildcards (*) + if parsedLine.startswith("*"): + parsedLine = parsedLine[1:] + if parsedLine.endswith("*"): + parsedLine = parsedLine[:-1] + + # Fast string matching for domain here + if self.__filterIsOnlyDomain(parsedLine): + parsedLine = parsedLine[2:-1] + self.__type = AdBlockRuleType.DomainMatchRule + self.__matchString = parsedLine + return + + # If rule contains '|' only at the end, string matching can be used + if self.__filterIsOnlyEndsMatch(parsedLine): + parsedLine = parsedLine[:-1] + self.__type = AdBlockRuleType.StringEndsMatchRule + self.__matchString = parsedLine + return + + # If there is still a wildcard (*) or separator (^) or (|), + # the rule must be modified to comply with QRegExp. + if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: + self.__type = AdBlockRuleType.RegExpMatchRule + pattern = self.__convertPatternToRegExp(parsedLine) + self.__regExp = QRegExp(pattern, self.__caseSensitivity, + QRegExp.RegExp) + self.__stringMatchers = self.__parseRegExpFilter(parsedLine) + return + + # This rule matches all URLs + if len(parsedLine) == 0: + if self.__options == AdBlockRuleOption.NoOption: + self.__isInternalDisabled = True + self.__type = AdBlockRuleType.Invalid + return + + self.__type = AdBlockRuleType.MatchAllUrlsRule + return + + # no regexp required + self.__type = AdBlockRuleType.StringContainsMatchRule + self.__matchString = parsedLine + + def __parseDomains(self, domains, separator): + """ + Private method to parse a string with a domain list. + + @param domains list of domains + @type str + @param separator separator character used by the list + @type str + """ + domainsList = [d for d in domains.split(separator) if d] + + for domain in domainsList: + if not domain: + continue + if domain.startswith("~"): + self.__blockedDomains.append(domain[1:]) + else: + self.__allowedDomains.append(domain) + + if bool(self.__blockedDomains) or bool(self.__allowedDomains): + self.setOption(AdBlockRuleOption.DomainRestrictedOption) + + def networkMatch(self, request, domain, encodedUrl): + """ + Public method to check the rule for a match. + + @param request reference to the network request + @type QWebEngineUrlRequestInfo + @param domain domain name + @type str + @param encodedUrl string encoded URL to be checked + @type str + @return flag indicating a match + @rtype bool + """ + if self.__type == AdBlockRuleType.CssRule or \ + not self.__isEnabled or \ + self.__isInternalDisabled: + return False + + matched = self.__stringMatch(domain, encodedUrl) + + if matched: + # check domain restrictions + if self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and \ + not self.matchDomain(request.firstPartyUrl().host()): + return False + + # check third-party restrictions + if self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and \ + not self.matchThirdParty(request): + return False + + # check object restrictions + if self.__hasOption(AdBlockRuleOption.ObjectOption) and \ + not self.matchObject(request): + return False + + # check subdocument restrictions + if self.__hasOption(AdBlockRuleOption.SubdocumentOption) and \ + not self.matchSubdocument(request): + return False + + # check xmlhttprequest restriction + if self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and \ + not self.matchXmlHttpRequest(request): + return False + + # check image restriction + if self.__hasOption(AdBlockRuleOption.ImageOption) and \ + not self.matchImage(request): + return False + + # check script restriction + if self.__hasOption(AdBlockRuleOption.ScriptOption) and \ + not self.matchScript(request): + return False + + # check stylesheet restriction + if self.__hasOption(AdBlockRuleOption.StyleSheetOption) and \ + not self.matchStyleSheet(request): + return False + + # check object-subrequest restriction + if self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and \ + not self.matchObjectSubrequest(request): + return False + + # check ping restriction + if self.__hasOption(AdBlockRuleOption.PingOption) and \ + not self.matchPing(request): + return False + + # check media restriction + if self.__hasOption(AdBlockRuleOption.MediaOption) and \ + not self.matchMedia(request): + return False + + # check font restriction + if self.__hasOption(AdBlockRuleOption.FontOption) and \ + not self.matchFont(request): + return False + + return matched + + def urlMatch(self, url): + """ + Public method to check an URL against the rule. + + @param url URL to check + @type QUrl + @return flag indicating a match + @rtype bool + """ + if not self.__hasOption(AdBlockRuleOption.DocumentOption) and \ + not self.__hasOption(AdBlockRuleOption.ElementHideOption): + return False + + encodedUrl = bytes(url.toEncoded()).decode() + domain = url.host() + return self.__stringMatch(domain, encodedUrl) + + def __stringMatch(self, domain, encodedUrl): + """ + Private method to match a domain string. + + @param domain domain to match + @type str + @param encodedUrl URL in encoded form + @type str + @return flag indicating a match + @rtype bool + """ + matched = False + + if self.__type == AdBlockRuleType.StringContainsMatchRule: + if self.__caseSensitivity == Qt.CaseInsensitive: + matched = self.__matchString.lower() in encodedUrl.lower() + else: + matched = self.__matchString in encodedUrl + elif self.__type == AdBlockRuleType.DomainMatchRule: + matched = self.__isMatchingDomain(domain, self.__matchString) + elif self.__type == AdBlockRuleType.StringEndsMatchRule: + if self.__caseSensitivity == Qt.CaseInsensitive: + matched = encodedUrl.lower().endswith( + self.__matchString.lower()) + else: + matched = encodedUrl.endswith(self.__matchString) + elif self.__type == AdBlockRuleType.RegExpMatchRule: + if not self.__isMatchingRegExpStrings(encodedUrl): + matched = False + else: + matched = self.__regExp.indexIn(encodedUrl) != -1 + elif self.__type == AdBlockRuleType.MatchAllUrlsRule: + matched = True + + return matched + + def matchDomain(self, domain): + """ + Public method to match a domain. + + @param domain domain name to check + @type str + @return flag indicating a match + @rtype bool + """ + if not self.__isEnabled: + return False + + if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption): + return True + + if len(self.__blockedDomains) == 0: + for dom in self.__allowedDomains: + if self.__isMatchingDomain(domain, dom): + return True + elif len(self.__allowedDomains) == 0: + for dom in self.__blockedDomains: + if self.__isMatchingDomain(domain, dom): + return False + return True + else: + for dom in self.__blockedDomains: + if self.__isMatchingDomain(domain, dom): + return False + for dom in self.__allowedDomains: + if self.__isMatchingDomain(domain, dom): + return True + + return False + + def matchThirdParty(self, req): + """ + Public method to match a third-party rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype boolean + """ + # Third-party matching should be performed on second-level domains + firstPartyHost = toSecondLevelDomain(req.firstPartyUrl()) + host = toSecondLevelDomain(req.requestUrl()) + + match = firstPartyHost != host + + if self.__hasException(AdBlockRuleOption.ThirdPartyOption): + return not match + else: + return match + + def matchObject(self, req): + """ + Public method to match an object rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeObject) + + if self.__hasException(AdBlockRuleOption.ObjectOption): + return not match + else: + return match + + def matchSubdocument(self, req): + """ + Public method to match a sub-document rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype boolean + """ + match = ( + req.resourceType() == + QWebEngineUrlRequestInfo.ResourceTypeSubFrame) + + if self.__hasException(AdBlockRuleOption.SubdocumentOption): + return not match + else: + return match + + def matchXmlHttpRequest(self, req): + """ + Public method to match a XmlHttpRequest rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeXhr) + + if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption): + return not match + else: + return match + + def matchImage(self, req): + """ + Public method to match an Image rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeImage) + + if self.__hasException(AdBlockRuleOption.ImageOption): + return not match + else: + return match + + def matchScript(self, req): + """ + Public method to match a Script rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeScript) + + if self.__hasException(AdBlockRuleOption.ScriptOption): + return not match + else: + return match + + def matchStyleSheet(self, req): + """ + Public method to match a StyleSheet rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == + QWebEngineUrlRequestInfo.ResourceTypeStylesheet) + + if self.__hasException(AdBlockRuleOption.StyleSheetOption): + return not match + else: + return match + + def matchObjectSubrequest(self, req): + """ + Public method to match an Object Subrequest rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype boolean + """ + match = ( + req.resourceType() == + QWebEngineUrlRequestInfo.ResourceTypeSubResource) + if qVersionTuple() >= (5, 7, 0) and PYQT_VERSION >= 0x50700: + match = match or ( + req.resourceType() == + QWebEngineUrlRequestInfo.ResourceTypePluginResource) + + if self.__objectSubrequestException: + return not match + else: + return match + + def matchPing(self, req): + """ + Public method to match a Ping rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypePing) + + if self.__hasException(AdBlockRuleOption.PingOption): + return not match + else: + return match + + def matchMedia(self, req): + """ + Public method to match a Media rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == QWebEngineUrlRequestInfo.ResourceTypeMedia) + + if self.__hasException(AdBlockRuleOption.MediaOption): + return not match + else: + return match + + def matchFont(self, req): + """ + Public method to match a Font rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = ( + req.resourceType() == + QWebEngineUrlRequestInfo.ResourceTypeFontResource) + + if self.__hasException(AdBlockRuleOption.FontOption): + return not match + else: + return match + + def matchOther(self, req): + """ + Public method to match any other rule. + + @param req request object to check + @type QWebEngineUrlRequestInfo + @return flag indicating a match + @rtype bool + """ + match = req.resourceType() in [ + QWebEngineUrlRequestInfo.ResourceTypeSubResource, + QWebEngineUrlRequestInfo.ResourceTypeWorker, + QWebEngineUrlRequestInfo.ResourceTypeSharedWorker, + QWebEngineUrlRequestInfo.ResourceTypeServiceWorker, + QWebEngineUrlRequestInfo.ResourceTypePrefetch, + QWebEngineUrlRequestInfo.ResourceTypeFavicon, + QWebEngineUrlRequestInfo.ResourceTypeUnknown, + ] + + if self.__hasException(AdBlockRuleOption.OtherOption): + return not match + else: + return match + + def isException(self): + """ + Public method to check, if the rule defines an exception. + + @return flag indicating an exception + @rtype bool + """ + return self.__isException + + def setException(self, exception): + """ + Public method to set the rule's exception flag. + + @param exception flag indicating an exception rule + @type bool + """ + self.__isException = exception + + def isEnabled(self): + """ + Public method to check, if the rule is enabled. + + @return flag indicating enabled state + @rtype bool + """ + return self.__isEnabled + + def setEnabled(self, enabled): + """ + Public method to set the rule's enabled state. + + @param enabled flag indicating the new enabled state + @type bool + """ + self.__isEnabled = enabled + + def isCSSRule(self): + """ + Public method to check, if the rule is a CSS rule. + + @return flag indicating a CSS rule + @rtype bool + """ + return self.__type == AdBlockRuleType.CssRule + + def cssSelector(self): + """ + Public method to get the CSS selector of the rule. + + @return CSS selector + @rtype str + """ + return self.__matchString + + def isDocument(self): + """ + Public method to check, if this is a document rule. + + @return flag indicating a document rule + @rtype bool + """ + return self.__hasOption(AdBlockRuleOption.DocumentOption) + + def isElementHiding(self): + """ + Public method to check, if this is an element hiding rule. + + @return flag indicating an element hiding rule + @rtype bool + """ + return self.__hasOption(AdBlockRuleOption.ElementHideOption) + + def isDomainRestricted(self): + """ + Public method to check, if this rule is restricted by domain. + + @return flag indicating a domain restriction + @rtype bool + """ + return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) + + def isComment(self): + """ + Public method to check, if this is a comment. + + @return flag indicating a comment + @rtype bool + """ + return self.__filter.startswith("!") + + def isHeader(self): + """ + Public method to check, if this is a header. + + @return flag indicating a header + @rtype bool + """ + return self.__filter.startswith("[Adblock") + + def isSlow(self): + """ + Public method to check, if this is a slow rule. + + @return flag indicating a slow rule + @rtype bool + """ + return self.__regExp is not None + + def isInternalDisabled(self): + """ + Public method to check, if this rule was disabled internally. + + @return flag indicating an internally disabled rule + @rtype bool + """ + return self.__isInternalDisabled + + def __convertPatternToRegExp(self, wildcardPattern): + """ + Private method to convert a wildcard pattern to a regular expression. + + @param wildcardPattern string containing the wildcard pattern + @type str + @return string containing a regular expression + @rtype string + """ + pattern = wildcardPattern + + # remove multiple wildcards + pattern = re.sub(r"\*+", "*", pattern) + # remove anchors following separator placeholder + pattern = re.sub(r"\^\|$", "^", pattern) + # remove leading wildcards + pattern = re.sub(r"^(\*)", "", pattern) + # remove trailing wildcards + pattern = re.sub(r"(\*)$", "", pattern) + # escape special symbols + pattern = re.sub(r"(\W)", r"\\\1", pattern) + # process extended anchor at expression start + pattern = re.sub( + r"^\\\|\\\|", + r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern) + # process separator placeholders + pattern = re.sub(r"\\\^", r"(?:[^\w\d\-.%]|$)", pattern) + # process anchor at expression start + pattern = re.sub(r"^\\\|", "^", pattern) + # process anchor at expression end + pattern = re.sub(r"\\\|$", "$", pattern) + # replace wildcards by .* + pattern = re.sub(r"\\\*", ".*", pattern) + + return pattern + + def __hasOption(self, opt): + """ + Private method to check, if the given option has been set. + + @param opt option to check for + @type AdBlockRuleOption + @return flag indicating the state of the option + @rtype bool + """ + return bool(self.__options & opt) + + def setOption(self, opt): + """ + Public method to set the given option. + + @param opt option to be set + @type AdBlockRuleOption + """ + self.__options |= opt + + def __hasException(self, opt): + """ + Private method to check, if the given option has been set as an + exception. + + @param opt option to check for + @type AdBlockRuleOption + @return flag indicating the exception state of the option + @rtype bool + """ + return bool(self.__exceptions & opt) + + def __setException(self, opt, on): + """ + Private method to set the given option as an exception. + + @param opt option to be set + @type AdBlockRuleOption + @param on flag indicating to set or unset the exception + @type bool + """ + if on: + self.__exceptions |= opt + else: + self.__exceptions &= ~opt + + def __filterIsOnlyDomain(self, filterString): + """ + Private method to check, if the given filter is a domain only filter. + + @param filterString filter string to be checked + @type str + @return flag indicating a domain only filter + @rtype bool + """ + if not filterString.endswith("^") or not filterString.startswith("||"): + return False + + for filterChar in filterString: + if filterChar in ["/", ":", "?", "=", "&", "*"]: + return False + + return True + + def __filterIsOnlyEndsMatch(self, filterString): + """ + Private method to check, if the given filter is to match against the + end of a string. + + @param filterString filter string to be checked + @type str + @return flag indicating a end of string match filter + @rtype bool + """ + index = 0 + for filterChar in filterString: + if filterChar in ["^", "*"]: + return False + elif filterChar == "|": + return bool(index == len(filterString) - 1) + index += 1 + + return False + + def __isMatchingDomain(self, domain, filterString): + """ + Private method to check, if a given domain matches the given filter + string. + + @param domain domain to be checked + @type str + @param filterString filter string to check against + @type str + @return flag indicating a match + @rtype bool + """ + if filterString == domain: + return True + + if not domain.endswith(filterString): + return False + + index = domain.find(filterString) + + return bool(index > 0 and domain[index - 1] == ".") + + def __isMatchingRegExpStrings(self, url): + """ + Private method to check the given URL against the fixed parts of + the regexp. + + @param url URL to be checked + @type str + @return flag indicating a match + @rtype bool + """ + assert self.__regExp is not None + + for matcher in self.__stringMatchers: + if matcher not in url: + return False + + return True + + def __parseRegExpFilter(self, filterString): + """ + Private method to split the given regular expression into strings that + can be used with 'in'. + + @param filterString regexp filter string to be parsed + @type str + @return fixed string parts of the filter + @rtype list of str + """ + matchers = [] + + startPos = -1 + for index in range(len(filterString)): + filterChar = filterString[index] + if filterChar in ["|", "*", "^"]: + sub = filterString[startPos:index] + if len(sub) > 1: + matchers.append(sub) + startPos = index + 1 + + sub = filterString[startPos:] + if len(sub) > 1: + matchers.append(sub) + + return list(set(matchers)) + + def ruleType(self): + """ + Public method to get the rule type. + + @return rule type + @rtype AdBlockRuleType + """ + return self.__type + + def ruleOptions(self): + """ + Public method to get the rule options. + + @return rule options + @rtype AdBlockRuleOption + """ + return self.__options + + def ruleExceptions(self): + """ + Public method to get the rule exceptions. + + @return rule exceptions + @rtype AdBlockRuleOption + """ + return self.__exceptions + + def matchString(self): + """ + Public method to get the match string. + + @return match string + @rtype str + """ + return self.__matchString + + def caseSensitivity(self): + """ + Public method to get the case sensitivity. + + @return case sensitivity + @rtype Qt.CaseSensitivity + """ + return self.__caseSensitivity + + def allowedDomains(self): + """ + Public method to get a copy of the list of allowed domains. + + @return list of allowed domains + @rtype list of str + """ + return self.__allowedDomains[:] + + def blockedDomains(self): + """ + Public method to get a copy of the list of blocked domains. + + @return list of blocked domains + @rtype list of str + """ + return self.__blockedDomains[:] + + def addBlockedDomains(self, domains): + """ + Public method to add to the list of blocked domains. + + @param domains list of domains to be added + @type str or list of str + """ + if isinstance(domains, list): + self.__blockedDomains.extend(domains) + else: + self.__blockedDomains.append(domains) + + def getRegExpAndMatchers(self): + """ + Public method to get the regular expression and associated string + matchers. + + @return tuple containing the regular expression and the list of + string matchers + @rtype tuple of (QRegExp, list of str) + """ + if self.__regExp is not None: + return (QRegExp(self.__regExp), self.__stringMatchers[:]) + else: + return (None, []) + + def copyFrom(self, other): + """ + Public method to copy another AdBlock rule. + + @param other reference to the AdBlock rule to copy from + @type AdBlockRule + """ + self.__subscription = other.subscription() + self.__type = other.ruleType() + self.__options = other.ruleOptions() + self.__exceptions = other.ruleExceptions() + self.__filter = other.filter() + self.__matchString = other.matchString() + self.__caseSensitivity = other.caseSensitivity() + self.__isEnabled = other.isEnabled() + self.__isException = other.isException() + self.__isInternalDisabled = other.isInternalDisabled() + self.__allowedDomains = other.allowedDomains() + self.__blockedDomains = other.blockedDomains() + self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()