eric: comparison eric7/WebBrowser/AdBlock/AdBlockRule.py

-:4e8b98454baa
+:800c432b34c8
+# -*- coding: utf-8 -*-
+# Copyright (c) 2009 - 2021 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+"""
+Module implementing the AdBlock rule class.
+"""
+import re
+from enum import IntEnum
+from PyQt5.QtCore import Qt
+from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo
+def toSecondLevelDomain(url):
+"""
+Module function to get a second level domain from the given URL.
+@param url URL to extract domain from
+@type QUrl
+@return name of second level domain
+@rtype str
+"""
+topLevelDomain = url.topLevelDomain()
+urlHost = url.host()
+if not topLevelDomain or not urlHost:
+return ""
+domain = urlHost[:len(urlHost) - len(topLevelDomain)]
+if domain.count(".") == 0:
+return urlHost
+while domain.count(".") != 0:
+domain = domain[domain.find(".") + 1:]
+return domain + topLevelDomain
+class AdBlockRuleType(IntEnum):
+"""
+Class implementing the rule type enum.
+"""
+CssRule = 0
+DomainMatchRule = 1
+RegExpMatchRule = 2
+StringEndsMatchRule = 3
+StringContainsMatchRule = 4
+MatchAllUrlsRule = 5
+Invalid = 6
+class AdBlockRuleOption(IntEnum):
+"""
+Class implementing the rule option enum.
+"""
+NoOption = 0
+DomainRestrictedOption = 1
+ThirdPartyOption = 2
+ObjectOption = 4
+SubdocumentOption = 8
+XMLHttpRequestOption = 16
+ImageOption = 32
+ScriptOption = 64
+StyleSheetOption = 128
+ObjectSubrequestOption = 256
+PingOption = 512
+MediaOption = 1024
+FontOption = 2048
+OtherOption = 4096
+# Exception only options
+DocumentOption = 8192
+ElementHideOption = 16384
+class AdBlockRule:
+"""
+Class implementing the AdBlock rule.
+"""
+def __init__(self, filterRule="", subscription=None):
+"""
+Constructor
+@param filterRule filter string of the rule
+@type str
+@param subscription reference to the subscription object
+@type AdBlockSubscription
+"""
+self.__subscription = subscription
+self.__regExp = None
+self.__stringMatchers = []
+self.__blockedDomains = []
+self.__allowedDomains = []
+self.__isEnabled = True
+self.__isException = False
+self.__isInternalDisabled = False
+self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive
+self.__type = AdBlockRuleType.StringContainsMatchRule
+self.__options = AdBlockRuleOption.NoOption
+self.__exceptions = AdBlockRuleOption.NoOption
+self.setFilter(filterRule)
+def subscription(self):
+"""
+Public method to get the subscription this rule belongs to.
+@return subscription of the rule
+@rtype AdBlockSubscription
+"""
+return self.__subscription
+def setSubscription(self, subscription):
+"""
+Public method to set the subscription this rule belongs to.
+@param subscription subscription of the rule
+@type AdBlockSubscription
+"""
+self.__subscription = subscription
+def filter(self):
+"""
+Public method to get the rule filter string.
+@return rule filter string
+@rtype str
+"""
+return self.__filter
+def setFilter(self, filterRule):
+"""
+Public method to set the rule filter string.
+@param filterRule rule filter string
+@type str
+"""
+self.__filter = filterRule
+self.__parseFilter()
+def __parseFilter(self):
+"""
+Private method to parse the filter pattern.
+"""
+parsedLine = self.__filter
+# empty rule or just a comment
+if not parsedLine.strip() or parsedLine.startswith("!"):
+self.__isEnabled = False
+self.__isInternalDisabled = True
+self.__type = AdBlockRuleType.Invalid
+return
+# CSS element hiding rule
+if "##" in parsedLine or "#@#" in parsedLine:
+self.__type = AdBlockRuleType.CssRule
+pos = parsedLine.find("#")
+# domain restricted rule
+if not parsedLine.startswith("##"):
+domains = parsedLine[:pos]
+self.__parseDomains(domains, ",")
+self.__isException = parsedLine[pos + 1] == "@"
+if self.__isException:
+self.__matchString = parsedLine[pos + 3:]
+else:
+self.__matchString = parsedLine[pos + 2:]
+# CSS rule cannot have more options -> stop parsing
+return
+# Exception always starts with @@
+if parsedLine.startswith("@@"):
+self.__isException = True
+parsedLine = parsedLine[2:]
+# Parse all options following '$' character
+optionsIndex = parsedLine.find("$")
+if optionsIndex >= 0:
+options = [opt
+for opt in parsedLine[optionsIndex + 1:].split(",")
+if opt]
+handledOptions = 0
+for option in options:
+if option.startswith("domain="):
+self.__parseDomains(option[7:], "|")
+handledOptions += 1
+elif option == "match-case":
+self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive
+handledOptions += 1
+elif option.endswith("third-party"):
+self.setOption(AdBlockRuleOption.ThirdPartyOption)
+self.__setException(AdBlockRuleOption.ThirdPartyOption,
+option.startswith("~"))
+handledOptions += 1
+elif option.endswith("object"):
+self.setOption(AdBlockRuleOption.ObjectOption)
+self.__setException(AdBlockRuleOption.ObjectOption,
+option.startswith("~"))
+handledOptions += 1
+elif option.endswith("subdocument"):
+self.setOption(AdBlockRuleOption.SubdocumentOption)
+self.__setException(AdBlockRuleOption.SubdocumentOption,
+option.startswith("~"))
+handledOptions += 1
+elif option.endswith("xmlhttprequest"):
+self.setOption(AdBlockRuleOption.XMLHttpRequestOption)
+self.__setException(AdBlockRuleOption.XMLHttpRequestOption,
+option.startswith("~"))
+handledOptions += 1
+elif option.endswith("image"):
+self.setOption(AdBlockRuleOption.ImageOption)
+self.__setException(AdBlockRuleOption.ImageOption,
+option.startswith("~"))
+elif option.endswith("script"):
+self.setOption(AdBlockRuleOption.ScriptOption)
+self.__setException(AdBlockRuleOption.ScriptOption,
+option.startswith("~"))
+elif option.endswith("stylesheet"):
+self.setOption(AdBlockRuleOption.StyleSheetOption)
+self.__setException(AdBlockRuleOption.StyleSheetOption,
+option.startswith("~"))
+elif option.endswith("object-subrequest"):
+self.setOption(AdBlockRuleOption.ObjectSubrequestOption)
+self.__setException(
+AdBlockRuleOption.ObjectSubrequestOption,
+option.startswith("~"))
+elif option.endswith("ping"):
+self.setOption(AdBlockRuleOption.PingOption)
+self.__setException(AdBlockRuleOption.PingOption,
+option.startswith("~"))
+elif option.endswith("media"):
+self.setOption(AdBlockRuleOption.MediaOption)
+self.__setException(AdBlockRuleOption.MediaOption,
+option.startswith("~"))
+elif option.endswith("font"):
+self.setOption(AdBlockRuleOption.FontOption)
+self.__setException(AdBlockRuleOption.FontOption,
+option.startswith("~"))
+elif option.endswith("other"):
+self.setOption(AdBlockRuleOption.OtherOption)
+self.__setException(AdBlockRuleOption.OtherOption,
+option.startswith("~"))
+elif option == "document" and self.__isException:
+self.setOption(AdBlockRuleOption.DocumentOption)
+handledOptions += 1
+elif option == "elemhide" and self.__isException:
+self.setOption(AdBlockRuleOption.ElementHideOption)
+handledOptions += 1
+elif option == "collapse":
+# Hiding placeholders of blocked elements is enabled by
+# default
+handledOptions += 1
+# If we don't handle all options, it's safer to just disable
+# this rule
+if handledOptions != len(options):
+self.__isInternalDisabled = True
+self.__type = AdBlockRuleType.Invalid
+return
+parsedLine = parsedLine[:optionsIndex]
+# Rule is classic regexp
+if parsedLine.startswith("/") and parsedLine.endswith("/"):
+parsedLine = parsedLine[1:-1]
+self.__type = AdBlockRuleType.RegExpMatchRule
+if self.__caseSensitivity:
+self.__regExp = re.compile(parsedLine)
+else:
+self.__regExp = re.compile(parsedLine, re.IGNORECASE)
+self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
+return
+# Remove starting / ending wildcards (*)
+if parsedLine.startswith("*"):
+parsedLine = parsedLine[1:]
+if parsedLine.endswith("*"):
+parsedLine = parsedLine[:-1]
+# Fast string matching for domain here
+if self.__filterIsOnlyDomain(parsedLine):
+parsedLine = parsedLine[2:-1]
+self.__type = AdBlockRuleType.DomainMatchRule
+self.__matchString = parsedLine
+return
+# If rule contains '|' only at the end, string matching can be used
+if self.__filterIsOnlyEndsMatch(parsedLine):
+parsedLine = parsedLine[:-1]
+self.__type = AdBlockRuleType.StringEndsMatchRule
+self.__matchString = parsedLine
+return
+# If there is still a wildcard (*) or separator (^) or (|),
+# the rule must be modified to comply with re.
+if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
+self.__type = AdBlockRuleType.RegExpMatchRule
+pattern = self.__convertPatternToRegExp(parsedLine)
+if self.__caseSensitivity:
+self.__regExp = re.compile(pattern)
+else:
+self.__regExp = re.compile(pattern, re.IGNORECASE)
+self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
+return
+# This rule matches all URLs
+if len(parsedLine) == 0:
+if self.__options == AdBlockRuleOption.NoOption:
+self.__isInternalDisabled = True
+self.__type = AdBlockRuleType.Invalid
+return
+self.__type = AdBlockRuleType.MatchAllUrlsRule
+return
+# no regexp required
+self.__type = AdBlockRuleType.StringContainsMatchRule
+self.__matchString = parsedLine
+def __parseDomains(self, domains, separator):
+"""
+Private method to parse a string with a domain list.
+@param domains list of domains
+@type str
+@param separator separator character used by the list
+@type str
+"""
+domainsList = [d for d in domains.split(separator) if d]
+for domain in domainsList:
+if not domain:
+continue
+if domain.startswith("~"):
+self.__blockedDomains.append(domain[1:])
+else:
+self.__allowedDomains.append(domain)
+if bool(self.__blockedDomains) or bool(self.__allowedDomains):
+self.setOption(AdBlockRuleOption.DomainRestrictedOption)
+def networkMatch(self, request, domain, encodedUrl):
+"""
+Public method to check the rule for a match.
+@param request reference to the network request
+@type QWebEngineUrlRequestInfo
+@param domain domain name
+@type str
+@param encodedUrl string encoded URL to be checked
+@type str
+@return flag indicating a match
+@rtype bool
+"""
+if (
+self.__type == AdBlockRuleType.CssRule or
+not self.__isEnabled or
+self.__isInternalDisabled
+):
+return False
+matched = self.__stringMatch(domain, encodedUrl)
+if matched:
+# check domain restrictions
+if (
+self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and
+not self.matchDomain(request.firstPartyUrl().host())
+):
+return False
+# check third-party restrictions
+if (
+self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and
+not self.matchThirdParty(request)
+):
+return False
+# check object restrictions
+if (
+self.__hasOption(AdBlockRuleOption.ObjectOption) and
+not self.matchObject(request)
+):
+return False
+# check subdocument restrictions
+if (
+self.__hasOption(AdBlockRuleOption.SubdocumentOption) and
+not self.matchSubdocument(request)
+):
+return False
+# check xmlhttprequest restriction
+if (
+self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and
+not self.matchXmlHttpRequest(request)
+):
+return False
+# check image restriction
+if (
+self.__hasOption(AdBlockRuleOption.ImageOption) and
+not self.matchImage(request)
+):
+return False
+# check script restriction
+if (
+self.__hasOption(AdBlockRuleOption.ScriptOption) and
+not self.matchScript(request)
+):
+return False
+# check stylesheet restriction
+if (
+self.__hasOption(AdBlockRuleOption.StyleSheetOption) and
+not self.matchStyleSheet(request)
+):
+return False
+# check object-subrequest restriction
+if (
+self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and
+not self.matchObjectSubrequest(request)
+):
+return False
+# check ping restriction
+if (
+self.__hasOption(AdBlockRuleOption.PingOption) and
+not self.matchPing(request)
+):
+return False
+# check media restriction
+if (
+self.__hasOption(AdBlockRuleOption.MediaOption) and
+not self.matchMedia(request)
+):
+return False
+# check font restriction
+if (
+self.__hasOption(AdBlockRuleOption.FontOption) and
+not self.matchFont(request)
+):
+return False
+return matched
+def urlMatch(self, url):
+"""
+Public method to check an URL against the rule.
+@param url URL to check
+@type QUrl
+@return flag indicating a match
+@rtype bool
+"""
+if (
+not self.__hasOption(AdBlockRuleOption.DocumentOption) and
+not self.__hasOption(AdBlockRuleOption.ElementHideOption)
+):
+return False
+encodedUrl = bytes(url.toEncoded()).decode()
+domain = url.host()
+return self.__stringMatch(domain, encodedUrl)
+def __stringMatch(self, domain, encodedUrl):
+"""
+Private method to match a domain string.
+@param domain domain to match
+@type str
+@param encodedUrl URL in encoded form
+@type str
+@return flag indicating a match
+@rtype bool
+"""
+matched = False
+if self.__type == AdBlockRuleType.StringContainsMatchRule:
+if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive:
+matched = self.__matchString.lower() in encodedUrl.lower()
+else:
+matched = self.__matchString in encodedUrl
+elif self.__type == AdBlockRuleType.DomainMatchRule:
+matched = self.__isMatchingDomain(domain, self.__matchString)
+elif self.__type == AdBlockRuleType.StringEndsMatchRule:
+if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive:
+matched = encodedUrl.lower().endswith(
+self.__matchString.lower())
+else:
+matched = encodedUrl.endswith(self.__matchString)
+elif self.__type == AdBlockRuleType.RegExpMatchRule:
+if not self.__isMatchingRegExpStrings(encodedUrl):
+matched = False
+else:
+matched = self.__regExp.search(encodedUrl) is not None
+elif self.__type == AdBlockRuleType.MatchAllUrlsRule:
+matched = True
+return matched
+def matchDomain(self, domain):
+"""
+Public method to match a domain.
+@param domain domain name to check
+@type str
+@return flag indicating a match
+@rtype bool
+"""
+if not self.__isEnabled:
+return False
+if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
+return True
+if len(self.__blockedDomains) == 0:
+return any(self.__isMatchingDomain(domain, dom)
+for dom in self.__allowedDomains)
+elif len(self.__allowedDomains) == 0:
+return all(not self.__isMatchingDomain(domain, dom)
+for dom in self.__blockedDomains)
+else:
+return (
+all(not self.__isMatchingDomain(domain, dom)
+for dom in self.__blockedDomains) and
+any(self.__isMatchingDomain(domain, dom)
+for dom in self.__allowedDomains)
+)
+def matchThirdParty(self, req):
+"""
+Public method to match a third-party rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype boolean
+"""
+# Third-party matching should be performed on second-level domains
+firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
+host = toSecondLevelDomain(req.requestUrl())
+match = firstPartyHost != host
+if self.__hasException(AdBlockRuleOption.ThirdPartyOption):
+return not match
+else:
+return match
+def matchObject(self, req):
+"""
+Public method to match an object rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject)
+if self.__hasException(AdBlockRuleOption.ObjectOption):
+return not match
+else:
+return match
+def matchSubdocument(self, req):
+"""
+Public method to match a sub-document rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype boolean
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame)
+if self.__hasException(AdBlockRuleOption.SubdocumentOption):
+return not match
+else:
+return match
+def matchXmlHttpRequest(self, req):
+"""
+Public method to match a XmlHttpRequest rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr)
+if self.__hasException(AdBlockRuleOption.XMLHttpRequestOption):
+return not match
+else:
+return match
+def matchImage(self, req):
+"""
+Public method to match an Image rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage)
+if self.__hasException(AdBlockRuleOption.ImageOption):
+return not match
+else:
+return match
+def matchScript(self, req):
+"""
+Public method to match a Script rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript)
+if self.__hasException(AdBlockRuleOption.ScriptOption):
+return not match
+else:
+return match
+def matchStyleSheet(self, req):
+"""
+Public method to match a StyleSheet rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet)
+if self.__hasException(AdBlockRuleOption.StyleSheetOption):
+return not match
+else:
+return match
+def matchObjectSubrequest(self, req):
+"""
+Public method to match an Object Subrequest rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype boolean
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource
+)
+match = match or (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypePluginResource
+)
+if self.__objectSubrequestException:
+return not match
+else:
+return match
+def matchPing(self, req):
+"""
+Public method to match a Ping rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing)
+if self.__hasException(AdBlockRuleOption.PingOption):
+return not match
+else:
+return match
+def matchMedia(self, req):
+"""
+Public method to match a Media rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia)
+if self.__hasException(AdBlockRuleOption.MediaOption):
+return not match
+else:
+return match
+def matchFont(self, req):
+"""
+Public method to match a Font rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = (
+req.resourceType() ==
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource)
+if self.__hasException(AdBlockRuleOption.FontOption):
+return not match
+else:
+return match
+def matchOther(self, req):
+"""
+Public method to match any other rule.
+@param req request object to check
+@type QWebEngineUrlRequestInfo
+@return flag indicating a match
+@rtype bool
+"""
+match = req.resourceType() in [
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubResource,
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeWorker,
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSharedWorker,
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeServiceWorker,
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypePrefetch,
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFavicon,
+QWebEngineUrlRequestInfo.ResourceType.ResourceTypeUnknown,
+]
+if self.__hasException(AdBlockRuleOption.OtherOption):
+return not match
+else:
+return match
+def isException(self):
+"""
+Public method to check, if the rule defines an exception.
+@return flag indicating an exception
+@rtype bool
+"""
+return self.__isException
+def setException(self, exception):
+"""
+Public method to set the rule's exception flag.
+@param exception flag indicating an exception rule
+@type bool
+"""
+self.__isException = exception
+def isEnabled(self):
+"""
+Public method to check, if the rule is enabled.
+@return flag indicating enabled state
+@rtype bool
+"""
+return self.__isEnabled
+def setEnabled(self, enabled):
+"""
+Public method to set the rule's enabled state.
+@param enabled flag indicating the new enabled state
+@type bool
+"""
+self.__isEnabled = enabled
+def isCSSRule(self):
+"""
+Public method to check, if the rule is a CSS rule.
+@return flag indicating a CSS rule
+@rtype bool
+"""
+return self.__type == AdBlockRuleType.CssRule
+def cssSelector(self):
+"""
+Public method to get the CSS selector of the rule.
+@return CSS selector
+@rtype str
+"""
+return self.__matchString
+def isDocument(self):
+"""
+Public method to check, if this is a document rule.
+@return flag indicating a document rule
+@rtype bool
+"""
+return self.__hasOption(AdBlockRuleOption.DocumentOption)
+def isElementHiding(self):
+"""
+Public method to check, if this is an element hiding rule.
+@return flag indicating an element hiding rule
+@rtype bool
+"""
+return self.__hasOption(AdBlockRuleOption.ElementHideOption)
+def isDomainRestricted(self):
+"""
+Public method to check, if this rule is restricted by domain.
+@return flag indicating a domain restriction
+@rtype bool
+"""
+return self.__hasOption(AdBlockRuleOption.DomainRestrictedOption)
+def isComment(self):
+"""
+Public method to check, if this is a comment.
+@return flag indicating a comment
+@rtype bool
+"""
+return self.__filter.startswith("!")
+def isHeader(self):
+"""
+Public method to check, if this is a header.
+@return flag indicating a header
+@rtype bool
+"""
+return self.__filter.startswith("[Adblock")
+def isSlow(self):
+"""
+Public method to check, if this is a slow rule.
+@return flag indicating a slow rule
+@rtype bool
+"""
+return self.__regExp is not None
+def isInternalDisabled(self):
+"""
+Public method to check, if this rule was disabled internally.
+@return flag indicating an internally disabled rule
+@rtype bool
+"""
+return self.__isInternalDisabled
+def __convertPatternToRegExp(self, wildcardPattern):
+"""
+Private method to convert a wildcard pattern to a regular expression.
+@param wildcardPattern string containing the wildcard pattern
+@type str
+@return string containing a regular expression
+@rtype string
+"""
+pattern = wildcardPattern
+# remove multiple wildcards
+pattern = re.sub(r"\*+", "*", pattern)
+# remove anchors following separator placeholder
+pattern = re.sub(r"\^\|$", "^", pattern)
+# remove leading wildcards
+pattern = re.sub(r"^(\*)", "", pattern)
+# remove trailing wildcards
+pattern = re.sub(r"(\*)$", "", pattern)
+# escape special symbols
+pattern = re.sub(r"(\W)", r"\\\1", pattern)
+# process extended anchor at expression start
+pattern = re.sub(
+r"^\\\|\\\|",
+r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
+# process separator placeholders
+pattern = re.sub(r"\\\^", r"(?:[^\\w\\d\-.%]|$)", pattern)
+# process anchor at expression start
+pattern = re.sub(r"^\\\|", "^", pattern)
+# process anchor at expression end
+pattern = re.sub(r"\\\|$", "$", pattern)
+# replace wildcards by .*
+pattern = re.sub(r"\\\*", ".*", pattern)
+return pattern
+def __hasOption(self, opt):
+"""
+Private method to check, if the given option has been set.
+@param opt option to check for
+@type AdBlockRuleOption
+@return flag indicating the state of the option
+@rtype bool
+"""
+return bool(self.__options & opt)
+def setOption(self, opt):
+"""
+Public method to set the given option.
+@param opt option to be set
+@type AdBlockRuleOption
+"""
+self.__options |= opt
+def __hasException(self, opt):
+"""
+Private method to check, if the given option has been set as an
+exception.
+@param opt option to check for
+@type AdBlockRuleOption
+@return flag indicating the exception state of the option
+@rtype bool
+"""
+return bool(self.__exceptions & opt)
+def __setException(self, opt, on):
+"""
+Private method to set the given option as an exception.
+@param opt option to be set
+@type AdBlockRuleOption
+@param on flag indicating to set or unset the exception
+@type bool
+"""
+if on:
+self.__exceptions |= opt
+else:
+self.__exceptions &= ~opt
+def __filterIsOnlyDomain(self, filterString):
+"""
+Private method to check, if the given filter is a domain only filter.
+@param filterString filter string to be checked
+@type str
+@return flag indicating a domain only filter
+@rtype bool
+"""
+if not filterString.endswith("^") or not filterString.startswith("||"):
+return False
+return all(filterChar not in ["/", ":", "?", "=", "&", "*"]
+for filterChar in filterString)
+def __filterIsOnlyEndsMatch(self, filterString):
+"""
+Private method to check, if the given filter is to match against the
+end of a string.
+@param filterString filter string to be checked
+@type str
+@return flag indicating a end of string match filter
+@rtype bool
+"""
+for index, filterChar in enumerate(filterString):
+# __IGNORE_WARNING_Y111__
+if filterChar in ["^", "*"]:
+return False
+elif filterChar == "|":
+return index == len(filterString) - 1
+return False
+def __isMatchingDomain(self, domain, filterString):
+"""
+Private method to check, if a given domain matches the given filter
+string.
+@param domain domain to be checked
+@type str
+@param filterString filter string to check against
+@type str
+@return flag indicating a match
+@rtype bool
+"""
+if filterString == domain:
+return True
+if not domain.endswith(filterString):
+return False
+index = domain.find(filterString)
+return bool(index > 0 and domain[index - 1] == ".")
+def __isMatchingRegExpStrings(self, url):
+"""
+Private method to check the given URL against the fixed parts of
+the regexp.
+@param url URL to be checked
+@type str
+@return flag indicating a match
+@rtype bool
+"""
+if self.__regExp is not None:
+return all(matcher in url for matcher in self.__stringMatchers)
+return True
+def __parseRegExpFilter(self, filterString):
+"""
+Private method to split the given regular expression into strings that
+can be used with 'in'.
+@param filterString regexp filter string to be parsed
+@type str
+@return fixed string parts of the filter
+@rtype list of str
+"""
+matchers = []
+startPos = -1
+for index in range(len(filterString)):
+filterChar = filterString[index]
+if filterChar in ["|", "*", "^"]:
+sub = filterString[startPos:index]
+if len(sub) > 1:
+matchers.append(sub)
+startPos = index + 1
+sub = filterString[startPos:]
+if len(sub) > 1:
+matchers.append(sub)
+return list(set(matchers))
+def ruleType(self):
+"""
+Public method to get the rule type.
+@return rule type
+@rtype AdBlockRuleType
+"""
+return self.__type
+def ruleOptions(self):
+"""
+Public method to get the rule options.
+@return rule options
+@rtype AdBlockRuleOption
+"""
+return self.__options
+def ruleExceptions(self):
+"""
+Public method to get the rule exceptions.
+@return rule exceptions
+@rtype AdBlockRuleOption
+"""
+return self.__exceptions
+def matchString(self):
+"""
+Public method to get the match string.
+@return match string
+@rtype str
+"""
+return self.__matchString
+def caseSensitivity(self):
+"""
+Public method to get the case sensitivity.
+@return case sensitivity
+@rtype Qt.CaseSensitivity
+"""
+return self.__caseSensitivity
+def allowedDomains(self):
+"""
+Public method to get a copy of the list of allowed domains.
+@return list of allowed domains
+@rtype list of str
+"""
+return self.__allowedDomains[:]
+def blockedDomains(self):
+"""
+Public method to get a copy of the list of blocked domains.
+@return list of blocked domains
+@rtype list of str
+"""
+return self.__blockedDomains[:]
+def addBlockedDomains(self, domains):
+"""
+Public method to add to the list of blocked domains.
+@param domains list of domains to be added
+@type str or list of str
+"""
+if isinstance(domains, list):
+self.__blockedDomains.extend(domains)
+else:
+self.__blockedDomains.append(domains)
+def getRegExpAndMatchers(self):
+"""
+Public method to get the regular expression and associated string
+matchers.
+@return tuple containing the regular expression and the list of
+string matchers
+@rtype tuple of (re.Pattern, list of str)
+"""
+if self.__regExp is not None:
+return (re.compile(self.__regExp.pattern),
+self.__stringMatchers[:])
+else:
+return (None, [])
+def copyFrom(self, other):
+"""
+Public method to copy another AdBlock rule.
+@param other reference to the AdBlock rule to copy from
+@type AdBlockRule
+"""
+self.__subscription = other.subscription()
+self.__type = other.ruleType()
+self.__options = other.ruleOptions()
+self.__exceptions = other.ruleExceptions()
+self.__filter = other.filter()
+self.__matchString = other.matchString()
+self.__caseSensitivity = other.caseSensitivity()
+self.__isEnabled = other.isEnabled()
+self.__isException = other.isException()
+self.__isInternalDisabled = other.isInternalDisabled()
+self.__allowedDomains = other.allowedDomains()
+self.__blockedDomains = other.blockedDomains()
+self.__regExp, self.__stringMatchers = other.getRegExpAndMatchers()

Mercurial Repositories > eric / file comparison

comparison: eric7/WebBrowser/AdBlock/AdBlockRule.py

eric7/WebBrowser/AdBlock/AdBlockRule.py