Helpviewer/AdBlock/AdBlockRule.py

branch
Py2 comp.
changeset 3057
10516539f238
parent 2525
8b507a9a2d40
parent 3000
971d84f7a6d6
child 3058
0a02c433f52d
equal deleted inserted replaced
3056:9986ec0e559a 3057:10516539f238
56 Class implementing the AdBlock rule. 56 Class implementing the AdBlock rule.
57 """ 57 """
58 def __init__(self, filter="", subscription=None): 58 def __init__(self, filter="", subscription=None):
59 """ 59 """
60 Constructor 60 Constructor
61
62 @param filter filter string of the rule (string)
63 @param subscription reference to the subscription object
64 (AdBlockSubscription)
61 """ 65 """
62 self.__subscription = subscription 66 self.__subscription = subscription
63 67
64 self.__regExp = QRegExp() 68 self.__regExp = QRegExp()
65 self.__options = [] 69 self.__options = []
180 handledOptions += 1 184 handledOptions += 1
181 elif option == "collapse": 185 elif option == "collapse":
182 # Hiding placeholders of blocked elements 186 # Hiding placeholders of blocked elements
183 handledOptions += 1 187 handledOptions += 1
184 188
185 # If we don't handle all options, it's safer to just disable this rule 189 # If we don't handle all options, it's safer to just disable
190 # this rule
186 if handledOptions != len(options): 191 if handledOptions != len(options):
187 self.__internalDisabled = True 192 self.__internalDisabled = True
188 return 193 return
189 194
190 parsedLine = parsedLine[:optionsIndex] 195 parsedLine = parsedLine[:optionsIndex]
224 # If there is still a wildcard (*) or separator (^) or (|), 229 # If there is still a wildcard (*) or separator (^) or (|),
225 # the rule must be modified to comply with QRegExp. 230 # the rule must be modified to comply with QRegExp.
226 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: 231 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
227 pattern = self.__convertPatternToRegExp(parsedLine) 232 pattern = self.__convertPatternToRegExp(parsedLine)
228 self.__useRegExp = True 233 self.__useRegExp = True
229 self.__regExp = QRegExp(pattern, self.__caseSensitivity, QRegExp.RegExp) 234 self.__regExp = QRegExp(pattern, self.__caseSensitivity,
235 QRegExp.RegExp)
230 return 236 return
231 237
232 # no regexp required 238 # no regexp required
233 self.__useRegExp = False 239 self.__useRegExp = False
234 self.__matchString = parsedLine 240 self.__matchString = parsedLine
271 matched = self.__regExp.indexIn(encodedUrl) != -1 277 matched = self.__regExp.indexIn(encodedUrl) != -1
272 elif self.__useDomainMatch: 278 elif self.__useDomainMatch:
273 matched = domain.endswith(self.__matchString) 279 matched = domain.endswith(self.__matchString)
274 elif self.__useEndsMatch: 280 elif self.__useEndsMatch:
275 if self.__caseSensitivity == Qt.CaseInsensitive: 281 if self.__caseSensitivity == Qt.CaseInsensitive:
276 matched = encodedUrl.lower().endswith(self.__matchString.lower()) 282 matched = encodedUrl.lower().endswith(
283 self.__matchString.lower())
277 else: 284 else:
278 matched = encodedUrl.endswith(self.__matchString) 285 matched = encodedUrl.endswith(self.__matchString)
279 else: 286 else:
280 if self.__caseSensitivity == Qt.CaseInsensitive: 287 if self.__caseSensitivity == Qt.CaseInsensitive:
281 matched = self.__matchString.lower() in encodedUrl.lower() 288 matched = self.__matchString.lower() in encodedUrl.lower()
538 @param wildcardPattern string containing the wildcard pattern (string) 545 @param wildcardPattern string containing the wildcard pattern (string)
539 @return string containing a regular expression (string) 546 @return string containing a regular expression (string)
540 """ 547 """
541 pattern = wildcardPattern 548 pattern = wildcardPattern
542 549
543 pattern = re.sub(r"\*+", "*", pattern) # remove multiple wildcards 550 # remove multiple wildcards
544 pattern = re.sub(r"\^\|$", "^", pattern) # remove anchors following separator 551 pattern = re.sub(r"\*+", "*", pattern)
545 # placeholder 552 # remove anchors following separator placeholder
546 pattern = re.sub(r"^(\*)", "", pattern) # remove leading wildcards 553 pattern = re.sub(r"\^\|$", "^", pattern)
547 pattern = re.sub(r"(\*)$", "", pattern) # remove trailing wildcards 554 # remove leading wildcards
548 pattern = re.sub(r"(\W)", r"\\\1", pattern) # escape special symbols 555 pattern = re.sub(r"^(\*)", "", pattern)
556 # remove trailing wildcards
557 pattern = re.sub(r"(\*)$", "", pattern)
558 # escape special symbols
559 pattern = re.sub(r"(\W)", r"\\\1", pattern)
560 # process extended anchor at expression start
549 pattern = re.sub(r"^\\\|\\\|", 561 pattern = re.sub(r"^\\\|\\\|",
550 r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern) # process extended anchor at 562 r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
551 # expression start 563 # process separator placeholders
552 pattern = re.sub(r"\\\^", 564 pattern = re.sub(r"\\\^", r"(?:[^\w\d\-.%]|$)", pattern)
553 r"(?:[^\w\d\-.%]|$)", pattern) # process separator placeholders 565 # process anchor at expression start
554 pattern = re.sub(r"^\\\|", "^", pattern) # process anchor at expression start 566 pattern = re.sub(r"^\\\|", "^", pattern)
555 pattern = re.sub(r"\\\|$", "$", pattern) # process anchor at expression end 567 # process anchor at expression end
556 pattern = re.sub(r"\\\*", ".*", pattern) # replace wildcards by .* 568 pattern = re.sub(r"\\\|$", "$", pattern)
569 # replace wildcards by .*
570 pattern = re.sub(r"\\\*", ".*", pattern)
557 571
558 return pattern 572 return pattern

eric ide

mercurial