eric6/WebBrowser/AdBlock/AdBlockRule.py

changeset 7775
4a1db75550bd
parent 7773
fe42bd17d4fe
child 7923
91e843545d9a
equal deleted inserted replaced
7774:9eed155411f0 7775:4a1db75550bd
5 5
6 """ 6 """
7 Module implementing the AdBlock rule class. 7 Module implementing the AdBlock rule class.
8 """ 8 """
9 9
10
11 import re 10 import re
12
13 from enum import IntEnum 11 from enum import IntEnum
14 12
15 from PyQt5.QtCore import Qt, QRegExp 13 from PyQt5.QtCore import Qt
16 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo 14 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo
17 15
18 16
19 def toSecondLevelDomain(url): 17 def toSecondLevelDomain(url):
20 """ 18 """
274 272
275 # Rule is classic regexp 273 # Rule is classic regexp
276 if parsedLine.startswith("/") and parsedLine.endswith("/"): 274 if parsedLine.startswith("/") and parsedLine.endswith("/"):
277 parsedLine = parsedLine[1:-1] 275 parsedLine = parsedLine[1:-1]
278 self.__type = AdBlockRuleType.RegExpMatchRule 276 self.__type = AdBlockRuleType.RegExpMatchRule
279 self.__regExp = QRegExp(parsedLine, self.__caseSensitivity, 277 if self.__caseSensitivity:
280 QRegExp.RegExp) 278 self.__regExp = re.compile(parsedLine)
279 else:
280 self.__regExp = re.compile(parsedLine, re.IGNORECASE)
281 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) 281 self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
282 return 282 return
283 283
284 # Remove starting / ending wildcards (*) 284 # Remove starting / ending wildcards (*)
285 if parsedLine.startswith("*"): 285 if parsedLine.startswith("*"):
300 self.__type = AdBlockRuleType.StringEndsMatchRule 300 self.__type = AdBlockRuleType.StringEndsMatchRule
301 self.__matchString = parsedLine 301 self.__matchString = parsedLine
302 return 302 return
303 303
304 # If there is still a wildcard (*) or separator (^) or (|), 304 # If there is still a wildcard (*) or separator (^) or (|),
305 # the rule must be modified to comply with QRegExp. 305 # the rule must be modified to comply with re.
306 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: 306 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
307 self.__type = AdBlockRuleType.RegExpMatchRule 307 self.__type = AdBlockRuleType.RegExpMatchRule
308 pattern = self.__convertPatternToRegExp(parsedLine) 308 pattern = self.__convertPatternToRegExp(parsedLine)
309 self.__regExp = QRegExp(pattern, self.__caseSensitivity, 309 if self.__caseSensitivity:
310 QRegExp.RegExp) 310 self.__regExp = re.compile(pattern)
311 else:
312 self.__regExp = re.compile(pattern, re.IGNORECASE)
311 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) 313 self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
312 return 314 return
313 315
314 # This rule matches all URLs 316 # This rule matches all URLs
315 if len(parsedLine) == 0: 317 if len(parsedLine) == 0:
503 matched = encodedUrl.endswith(self.__matchString) 505 matched = encodedUrl.endswith(self.__matchString)
504 elif self.__type == AdBlockRuleType.RegExpMatchRule: 506 elif self.__type == AdBlockRuleType.RegExpMatchRule:
505 if not self.__isMatchingRegExpStrings(encodedUrl): 507 if not self.__isMatchingRegExpStrings(encodedUrl):
506 matched = False 508 matched = False
507 else: 509 else:
508 matched = self.__regExp.indexIn(encodedUrl) != -1 510 matched = self.__regExp.search(encodedUrl) is not None
509 elif self.__type == AdBlockRuleType.MatchAllUrlsRule: 511 elif self.__type == AdBlockRuleType.MatchAllUrlsRule:
510 matched = True 512 matched = True
511 513
512 return matched 514 return matched
513 515
1151 Public method to get the regular expression and associated string 1153 Public method to get the regular expression and associated string
1152 matchers. 1154 matchers.
1153 1155
1154 @return tuple containing the regular expression and the list of 1156 @return tuple containing the regular expression and the list of
1155 string matchers 1157 string matchers
1156 @rtype tuple of (QRegExp, list of str) 1158 @rtype tuple of (re.Pattern, list of str)
1157 """ 1159 """
1158 if self.__regExp is not None: 1160 if self.__regExp is not None:
1159 return (QRegExp(self.__regExp), self.__stringMatchers[:]) 1161 return (re.compile(self.__regExp.pattern),
1162 self.__stringMatchers[:])
1160 else: 1163 else:
1161 return (None, []) 1164 return (None, [])
1162 1165
1163 def copyFrom(self, other): 1166 def copyFrom(self, other):
1164 """ 1167 """

eric ide

mercurial