5 |
5 |
6 """ |
6 """ |
7 Module implementing the AdBlock rule class. |
7 Module implementing the AdBlock rule class. |
8 """ |
8 """ |
9 |
9 |
10 |
|
11 import re |
10 import re |
12 |
|
13 from enum import IntEnum |
11 from enum import IntEnum |
14 |
12 |
15 from PyQt5.QtCore import Qt, QRegExp |
13 from PyQt5.QtCore import Qt |
16 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo |
14 from PyQt5.QtWebEngineCore import QWebEngineUrlRequestInfo |
17 |
15 |
18 |
16 |
19 def toSecondLevelDomain(url): |
17 def toSecondLevelDomain(url): |
20 """ |
18 """ |
274 |
272 |
275 # Rule is classic regexp |
273 # Rule is classic regexp |
276 if parsedLine.startswith("/") and parsedLine.endswith("/"): |
274 if parsedLine.startswith("/") and parsedLine.endswith("/"): |
277 parsedLine = parsedLine[1:-1] |
275 parsedLine = parsedLine[1:-1] |
278 self.__type = AdBlockRuleType.RegExpMatchRule |
276 self.__type = AdBlockRuleType.RegExpMatchRule |
279 self.__regExp = QRegExp(parsedLine, self.__caseSensitivity, |
277 if self.__caseSensitivity: |
280 QRegExp.RegExp) |
278 self.__regExp = re.compile(parsedLine) |
|
279 else: |
|
280 self.__regExp = re.compile(parsedLine, re.IGNORECASE) |
281 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) |
281 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) |
282 return |
282 return |
283 |
283 |
284 # Remove starting / ending wildcards (*) |
284 # Remove starting / ending wildcards (*) |
285 if parsedLine.startswith("*"): |
285 if parsedLine.startswith("*"): |
300 self.__type = AdBlockRuleType.StringEndsMatchRule |
300 self.__type = AdBlockRuleType.StringEndsMatchRule |
301 self.__matchString = parsedLine |
301 self.__matchString = parsedLine |
302 return |
302 return |
303 |
303 |
304 # If there is still a wildcard (*) or separator (^) or (|), |
304 # If there is still a wildcard (*) or separator (^) or (|), |
305 # the rule must be modified to comply with QRegExp. |
305 # the rule must be modified to comply with re. |
306 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: |
306 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: |
307 self.__type = AdBlockRuleType.RegExpMatchRule |
307 self.__type = AdBlockRuleType.RegExpMatchRule |
308 pattern = self.__convertPatternToRegExp(parsedLine) |
308 pattern = self.__convertPatternToRegExp(parsedLine) |
309 self.__regExp = QRegExp(pattern, self.__caseSensitivity, |
309 if self.__caseSensitivity: |
310 QRegExp.RegExp) |
310 self.__regExp = re.compile(pattern) |
|
311 else: |
|
312 self.__regExp = re.compile(pattern, re.IGNORECASE) |
311 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) |
313 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) |
312 return |
314 return |
313 |
315 |
314 # This rule matches all URLs |
316 # This rule matches all URLs |
315 if len(parsedLine) == 0: |
317 if len(parsedLine) == 0: |
503 matched = encodedUrl.endswith(self.__matchString) |
505 matched = encodedUrl.endswith(self.__matchString) |
504 elif self.__type == AdBlockRuleType.RegExpMatchRule: |
506 elif self.__type == AdBlockRuleType.RegExpMatchRule: |
505 if not self.__isMatchingRegExpStrings(encodedUrl): |
507 if not self.__isMatchingRegExpStrings(encodedUrl): |
506 matched = False |
508 matched = False |
507 else: |
509 else: |
508 matched = self.__regExp.indexIn(encodedUrl) != -1 |
510 matched = self.__regExp.search(encodedUrl) is not None |
509 elif self.__type == AdBlockRuleType.MatchAllUrlsRule: |
511 elif self.__type == AdBlockRuleType.MatchAllUrlsRule: |
510 matched = True |
512 matched = True |
511 |
513 |
512 return matched |
514 return matched |
513 |
515 |
1151 Public method to get the regular expression and associated string |
1153 Public method to get the regular expression and associated string |
1152 matchers. |
1154 matchers. |
1153 |
1155 |
1154 @return tuple containing the regular expression and the list of |
1156 @return tuple containing the regular expression and the list of |
1155 string matchers |
1157 string matchers |
1156 @rtype tuple of (QRegExp, list of str) |
1158 @rtype tuple of (re.Pattern, list of str) |
1157 """ |
1159 """ |
1158 if self.__regExp is not None: |
1160 if self.__regExp is not None: |
1159 return (QRegExp(self.__regExp), self.__stringMatchers[:]) |
1161 return (re.compile(self.__regExp.pattern), |
|
1162 self.__stringMatchers[:]) |
1160 else: |
1163 else: |
1161 return (None, []) |
1164 return (None, []) |
1162 |
1165 |
1163 def copyFrom(self, other): |
1166 def copyFrom(self, other): |
1164 """ |
1167 """ |