38 Public method to set the rule filter string. |
97 Public method to set the rule filter string. |
39 |
98 |
40 @param filter rule filter string (string) |
99 @param filter rule filter string (string) |
41 """ |
100 """ |
42 self.__filter = filter |
101 self.__filter = filter |
43 |
102 self.__parseFilter() |
44 self.__cssRule = False |
103 |
45 self.__enabled = True |
104 def __parseFilter(self): |
46 self.__exception = False |
105 """ |
47 regExpRule = False |
106 Private method to parse the filter pattern. |
48 |
107 """ |
49 if filter.startswith("!") or not filter.strip(): |
108 parsedLine = self.__filter |
|
109 |
|
110 # empty rule or just a comment |
|
111 if not parsedLine.strip() or parsedLine.startswith("!"): |
50 self.__enabled = False |
112 self.__enabled = False |
51 |
113 return |
52 if "##" in filter: |
114 |
|
115 # CSS element hiding rule |
|
116 if "##" in parsedLine: |
53 self.__cssRule = True |
117 self.__cssRule = True |
54 |
118 pos = parsedLine.find("##") |
55 parsedLine = filter |
119 |
|
120 # domain restricted rule |
|
121 if not parsedLine.startswith("##"): |
|
122 domains = parsedLine[:pos] |
|
123 self.__parseDomains(domains, ",") |
|
124 |
|
125 self.__cssSelector = parsedLine[pos + 2:] |
|
126 # CSS rule cannot have more options -> stop parsing |
|
127 return |
|
128 |
|
129 # Exception always starts with @@ |
56 if parsedLine.startswith("@@"): |
130 if parsedLine.startswith("@@"): |
57 self.__exception = True |
131 self.__exception = True |
58 parsedLine = parsedLine[2:] |
132 parsedLine = parsedLine[2:] |
59 if parsedLine.startswith("/"): |
133 |
60 if parsedLine.endswith("/"): |
134 # Parse all options following '$' character |
61 parsedLine = parsedLine[1:-1] |
135 optionsIndex = parsedLine.find("$") |
62 regExpRule = True |
136 if optionsIndex >= 0: |
63 |
137 options = parsedLine[optionsIndex + 1:].split(",") |
64 options = parsedLine.find("$") |
138 |
65 if options >= 0: |
139 handledOptions = 0 |
66 try: |
140 for option in options: |
67 self.__options = parsedLine[options + 1:].split(",") |
141 if option.startswith("domain="): |
68 except IndexError: |
142 self.__parseDomains(option[7:], "|") |
69 self.__options = [] |
143 handledOptions += 1 |
70 parsedLine = parsedLine[:options] |
144 elif option == "match-case": |
71 |
145 self.__caseSensitivity = Qt.CaseSensitive |
72 self.setPattern(parsedLine, regExpRule) |
146 handledOptions += 1 |
73 |
147 elif option.endswith("third-party"): |
74 if "match-case" in self.__options: |
148 self.__thirdParty = True |
75 self.__regExp.setCaseSensitivity(Qt.CaseSensitive) |
149 self.__thirdPartyException = option.startswith("~") |
76 self.__options.remove("match-case") |
150 handledOptions += 1 |
77 |
151 elif option.endswith("object"): |
78 def networkMatch(self, encodedUrl): |
152 self.__object = True |
|
153 self.__objectException = option.startswith("~") |
|
154 handledOptions += 1 |
|
155 elif option.endswith("subdocument"): |
|
156 self.__subdocument = True |
|
157 self.__subdocumentException = option.startswith("~") |
|
158 handledOptions += 1 |
|
159 elif option.endswith("xmlhttprequest"): |
|
160 self.__xmlhttprequest = True |
|
161 self.__xmlhttprequestException = option.startswith("~") |
|
162 handledOptions += 1 |
|
163 elif option == "document" and self.__exception: |
|
164 self.__document = True |
|
165 handledOptions += 1 |
|
166 elif option == "elemhide" and self.__exception: |
|
167 self.__elemhide = True |
|
168 handledOptions += 1 |
|
169 elif option == "collapse": |
|
170 # Hiding placeholders of blocked elements |
|
171 handledOptions += 1 |
|
172 |
|
173 # If we don't handle all options, it's safer to just disable this rule |
|
174 if handledOptions != len(options): |
|
175 self.__internalDisabled = True |
|
176 return |
|
177 |
|
178 parsedLine = parsedLine[:optionsIndex] |
|
179 |
|
180 # Rule is classic regexp |
|
181 if parsedLine.startswith("/") and parsedLine.endswith("/"): |
|
182 parsedLine = parsedLine[1:-1] |
|
183 self.__useRegExp = True |
|
184 self.__regExp = QRegExp(parsedLine, self.__caseSensitivity, |
|
185 QRegExp.RegExp) |
|
186 return |
|
187 |
|
188 # Remove starting / ending wildcards |
|
189 if parsedLine.startswith("*"): |
|
190 parsedLine = parsedLine[1:] |
|
191 if parsedLine.endswith("*"): |
|
192 parsedLine = parsedLine[:-1] |
|
193 |
|
194 # Fast string matching for domain can be used |
|
195 if parsedLine.startswith("||") and \ |
|
196 parsedLine.endswith("^") and \ |
|
197 QRegExp("[/:?=&\\*]").indexIn(parsedLine) == -1: |
|
198 parsedLine = parsedLine[2:-1] |
|
199 self.__useDomainMatch = True |
|
200 self.__matchString = parsedLine |
|
201 return |
|
202 |
|
203 # If rule contains '|' only at the end, string matching can be used |
|
204 if parsedLine.endswith("|") and \ |
|
205 QRegExp("[\\^\\*]").indexIn(parsedLine) == -1 and \ |
|
206 parsedLine.count("|") == 1: |
|
207 parsedLine = parsedLine[:-1] |
|
208 self.__useEndsMatch = True |
|
209 self.__matchString = parsedLine |
|
210 return |
|
211 |
|
212 # If there is still a wildcard (*) or separator (^) or (|), |
|
213 # the rule must be modified to comply with QRegExp. |
|
214 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: |
|
215 pattern = self.__convertPatternToRegExp(parsedLine) |
|
216 self.__useRegExp = True |
|
217 self.__regExp = QRegExp(pattern, self.__caseSensitivity, QRegExp.RegExp) |
|
218 return |
|
219 |
|
220 # no regexp required |
|
221 self.__useRegExp = False |
|
222 self.__matchString = parsedLine |
|
223 |
|
224 def __parseDomains(self, domains, separator): |
|
225 """ |
|
226 Private method to parse a string with a domain list. |
|
227 |
|
228 @param domains list of domains (string) |
|
229 @param separator separator character used by the list (string) |
|
230 """ |
|
231 domainsList = domains.split(separator) |
|
232 |
|
233 for domain in domainsList: |
|
234 if not domain: |
|
235 continue |
|
236 if domain.startswith("~"): |
|
237 self.__blockedDomains.append(domain[1:]) |
|
238 else: |
|
239 self.__allowedDomains.append(domain) |
|
240 |
|
241 self.__domainRestricted = \ |
|
242 bool(self.__blockedDomains) or bool(self.__allowedDomains) |
|
243 |
|
244 def networkMatch(self, request, domain, encodedUrl): |
79 """ |
245 """ |
80 Public method to check the rule for a match. |
246 Public method to check the rule for a match. |
81 |
247 |
|
248 @param request reference to the network request (QNetworkRequest) |
|
249 @param domain domain name (string) |
82 @param encodedUrl string encoded URL to be checked (string) |
250 @param encodedUrl string encoded URL to be checked (string) |
83 @return flag indicating a match (boolean) |
251 @return flag indicating a match (boolean) |
84 """ |
252 """ |
85 if self.__cssRule: |
253 if self.__cssRule or not self.__enabled or self.__internalDisabled: |
86 return False |
254 return False |
87 |
255 |
|
256 matched = False |
|
257 |
|
258 if self.__useRegExp: |
|
259 matched = self.__regExp.indexIn(encodedUrl) != -1 |
|
260 elif self.__useDomainMatch: |
|
261 matched = domain.endswith(self.__matchString) |
|
262 elif self.__useEndsMatch: |
|
263 if self.__caseSensitivity == Qt.CaseInsensitive: |
|
264 matched = encodedUrl.lower().endswith(self.__matchString.lower()) |
|
265 else: |
|
266 matched = encodedUrl.endswith(self.__matchString) |
|
267 else: |
|
268 if self.__caseSensitivity == Qt.CaseInsensitive: |
|
269 matched = self.__matchString.lower() in encodedUrl.lower() |
|
270 else: |
|
271 matched = self.__matchString in encodedUrl |
|
272 |
|
273 if matched: |
|
274 # check domain restrictions |
|
275 if self.__domainRestricted and not self.matchDomain(domain): |
|
276 return False |
|
277 |
|
278 # check third-party restrictions |
|
279 if self.__thirdParty and not self.matchThirdParty(request): |
|
280 return False |
|
281 |
|
282 # check object restrictions |
|
283 if self.__object and not self.matchObject(request): |
|
284 return False |
|
285 |
|
286 # check subdocument restrictions |
|
287 if self.__subdocument and not self.matchSubdocument(request): |
|
288 return False |
|
289 |
|
290 # check xmlhttprequest restriction |
|
291 if self.__xmlhttprequest and not self.matchXmlHttpRequest(request): |
|
292 return False |
|
293 |
|
294 return matched |
|
295 |
|
296 def urlMatch(self, url): |
|
297 """ |
|
298 Public method to check an URL against the rule. |
|
299 |
|
300 @param url URL to check (QUrl) |
|
301 @return flag indicating a match (boolean) |
|
302 """ |
|
303 if not self.__document and not self.__elemhide: |
|
304 return False |
|
305 |
|
306 encodedUrl = bytes(url.toEncoded()).decode() |
|
307 domain = url.host() |
|
308 return self.networkMatch(QNetworkRequest(url), domain, encodedUrl) |
|
309 |
|
310 def matchDomain(self, domain): |
|
311 """ |
|
312 Public method to match a domain. |
|
313 |
|
314 @param domain domain name to check (string) |
|
315 @return flag indicating a match (boolean) |
|
316 """ |
88 if not self.__enabled: |
317 if not self.__enabled: |
89 return False |
318 return False |
90 |
319 |
91 matched = self.__regExp.indexIn(encodedUrl) != -1 |
320 if not self.__domainRestricted: |
92 |
321 return True |
93 if matched and not len(self.__options) == 0: |
322 |
94 # only domain rules are supported |
323 if len(self.__blockedDomains) == 0: |
95 if len(self.__options) == 1: |
324 for dom in self.__allowedDomains: |
96 for option in self.__options: |
325 if domain.endswith(dom): |
97 if option.startswith("domain="): |
326 return True |
98 url = QUrl.fromEncoded(encodedUrl) |
327 elif len(self.__allowedDomains) == 0: |
99 host = url.host() |
328 for dom in self.__blockedDomains: |
100 domainOptions = option[7:].split("|") |
329 if domain.endswith(dom): |
101 for domainOption in domainOptions: |
330 return False |
102 negate = domainOption.startswith("~") |
331 return True |
103 if negate: |
332 else: |
104 domainOption = domainOption[1:] |
333 for dom in self.__blockedDomains: |
105 hostMatched = domainOption == host |
334 if domain.endswith(dom): |
106 if hostMatched and not negate: |
335 return False |
107 return True |
336 for dom in self.__allowedDomains: |
108 if not hostMatched and negate: |
337 if domain.endswith(dom): |
109 return True |
338 return True |
110 |
339 |
|
340 return False |
|
341 |
|
342 def matchThirdParty(self, req): |
|
343 """ |
|
344 Public slot to match a third-party rule. |
|
345 |
|
346 @param req request object to check (QNetworkRequest) |
|
347 @return flag indicating a match (boolean) |
|
348 """ |
|
349 referer = bytes(req.attribute(QNetworkRequest.User + 200, "")).decode() |
|
350 if referer == "": |
111 return False |
351 return False |
112 |
352 |
113 return matched |
353 # Third-party matching should be performed on second-level domains |
|
354 refererHost = toSecondLevelDomain(QUrl(referer)) |
|
355 host = toSecondLevelDomain(req.url()) |
|
356 |
|
357 match = refererHost != host |
|
358 |
|
359 if self.__thirdPartyException: |
|
360 return not match |
|
361 else: |
|
362 return match |
|
363 |
|
364 def matchObject(self, req): |
|
365 """ |
|
366 Public slot to match an object rule. |
|
367 |
|
368 @param req request object to check (QNetworkRequest) |
|
369 @return flag indicating a match (boolean) |
|
370 """ |
|
371 match = req.attribute(QNetworkRequest.User + 200) == "object" |
|
372 |
|
373 if self.__objectException: |
|
374 return not match |
|
375 else: |
|
376 return match |
|
377 |
|
378 def matchSubdocument(self, req): |
|
379 """ |
|
380 Public slot to match a sub-document rule. |
|
381 |
|
382 @param req request object to check (QNetworkRequest) |
|
383 @return flag indicating a match (boolean) |
|
384 """ |
|
385 originatingFrame = req.originatingObject() |
|
386 if originatingFrame is None: |
|
387 return False |
|
388 |
|
389 page = originatingFrame.page() |
|
390 if page is None: |
|
391 return False |
|
392 |
|
393 match = originatingFrame != page.mainFrame() |
|
394 |
|
395 if self.__subdocumentException: |
|
396 return not match |
|
397 else: |
|
398 return match |
|
399 |
|
400 def matchXmlHttpRequest(self, req): |
|
401 """ |
|
402 Public slot to match a XmlHttpRequest rule. |
|
403 |
|
404 @param req request object to check (QNetworkRequest) |
|
405 @return flag indicating a match (boolean) |
|
406 """ |
|
407 match = req.rawHeader("X-Request-With") == "XMLHttpRequest" |
|
408 |
|
409 if self.__xmlhttprequestException: |
|
410 return not match |
|
411 else: |
|
412 return match |
114 |
413 |
115 def isException(self): |
414 def isException(self): |
116 """ |
415 """ |
117 Public method to check, if the rule defines an exception. |
416 Public method to check, if the rule defines an exception. |
118 |
417 |
154 |
453 |
155 @return flag indicating a CSS rule (boolean) |
454 @return flag indicating a CSS rule (boolean) |
156 """ |
455 """ |
157 return self.__cssRule |
456 return self.__cssRule |
158 |
457 |
159 def regExpPattern(self): |
458 def cssSelector(self): |
160 """ |
459 """ |
161 Public method to get the regexp pattern of the rule. |
460 Public method to get the CSS selector of the rule. |
162 |
461 |
163 @return regexp pattern (QRegExp) |
462 @return CSS selector (string) |
164 """ |
463 """ |
165 return self.__regExp.pattern() |
464 return self.__cssSelector |
|
465 |
|
466 def isDocument(self): |
|
467 """ |
|
468 Public method to check, if this is a document rule. |
|
469 |
|
470 @return flag indicating a document rule (boolean) |
|
471 """ |
|
472 return self.__document |
|
473 |
|
474 def isElementHiding(self): |
|
475 """ |
|
476 Public method to check, if this is an element hiding rule. |
|
477 |
|
478 @return flag indicating an element hiding rule (boolean) |
|
479 """ |
|
480 return self.__elemhide |
|
481 |
|
482 def isDomainRestricted(self): |
|
483 """ |
|
484 Public method to check, if this rule is restricted by domain. |
|
485 |
|
486 @return flag indicating a domain restriction (boolean) |
|
487 """ |
|
488 return self.__domainRestricted |
|
489 |
|
490 def isComment(self): |
|
491 """ |
|
492 Public method to check, if this is a comment. |
|
493 |
|
494 @return flag indicating a comment (boolean) |
|
495 """ |
|
496 return self.__filter.startswith("!") |
|
497 |
|
498 def isSlow(self): |
|
499 """ |
|
500 Public method to check, if this is a slow rule. |
|
501 |
|
502 @return flag indicating a slow rule (boolean) |
|
503 """ |
|
504 return self.__useRegExp |
|
505 |
|
506 def isInternalDisabled(self): |
|
507 """ |
|
508 Public method to check, if this rule was disabled internally. |
|
509 |
|
510 @return flag indicating an internally disabled rule (boolean) |
|
511 """ |
|
512 return self.__internalDisabled |
166 |
513 |
167 def __convertPatternToRegExp(self, wildcardPattern): |
514 def __convertPatternToRegExp(self, wildcardPattern): |
168 """ |
515 """ |
169 Private method to convert a wildcard pattern to a regular expression. |
516 Private method to convert a wildcard pattern to a regular expression. |
170 |
517 |
171 @param wildcardPattern string containing the wildcard pattern (string) |
518 @param wildcardPattern string containing the wildcard pattern (string) |
172 @return string containing a regular expression (string) |
519 @return string containing a regular expression (string) |
173 """ |
520 """ |
174 pattern = wildcardPattern |
521 pattern = wildcardPattern |
175 |
522 |
176 pattern = re.sub(r"\*+", "*", pattern) # remove multiple wildcards |
523 pattern = re.sub(r"\*+", "*", pattern) # remove multiple wildcards |
177 pattern = re.sub(r"\^\|$", "^", pattern) # remove anchors following separator placeholder |
524 pattern = re.sub(r"\^\|$", "^", pattern) # remove anchors following separator |
178 pattern = re.sub(r"^(\*)", "", pattern) # remove leading wildcards |
525 # placeholder |
179 pattern = re.sub(r"(\*)$", "", pattern) # remove trailing wildcards |
526 pattern = re.sub(r"^(\*)", "", pattern) # remove leading wildcards |
180 pattern = re.sub(r"(\W)", r"\\\1", pattern) # escape special symbols |
527 pattern = re.sub(r"(\*)$", "", pattern) # remove trailing wildcards |
|
528 pattern = re.sub(r"(\W)", r"\\\1", pattern) # escape special symbols |
181 pattern = re.sub(r"^\\\|\\\|", |
529 pattern = re.sub(r"^\\\|\\\|", |
182 r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern) # process extended anchor at expression start |
530 r"^[\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern) # process extended anchor at |
|
531 # expression start |
183 pattern = re.sub(r"\\\^", |
532 pattern = re.sub(r"\\\^", |
184 r"(?:[^\w\d\-.%]|$)", pattern) # process separator placeholders |
533 r"(?:[^\w\d\-.%]|$)", pattern) # process separator placeholders |
185 pattern = re.sub(r"^\\\|", "^", pattern) # process anchor at expression start |
534 pattern = re.sub(r"^\\\|", "^", pattern) # process anchor at expression start |
186 pattern = re.sub(r"\\\|$", "$", pattern) # process anchor at expression end |
535 pattern = re.sub(r"\\\|$", "$", pattern) # process anchor at expression end |
187 pattern = re.sub(r"\\\*", ".*", pattern) # replace wildcards by .* |
536 pattern = re.sub(r"\\\*", ".*", pattern) # replace wildcards by .* |
188 |
537 |
189 return pattern |
538 return pattern |
190 |
|
191 def setPattern(self, pattern, isRegExp): |
|
192 """ |
|
193 Public method to set the rule pattern. |
|
194 |
|
195 @param pattern string containing the pattern (string) |
|
196 @param isRegExp flag indicating a reg exp pattern (boolean) |
|
197 """ |
|
198 if isRegExp: |
|
199 self.__regExp = QRegExp(pattern, Qt.CaseInsensitive, QRegExp.RegExp2) |
|
200 else: |
|
201 self.__regExp = QRegExp(self.__convertPatternToRegExp(pattern), |
|
202 Qt.CaseInsensitive, QRegExp.RegExp2) |
|