src/eric7/WebBrowser/AdBlock/AdBlockRule.py

branch
eric7
changeset 9221
bf71ee032bb4
parent 9209
b99e7fd55fd3
child 9413
80c06d472826
equal deleted inserted replaced
9220:e9e7eca7efee 9221:bf71ee032bb4
17 17
18 18
19 def toSecondLevelDomain(url): 19 def toSecondLevelDomain(url):
20 """ 20 """
21 Module function to get a second level domain from the given URL. 21 Module function to get a second level domain from the given URL.
22 22
23 @param url URL to extract domain from 23 @param url URL to extract domain from
24 @type QUrl 24 @type QUrl
25 @return name of second level domain 25 @return name of second level domain
26 @rtype str 26 @rtype str
27 """ 27 """
28 topLevelDomain = EricTldExtractor.instance().tld(url.host()) 28 topLevelDomain = EricTldExtractor.instance().tld(url.host())
29 urlHost = url.host() 29 urlHost = url.host()
30 30
31 if not topLevelDomain or not urlHost: 31 if not topLevelDomain or not urlHost:
32 return "" 32 return ""
33 33
34 domain = urlHost[:len(urlHost) - len(topLevelDomain)] 34 domain = urlHost[: len(urlHost) - len(topLevelDomain)]
35 if domain.count(".") == 0: 35 if domain.count(".") == 0:
36 return urlHost 36 return urlHost
37 37
38 while domain.count(".") != 0: 38 while domain.count(".") != 0:
39 domain = domain[domain.find(".") + 1:] 39 domain = domain[domain.find(".") + 1 :]
40 40
41 return domain + topLevelDomain 41 return domain + topLevelDomain
42 42
43 43
44 class AdBlockRuleType(IntEnum): 44 class AdBlockRuleType(IntEnum):
45 """ 45 """
46 Class implementing the rule type enum. 46 Class implementing the rule type enum.
47 """ 47 """
48
48 CssRule = 0 49 CssRule = 0
49 DomainMatchRule = 1 50 DomainMatchRule = 1
50 RegExpMatchRule = 2 51 RegExpMatchRule = 2
51 StringEndsMatchRule = 3 52 StringEndsMatchRule = 3
52 StringContainsMatchRule = 4 53 StringContainsMatchRule = 4
56 57
57 class AdBlockRuleOption(IntEnum): 58 class AdBlockRuleOption(IntEnum):
58 """ 59 """
59 Class implementing the rule option enum. 60 Class implementing the rule option enum.
60 """ 61 """
62
61 NoOption = 0 63 NoOption = 0
62 DomainRestrictedOption = 1 64 DomainRestrictedOption = 1
63 ThirdPartyOption = 2 65 ThirdPartyOption = 2
64 ObjectOption = 4 66 ObjectOption = 4
65 SubdocumentOption = 8 67 SubdocumentOption = 8
70 ObjectSubrequestOption = 256 72 ObjectSubrequestOption = 256
71 PingOption = 512 73 PingOption = 512
72 MediaOption = 1024 74 MediaOption = 1024
73 FontOption = 2048 75 FontOption = 2048
74 OtherOption = 4096 76 OtherOption = 4096
75 77
76 # Exception only options 78 # Exception only options
77 DocumentOption = 8192 79 DocumentOption = 8192
78 ElementHideOption = 16384 80 ElementHideOption = 16384
79 81
80 82
81 class AdBlockRule: 83 class AdBlockRule:
82 """ 84 """
83 Class implementing the AdBlock rule. 85 Class implementing the AdBlock rule.
84 """ 86 """
87
85 def __init__(self, filterRule="", subscription=None): 88 def __init__(self, filterRule="", subscription=None):
86 """ 89 """
87 Constructor 90 Constructor
88 91
89 @param filterRule filter string of the rule 92 @param filterRule filter string of the rule
90 @type str 93 @type str
91 @param subscription reference to the subscription object 94 @param subscription reference to the subscription object
92 @type AdBlockSubscription 95 @type AdBlockSubscription
93 """ 96 """
94 self.__subscription = subscription 97 self.__subscription = subscription
95 98
96 self.__regExp = None 99 self.__regExp = None
97 self.__stringMatchers = [] 100 self.__stringMatchers = []
98 101
99 self.__blockedDomains = [] 102 self.__blockedDomains = []
100 self.__allowedDomains = [] 103 self.__allowedDomains = []
101 104
102 self.__isEnabled = True 105 self.__isEnabled = True
103 self.__isException = False 106 self.__isException = False
104 self.__isInternalDisabled = False 107 self.__isInternalDisabled = False
105 self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive 108 self.__caseSensitivity = Qt.CaseSensitivity.CaseInsensitive
106 109
107 self.__type = AdBlockRuleType.StringContainsMatchRule 110 self.__type = AdBlockRuleType.StringContainsMatchRule
108 self.__options = AdBlockRuleOption.NoOption 111 self.__options = AdBlockRuleOption.NoOption
109 self.__exceptions = AdBlockRuleOption.NoOption 112 self.__exceptions = AdBlockRuleOption.NoOption
110 113
111 self.setFilter(filterRule) 114 self.setFilter(filterRule)
112 115
113 def subscription(self): 116 def subscription(self):
114 """ 117 """
115 Public method to get the subscription this rule belongs to. 118 Public method to get the subscription this rule belongs to.
116 119
117 @return subscription of the rule 120 @return subscription of the rule
118 @rtype AdBlockSubscription 121 @rtype AdBlockSubscription
119 """ 122 """
120 return self.__subscription 123 return self.__subscription
121 124
122 def setSubscription(self, subscription): 125 def setSubscription(self, subscription):
123 """ 126 """
124 Public method to set the subscription this rule belongs to. 127 Public method to set the subscription this rule belongs to.
125 128
126 @param subscription subscription of the rule 129 @param subscription subscription of the rule
127 @type AdBlockSubscription 130 @type AdBlockSubscription
128 """ 131 """
129 self.__subscription = subscription 132 self.__subscription = subscription
130 133
131 def filter(self): 134 def filter(self):
132 """ 135 """
133 Public method to get the rule filter string. 136 Public method to get the rule filter string.
134 137
135 @return rule filter string 138 @return rule filter string
136 @rtype str 139 @rtype str
137 """ 140 """
138 return self.__filter 141 return self.__filter
139 142
140 def setFilter(self, filterRule): 143 def setFilter(self, filterRule):
141 """ 144 """
142 Public method to set the rule filter string. 145 Public method to set the rule filter string.
143 146
144 @param filterRule rule filter string 147 @param filterRule rule filter string
145 @type str 148 @type str
146 """ 149 """
147 self.__filter = filterRule 150 self.__filter = filterRule
148 self.__parseFilter() 151 self.__parseFilter()
149 152
150 def __parseFilter(self): 153 def __parseFilter(self):
151 """ 154 """
152 Private method to parse the filter pattern. 155 Private method to parse the filter pattern.
153 """ 156 """
154 parsedLine = self.__filter 157 parsedLine = self.__filter
155 158
156 # empty rule or just a comment 159 # empty rule or just a comment
157 if not parsedLine.strip() or parsedLine.startswith("!"): 160 if not parsedLine.strip() or parsedLine.startswith("!"):
158 self.__isEnabled = False 161 self.__isEnabled = False
159 self.__isInternalDisabled = True 162 self.__isInternalDisabled = True
160 self.__type = AdBlockRuleType.Invalid 163 self.__type = AdBlockRuleType.Invalid
161 return 164 return
162 165
163 # CSS element hiding rule 166 # CSS element hiding rule
164 if "##" in parsedLine or "#@#" in parsedLine: 167 if "##" in parsedLine or "#@#" in parsedLine:
165 self.__type = AdBlockRuleType.CssRule 168 self.__type = AdBlockRuleType.CssRule
166 pos = parsedLine.find("#") 169 pos = parsedLine.find("#")
167 170
168 # domain restricted rule 171 # domain restricted rule
169 if not parsedLine.startswith("##"): 172 if not parsedLine.startswith("##"):
170 domains = parsedLine[:pos] 173 domains = parsedLine[:pos]
171 self.__parseDomains(domains, ",") 174 self.__parseDomains(domains, ",")
172 175
173 self.__isException = parsedLine[pos + 1] == "@" 176 self.__isException = parsedLine[pos + 1] == "@"
174 if self.__isException: 177 if self.__isException:
175 self.__matchString = parsedLine[pos + 3:] 178 self.__matchString = parsedLine[pos + 3 :]
176 else: 179 else:
177 self.__matchString = parsedLine[pos + 2:] 180 self.__matchString = parsedLine[pos + 2 :]
178 181
179 # CSS rule cannot have more options -> stop parsing 182 # CSS rule cannot have more options -> stop parsing
180 return 183 return
181 184
182 # Exception always starts with @@ 185 # Exception always starts with @@
183 if parsedLine.startswith("@@"): 186 if parsedLine.startswith("@@"):
184 self.__isException = True 187 self.__isException = True
185 parsedLine = parsedLine[2:] 188 parsedLine = parsedLine[2:]
186 189
187 # Parse all options following '$' character 190 # Parse all options following '$' character
188 optionsIndex = parsedLine.find("$") 191 optionsIndex = parsedLine.find("$")
189 if optionsIndex >= 0: 192 if optionsIndex >= 0:
190 options = [opt 193 options = [opt for opt in parsedLine[optionsIndex + 1 :].split(",") if opt]
191 for opt in parsedLine[optionsIndex + 1:].split(",") 194
192 if opt]
193
194 handledOptions = 0 195 handledOptions = 0
195 for option in options: 196 for option in options:
196 if option.startswith("domain="): 197 if option.startswith("domain="):
197 self.__parseDomains(option[7:], "|") 198 self.__parseDomains(option[7:], "|")
198 handledOptions += 1 199 handledOptions += 1
199 elif option == "match-case": 200 elif option == "match-case":
200 self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive 201 self.__caseSensitivity = Qt.CaseSensitivity.CaseSensitive
201 handledOptions += 1 202 handledOptions += 1
202 elif option.endswith("third-party"): 203 elif option.endswith("third-party"):
203 self.setOption(AdBlockRuleOption.ThirdPartyOption) 204 self.setOption(AdBlockRuleOption.ThirdPartyOption)
204 self.__setException(AdBlockRuleOption.ThirdPartyOption, 205 self.__setException(
205 option.startswith("~")) 206 AdBlockRuleOption.ThirdPartyOption, option.startswith("~")
207 )
206 handledOptions += 1 208 handledOptions += 1
207 elif option.endswith("object"): 209 elif option.endswith("object"):
208 self.setOption(AdBlockRuleOption.ObjectOption) 210 self.setOption(AdBlockRuleOption.ObjectOption)
209 self.__setException(AdBlockRuleOption.ObjectOption, 211 self.__setException(
210 option.startswith("~")) 212 AdBlockRuleOption.ObjectOption, option.startswith("~")
213 )
211 handledOptions += 1 214 handledOptions += 1
212 elif option.endswith("subdocument"): 215 elif option.endswith("subdocument"):
213 self.setOption(AdBlockRuleOption.SubdocumentOption) 216 self.setOption(AdBlockRuleOption.SubdocumentOption)
214 self.__setException(AdBlockRuleOption.SubdocumentOption, 217 self.__setException(
215 option.startswith("~")) 218 AdBlockRuleOption.SubdocumentOption, option.startswith("~")
219 )
216 handledOptions += 1 220 handledOptions += 1
217 elif option.endswith("xmlhttprequest"): 221 elif option.endswith("xmlhttprequest"):
218 self.setOption(AdBlockRuleOption.XMLHttpRequestOption) 222 self.setOption(AdBlockRuleOption.XMLHttpRequestOption)
219 self.__setException(AdBlockRuleOption.XMLHttpRequestOption, 223 self.__setException(
220 option.startswith("~")) 224 AdBlockRuleOption.XMLHttpRequestOption, option.startswith("~")
225 )
221 handledOptions += 1 226 handledOptions += 1
222 elif option.endswith("image"): 227 elif option.endswith("image"):
223 self.setOption(AdBlockRuleOption.ImageOption) 228 self.setOption(AdBlockRuleOption.ImageOption)
224 self.__setException(AdBlockRuleOption.ImageOption, 229 self.__setException(
225 option.startswith("~")) 230 AdBlockRuleOption.ImageOption, option.startswith("~")
231 )
226 elif option.endswith("script"): 232 elif option.endswith("script"):
227 self.setOption(AdBlockRuleOption.ScriptOption) 233 self.setOption(AdBlockRuleOption.ScriptOption)
228 self.__setException(AdBlockRuleOption.ScriptOption, 234 self.__setException(
229 option.startswith("~")) 235 AdBlockRuleOption.ScriptOption, option.startswith("~")
236 )
230 elif option.endswith("stylesheet"): 237 elif option.endswith("stylesheet"):
231 self.setOption(AdBlockRuleOption.StyleSheetOption) 238 self.setOption(AdBlockRuleOption.StyleSheetOption)
232 self.__setException(AdBlockRuleOption.StyleSheetOption, 239 self.__setException(
233 option.startswith("~")) 240 AdBlockRuleOption.StyleSheetOption, option.startswith("~")
241 )
234 elif option.endswith("object-subrequest"): 242 elif option.endswith("object-subrequest"):
235 self.setOption(AdBlockRuleOption.ObjectSubrequestOption) 243 self.setOption(AdBlockRuleOption.ObjectSubrequestOption)
236 self.__setException( 244 self.__setException(
237 AdBlockRuleOption.ObjectSubrequestOption, 245 AdBlockRuleOption.ObjectSubrequestOption, option.startswith("~")
238 option.startswith("~")) 246 )
239 elif option.endswith("ping"): 247 elif option.endswith("ping"):
240 self.setOption(AdBlockRuleOption.PingOption) 248 self.setOption(AdBlockRuleOption.PingOption)
241 self.__setException(AdBlockRuleOption.PingOption, 249 self.__setException(
242 option.startswith("~")) 250 AdBlockRuleOption.PingOption, option.startswith("~")
251 )
243 elif option.endswith("media"): 252 elif option.endswith("media"):
244 self.setOption(AdBlockRuleOption.MediaOption) 253 self.setOption(AdBlockRuleOption.MediaOption)
245 self.__setException(AdBlockRuleOption.MediaOption, 254 self.__setException(
246 option.startswith("~")) 255 AdBlockRuleOption.MediaOption, option.startswith("~")
256 )
247 elif option.endswith("font"): 257 elif option.endswith("font"):
248 self.setOption(AdBlockRuleOption.FontOption) 258 self.setOption(AdBlockRuleOption.FontOption)
249 self.__setException(AdBlockRuleOption.FontOption, 259 self.__setException(
250 option.startswith("~")) 260 AdBlockRuleOption.FontOption, option.startswith("~")
261 )
251 elif option.endswith("other"): 262 elif option.endswith("other"):
252 self.setOption(AdBlockRuleOption.OtherOption) 263 self.setOption(AdBlockRuleOption.OtherOption)
253 self.__setException(AdBlockRuleOption.OtherOption, 264 self.__setException(
254 option.startswith("~")) 265 AdBlockRuleOption.OtherOption, option.startswith("~")
266 )
255 elif option == "document" and self.__isException: 267 elif option == "document" and self.__isException:
256 self.setOption(AdBlockRuleOption.DocumentOption) 268 self.setOption(AdBlockRuleOption.DocumentOption)
257 handledOptions += 1 269 handledOptions += 1
258 elif option == "elemhide" and self.__isException: 270 elif option == "elemhide" and self.__isException:
259 self.setOption(AdBlockRuleOption.ElementHideOption) 271 self.setOption(AdBlockRuleOption.ElementHideOption)
260 handledOptions += 1 272 handledOptions += 1
261 elif option == "collapse": 273 elif option == "collapse":
262 # Hiding placeholders of blocked elements is enabled by 274 # Hiding placeholders of blocked elements is enabled by
263 # default 275 # default
264 handledOptions += 1 276 handledOptions += 1
265 277
266 # If we don't handle all options, it's safer to just disable 278 # If we don't handle all options, it's safer to just disable
267 # this rule 279 # this rule
268 if handledOptions != len(options): 280 if handledOptions != len(options):
269 self.__isInternalDisabled = True 281 self.__isInternalDisabled = True
270 self.__type = AdBlockRuleType.Invalid 282 self.__type = AdBlockRuleType.Invalid
271 return 283 return
272 284
273 parsedLine = parsedLine[:optionsIndex] 285 parsedLine = parsedLine[:optionsIndex]
274 286
275 # Rule is classic regexp 287 # Rule is classic regexp
276 if parsedLine.startswith("/") and parsedLine.endswith("/"): 288 if parsedLine.startswith("/") and parsedLine.endswith("/"):
277 parsedLine = parsedLine[1:-1] 289 parsedLine = parsedLine[1:-1]
278 self.__type = AdBlockRuleType.RegExpMatchRule 290 self.__type = AdBlockRuleType.RegExpMatchRule
279 if self.__caseSensitivity: 291 if self.__caseSensitivity:
280 self.__regExp = re.compile(parsedLine) 292 self.__regExp = re.compile(parsedLine)
281 else: 293 else:
282 self.__regExp = re.compile(parsedLine, re.IGNORECASE) 294 self.__regExp = re.compile(parsedLine, re.IGNORECASE)
283 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) 295 self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
284 return 296 return
285 297
286 # Remove starting / ending wildcards (*) 298 # Remove starting / ending wildcards (*)
287 if parsedLine.startswith("*"): 299 if parsedLine.startswith("*"):
288 parsedLine = parsedLine[1:] 300 parsedLine = parsedLine[1:]
289 if parsedLine.endswith("*"): 301 if parsedLine.endswith("*"):
290 parsedLine = parsedLine[:-1] 302 parsedLine = parsedLine[:-1]
291 303
292 # Fast string matching for domain here 304 # Fast string matching for domain here
293 if self.__filterIsOnlyDomain(parsedLine): 305 if self.__filterIsOnlyDomain(parsedLine):
294 parsedLine = parsedLine[2:-1] 306 parsedLine = parsedLine[2:-1]
295 self.__type = AdBlockRuleType.DomainMatchRule 307 self.__type = AdBlockRuleType.DomainMatchRule
296 self.__matchString = parsedLine 308 self.__matchString = parsedLine
297 return 309 return
298 310
299 # If rule contains '|' only at the end, string matching can be used 311 # If rule contains '|' only at the end, string matching can be used
300 if self.__filterIsOnlyEndsMatch(parsedLine): 312 if self.__filterIsOnlyEndsMatch(parsedLine):
301 parsedLine = parsedLine[:-1] 313 parsedLine = parsedLine[:-1]
302 self.__type = AdBlockRuleType.StringEndsMatchRule 314 self.__type = AdBlockRuleType.StringEndsMatchRule
303 self.__matchString = parsedLine 315 self.__matchString = parsedLine
304 return 316 return
305 317
306 # If there is still a wildcard (*) or separator (^) or (|), 318 # If there is still a wildcard (*) or separator (^) or (|),
307 # the rule must be modified to comply with re. 319 # the rule must be modified to comply with re.
308 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine: 320 if "*" in parsedLine or "^" in parsedLine or "|" in parsedLine:
309 self.__type = AdBlockRuleType.RegExpMatchRule 321 self.__type = AdBlockRuleType.RegExpMatchRule
310 pattern = self.__convertPatternToRegExp(parsedLine) 322 pattern = self.__convertPatternToRegExp(parsedLine)
312 self.__regExp = re.compile(pattern) 324 self.__regExp = re.compile(pattern)
313 else: 325 else:
314 self.__regExp = re.compile(pattern, re.IGNORECASE) 326 self.__regExp = re.compile(pattern, re.IGNORECASE)
315 self.__stringMatchers = self.__parseRegExpFilter(parsedLine) 327 self.__stringMatchers = self.__parseRegExpFilter(parsedLine)
316 return 328 return
317 329
318 # This rule matches all URLs 330 # This rule matches all URLs
319 if len(parsedLine) == 0: 331 if len(parsedLine) == 0:
320 if self.__options == AdBlockRuleOption.NoOption: 332 if self.__options == AdBlockRuleOption.NoOption:
321 self.__isInternalDisabled = True 333 self.__isInternalDisabled = True
322 self.__type = AdBlockRuleType.Invalid 334 self.__type = AdBlockRuleType.Invalid
323 return 335 return
324 336
325 self.__type = AdBlockRuleType.MatchAllUrlsRule 337 self.__type = AdBlockRuleType.MatchAllUrlsRule
326 return 338 return
327 339
328 # no regexp required 340 # no regexp required
329 self.__type = AdBlockRuleType.StringContainsMatchRule 341 self.__type = AdBlockRuleType.StringContainsMatchRule
330 self.__matchString = parsedLine 342 self.__matchString = parsedLine
331 343
332 def __parseDomains(self, domains, separator): 344 def __parseDomains(self, domains, separator):
333 """ 345 """
334 Private method to parse a string with a domain list. 346 Private method to parse a string with a domain list.
335 347
336 @param domains list of domains 348 @param domains list of domains
337 @type str 349 @type str
338 @param separator separator character used by the list 350 @param separator separator character used by the list
339 @type str 351 @type str
340 """ 352 """
341 domainsList = [d for d in domains.split(separator) if d] 353 domainsList = [d for d in domains.split(separator) if d]
342 354
343 for domain in domainsList: 355 for domain in domainsList:
344 if not domain: 356 if not domain:
345 continue 357 continue
346 if domain.startswith("~"): 358 if domain.startswith("~"):
347 self.__blockedDomains.append(domain[1:]) 359 self.__blockedDomains.append(domain[1:])
348 else: 360 else:
349 self.__allowedDomains.append(domain) 361 self.__allowedDomains.append(domain)
350 362
351 if bool(self.__blockedDomains) or bool(self.__allowedDomains): 363 if bool(self.__blockedDomains) or bool(self.__allowedDomains):
352 self.setOption(AdBlockRuleOption.DomainRestrictedOption) 364 self.setOption(AdBlockRuleOption.DomainRestrictedOption)
353 365
354 def networkMatch(self, request, domain, encodedUrl): 366 def networkMatch(self, request, domain, encodedUrl):
355 """ 367 """
356 Public method to check the rule for a match. 368 Public method to check the rule for a match.
357 369
358 @param request reference to the network request 370 @param request reference to the network request
359 @type QWebEngineUrlRequestInfo 371 @type QWebEngineUrlRequestInfo
360 @param domain domain name 372 @param domain domain name
361 @type str 373 @type str
362 @param encodedUrl string encoded URL to be checked 374 @param encodedUrl string encoded URL to be checked
363 @type str 375 @type str
364 @return flag indicating a match 376 @return flag indicating a match
365 @rtype bool 377 @rtype bool
366 """ 378 """
367 if ( 379 if (
368 self.__type == AdBlockRuleType.CssRule or 380 self.__type == AdBlockRuleType.CssRule
369 not self.__isEnabled or 381 or not self.__isEnabled
370 self.__isInternalDisabled 382 or self.__isInternalDisabled
371 ): 383 ):
372 return False 384 return False
373 385
374 matched = self.__stringMatch(domain, encodedUrl) 386 matched = self.__stringMatch(domain, encodedUrl)
375 387
376 if matched: 388 if matched:
377 # check domain restrictions 389 # check domain restrictions
378 if ( 390 if self.__hasOption(
379 self.__hasOption(AdBlockRuleOption.DomainRestrictedOption) and 391 AdBlockRuleOption.DomainRestrictedOption
380 not self.matchDomain(request.firstPartyUrl().host()) 392 ) and not self.matchDomain(request.firstPartyUrl().host()):
393 return False
394
395 # check third-party restrictions
396 if self.__hasOption(
397 AdBlockRuleOption.ThirdPartyOption
398 ) and not self.matchThirdParty(request):
399 return False
400
401 # check object restrictions
402 if self.__hasOption(
403 AdBlockRuleOption.ObjectOption
404 ) and not self.matchObject(request):
405 return False
406
407 # check subdocument restrictions
408 if self.__hasOption(
409 AdBlockRuleOption.SubdocumentOption
410 ) and not self.matchSubdocument(request):
411 return False
412
413 # check xmlhttprequest restriction
414 if self.__hasOption(
415 AdBlockRuleOption.XMLHttpRequestOption
416 ) and not self.matchXmlHttpRequest(request):
417 return False
418
419 # check image restriction
420 if self.__hasOption(AdBlockRuleOption.ImageOption) and not self.matchImage(
421 request
381 ): 422 ):
382 return False 423 return False
383 424
384 # check third-party restrictions 425 # check script restriction
385 if ( 426 if self.__hasOption(
386 self.__hasOption(AdBlockRuleOption.ThirdPartyOption) and 427 AdBlockRuleOption.ScriptOption
387 not self.matchThirdParty(request) 428 ) and not self.matchScript(request):
429 return False
430
431 # check stylesheet restriction
432 if self.__hasOption(
433 AdBlockRuleOption.StyleSheetOption
434 ) and not self.matchStyleSheet(request):
435 return False
436
437 # check object-subrequest restriction
438 if self.__hasOption(
439 AdBlockRuleOption.ObjectSubrequestOption
440 ) and not self.matchObjectSubrequest(request):
441 return False
442
443 # check ping restriction
444 if self.__hasOption(AdBlockRuleOption.PingOption) and not self.matchPing(
445 request
388 ): 446 ):
389 return False 447 return False
390 448
391 # check object restrictions 449 # check media restriction
392 if ( 450 if self.__hasOption(AdBlockRuleOption.MediaOption) and not self.matchMedia(
393 self.__hasOption(AdBlockRuleOption.ObjectOption) and 451 request
394 not self.matchObject(request)
395 ): 452 ):
396 return False 453 return False
397 454
398 # check subdocument restrictions 455 # check font restriction
399 if ( 456 if self.__hasOption(AdBlockRuleOption.FontOption) and not self.matchFont(
400 self.__hasOption(AdBlockRuleOption.SubdocumentOption) and 457 request
401 not self.matchSubdocument(request)
402 ): 458 ):
403 return False 459 return False
404 460
405 # check xmlhttprequest restriction
406 if (
407 self.__hasOption(AdBlockRuleOption.XMLHttpRequestOption) and
408 not self.matchXmlHttpRequest(request)
409 ):
410 return False
411
412 # check image restriction
413 if (
414 self.__hasOption(AdBlockRuleOption.ImageOption) and
415 not self.matchImage(request)
416 ):
417 return False
418
419 # check script restriction
420 if (
421 self.__hasOption(AdBlockRuleOption.ScriptOption) and
422 not self.matchScript(request)
423 ):
424 return False
425
426 # check stylesheet restriction
427 if (
428 self.__hasOption(AdBlockRuleOption.StyleSheetOption) and
429 not self.matchStyleSheet(request)
430 ):
431 return False
432
433 # check object-subrequest restriction
434 if (
435 self.__hasOption(AdBlockRuleOption.ObjectSubrequestOption) and
436 not self.matchObjectSubrequest(request)
437 ):
438 return False
439
440 # check ping restriction
441 if (
442 self.__hasOption(AdBlockRuleOption.PingOption) and
443 not self.matchPing(request)
444 ):
445 return False
446
447 # check media restriction
448 if (
449 self.__hasOption(AdBlockRuleOption.MediaOption) and
450 not self.matchMedia(request)
451 ):
452 return False
453
454 # check font restriction
455 if (
456 self.__hasOption(AdBlockRuleOption.FontOption) and
457 not self.matchFont(request)
458 ):
459 return False
460
461 return matched 461 return matched
462 462
463 def urlMatch(self, url): 463 def urlMatch(self, url):
464 """ 464 """
465 Public method to check an URL against the rule. 465 Public method to check an URL against the rule.
466 466
467 @param url URL to check 467 @param url URL to check
468 @type QUrl 468 @type QUrl
469 @return flag indicating a match 469 @return flag indicating a match
470 @rtype bool 470 @rtype bool
471 """ 471 """
472 if ( 472 if not self.__hasOption(
473 not self.__hasOption(AdBlockRuleOption.DocumentOption) and 473 AdBlockRuleOption.DocumentOption
474 not self.__hasOption(AdBlockRuleOption.ElementHideOption) 474 ) and not self.__hasOption(AdBlockRuleOption.ElementHideOption):
475 ):
476 return False 475 return False
477 476
478 encodedUrl = bytes(url.toEncoded()).decode() 477 encodedUrl = bytes(url.toEncoded()).decode()
479 domain = url.host() 478 domain = url.host()
480 return self.__stringMatch(domain, encodedUrl) 479 return self.__stringMatch(domain, encodedUrl)
481 480
482 def __stringMatch(self, domain, encodedUrl): 481 def __stringMatch(self, domain, encodedUrl):
483 """ 482 """
484 Private method to match a domain string. 483 Private method to match a domain string.
485 484
486 @param domain domain to match 485 @param domain domain to match
487 @type str 486 @type str
488 @param encodedUrl URL in encoded form 487 @param encodedUrl URL in encoded form
489 @type str 488 @type str
490 @return flag indicating a match 489 @return flag indicating a match
491 @rtype bool 490 @rtype bool
492 """ 491 """
493 matched = False 492 matched = False
494 493
495 if self.__type == AdBlockRuleType.StringContainsMatchRule: 494 if self.__type == AdBlockRuleType.StringContainsMatchRule:
496 if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive: 495 if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive:
497 matched = self.__matchString.lower() in encodedUrl.lower() 496 matched = self.__matchString.lower() in encodedUrl.lower()
498 else: 497 else:
499 matched = self.__matchString in encodedUrl 498 matched = self.__matchString in encodedUrl
500 elif self.__type == AdBlockRuleType.DomainMatchRule: 499 elif self.__type == AdBlockRuleType.DomainMatchRule:
501 matched = self.__isMatchingDomain(domain, self.__matchString) 500 matched = self.__isMatchingDomain(domain, self.__matchString)
502 elif self.__type == AdBlockRuleType.StringEndsMatchRule: 501 elif self.__type == AdBlockRuleType.StringEndsMatchRule:
503 if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive: 502 if self.__caseSensitivity == Qt.CaseSensitivity.CaseInsensitive:
504 matched = encodedUrl.lower().endswith( 503 matched = encodedUrl.lower().endswith(self.__matchString.lower())
505 self.__matchString.lower())
506 else: 504 else:
507 matched = encodedUrl.endswith(self.__matchString) 505 matched = encodedUrl.endswith(self.__matchString)
508 elif self.__type == AdBlockRuleType.RegExpMatchRule: 506 elif self.__type == AdBlockRuleType.RegExpMatchRule:
509 if not self.__isMatchingRegExpStrings(encodedUrl): 507 if not self.__isMatchingRegExpStrings(encodedUrl):
510 matched = False 508 matched = False
511 else: 509 else:
512 matched = self.__regExp.search(encodedUrl) is not None 510 matched = self.__regExp.search(encodedUrl) is not None
513 elif self.__type == AdBlockRuleType.MatchAllUrlsRule: 511 elif self.__type == AdBlockRuleType.MatchAllUrlsRule:
514 matched = True 512 matched = True
515 513
516 return matched 514 return matched
517 515
518 def matchDomain(self, domain): 516 def matchDomain(self, domain):
519 """ 517 """
520 Public method to match a domain. 518 Public method to match a domain.
521 519
522 @param domain domain name to check 520 @param domain domain name to check
523 @type str 521 @type str
524 @return flag indicating a match 522 @return flag indicating a match
525 @rtype bool 523 @rtype bool
526 """ 524 """
527 if not self.__isEnabled: 525 if not self.__isEnabled:
528 return False 526 return False
529 527
530 if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption): 528 if not self.__hasOption(AdBlockRuleOption.DomainRestrictedOption):
531 return True 529 return True
532 530
533 if len(self.__blockedDomains) == 0: 531 if len(self.__blockedDomains) == 0:
534 return any(self.__isMatchingDomain(domain, dom) 532 return any(
535 for dom in self.__allowedDomains) 533 self.__isMatchingDomain(domain, dom) for dom in self.__allowedDomains
534 )
536 elif len(self.__allowedDomains) == 0: 535 elif len(self.__allowedDomains) == 0:
537 return all(not self.__isMatchingDomain(domain, dom) 536 return all(
538 for dom in self.__blockedDomains) 537 not self.__isMatchingDomain(domain, dom)
539 else: 538 for dom in self.__blockedDomains
540 return (
541 all(not self.__isMatchingDomain(domain, dom)
542 for dom in self.__blockedDomains) and
543 any(self.__isMatchingDomain(domain, dom)
544 for dom in self.__allowedDomains)
545 ) 539 )
546 540 else:
541 return all(
542 not self.__isMatchingDomain(domain, dom)
543 for dom in self.__blockedDomains
544 ) and any(
545 self.__isMatchingDomain(domain, dom) for dom in self.__allowedDomains
546 )
547
547 def matchThirdParty(self, req): 548 def matchThirdParty(self, req):
548 """ 549 """
549 Public method to match a third-party rule. 550 Public method to match a third-party rule.
550 551
551 @param req request object to check 552 @param req request object to check
552 @type QWebEngineUrlRequestInfo 553 @type QWebEngineUrlRequestInfo
553 @return flag indicating a match 554 @return flag indicating a match
554 @rtype boolean 555 @rtype boolean
555 """ 556 """
556 # Third-party matching should be performed on second-level domains 557 # Third-party matching should be performed on second-level domains
557 firstPartyHost = toSecondLevelDomain(req.firstPartyUrl()) 558 firstPartyHost = toSecondLevelDomain(req.firstPartyUrl())
558 host = toSecondLevelDomain(req.requestUrl()) 559 host = toSecondLevelDomain(req.requestUrl())
559 560
560 match = firstPartyHost != host 561 match = firstPartyHost != host
561 562
562 if self.__hasException(AdBlockRuleOption.ThirdPartyOption): 563 if self.__hasException(AdBlockRuleOption.ThirdPartyOption):
563 return not match 564 return not match
564 else: 565 else:
565 return match 566 return match
566 567
def matchObject(self, req):
    """
    Public method to check a request against the object option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeObject
    isObject = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.ObjectOption):
        return isObject
    return not isObject
584 586
def matchSubdocument(self, req):
    """
    Public method to check a request against the sub-document option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeSubFrame
    isSubFrame = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.SubdocumentOption):
        return isSubFrame
    return not isSubFrame
602 605
def matchXmlHttpRequest(self, req):
    """
    Public method to check a request against the XmlHttpRequest option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeXhr
    isXhr = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.XMLHttpRequestOption):
        return isXhr
    return not isXhr
620 623
def matchImage(self, req):
    """
    Public method to check a request against the image option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeImage
    isImage = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.ImageOption):
        return isImage
    return not isImage
638 642
def matchScript(self, req):
    """
    Public method to check a request against the script option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeScript
    isScript = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.ScriptOption):
        return isScript
    return not isScript
656 661
def matchStyleSheet(self, req):
    """
    Public method to check a request against the style sheet option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeStylesheet
    isStyleSheet = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.StyleSheetOption):
        return isStyleSheet
    return not isStyleSheet
674 680
def matchObjectSubrequest(self, req):
    """
    Public method to check a request against the object sub-request option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    resourceTypes = QWebEngineUrlRequestInfo.ResourceType
    # Both sub-resources and plug-in resources count as a match.
    isSubrequest = (
        req.resourceType() == resourceTypes.ResourceTypeSubResource
        or req.resourceType() == resourceTypes.ResourceTypePluginResource
    )

    # The dedicated exception flag inverts the result.
    return not isSubrequest if self.__objectSubrequestException else isSubrequest
697 703
def matchPing(self, req):
    """
    Public method to check a request against the ping option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypePing
    isPing = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.PingOption):
        return isPing
    return not isPing
715 721
def matchMedia(self, req):
    """
    Public method to check a request against the media option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeMedia
    isMedia = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.MediaOption):
        return isMedia
    return not isMedia
733 740
def matchFont(self, req):
    """
    Public method to check a request against the font option.

    @param req request object to check
    @type QWebEngineUrlRequestInfo
    @return flag indicating a match
    @rtype bool
    """
    wantedType = QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFontResource
    isFont = req.resourceType() == wantedType

    # An exception set for this option inverts the result.
    if not self.__hasException(AdBlockRuleOption.FontOption):
        return isFont
    return not isFont
751 759
752 def matchOther(self, req): 760 def matchOther(self, req):
753 """ 761 """
754 Public method to match any other rule. 762 Public method to match any other rule.
755 763
756 @param req request object to check 764 @param req request object to check
757 @type QWebEngineUrlRequestInfo 765 @type QWebEngineUrlRequestInfo
758 @return flag indicating a match 766 @return flag indicating a match
759 @rtype bool 767 @rtype bool
760 """ 768 """
765 QWebEngineUrlRequestInfo.ResourceType.ResourceTypeServiceWorker, 773 QWebEngineUrlRequestInfo.ResourceType.ResourceTypeServiceWorker,
766 QWebEngineUrlRequestInfo.ResourceType.ResourceTypePrefetch, 774 QWebEngineUrlRequestInfo.ResourceType.ResourceTypePrefetch,
767 QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFavicon, 775 QWebEngineUrlRequestInfo.ResourceType.ResourceTypeFavicon,
768 QWebEngineUrlRequestInfo.ResourceType.ResourceTypeUnknown, 776 QWebEngineUrlRequestInfo.ResourceType.ResourceTypeUnknown,
769 ] 777 ]
770 778
771 if self.__hasException(AdBlockRuleOption.OtherOption): 779 if self.__hasException(AdBlockRuleOption.OtherOption):
772 return not match 780 return not match
773 else: 781 else:
774 return match 782 return match
775 783
def isException(self):
    """
    Public method to report whether the rule is an exception rule.

    @return flag indicating an exception
    @rtype bool
    """
    exceptionFlag = self.__isException
    return exceptionFlag
784 792
def setException(self, exception):
    """
    Public method to store the exception flag of the rule.

    @param exception flag indicating an exception rule
    @type bool
    """
    newFlag = exception
    self.__isException = newFlag
793 801
def isEnabled(self):
    """
    Public method to report whether the rule is enabled.

    @return flag indicating enabled state
    @rtype bool
    """
    enabledFlag = self.__isEnabled
    return enabledFlag
802 810
def setEnabled(self, enabled):
    """
    Public method to store the enabled state of the rule.

    @param enabled flag indicating the new enabled state
    @type bool
    """
    newState = enabled
    self.__isEnabled = newState
811 819
def isCSSRule(self):
    """
    Public method to report whether the rule type is the CSS rule type.

    @return flag indicating a CSS rule
    @rtype bool
    """
    cssType = AdBlockRuleType.CssRule
    return self.__type == cssType
820 828
def cssSelector(self):
    """
    Public method to get the CSS selector of the rule.

    The selector is kept in the rule's match string.

    @return CSS selector
    @rtype str
    """
    selector = self.__matchString
    return selector
829 837
def isDocument(self):
    """
    Public method to report whether the document option is set.

    @return flag indicating a document rule
    @rtype bool
    """
    documentOption = AdBlockRuleOption.DocumentOption
    return self.__hasOption(documentOption)
838 846
def isElementHiding(self):
    """
    Public method to report whether the element hiding option is set.

    @return flag indicating an element hiding rule
    @rtype bool
    """
    elementHideOption = AdBlockRuleOption.ElementHideOption
    return self.__hasOption(elementHideOption)
847 855
def isDomainRestricted(self):
    """
    Public method to report whether the domain restriction option is set.

    @return flag indicating a domain restriction
    @rtype bool
    """
    domainOption = AdBlockRuleOption.DomainRestrictedOption
    return self.__hasOption(domainOption)
856 864
def isComment(self):
    """
    Public method to report whether the filter text is a comment.

    A comment is a filter line beginning with an exclamation mark.

    @return flag indicating a comment
    @rtype bool
    """
    filterText = self.__filter
    return filterText.startswith("!")
865 873
def isHeader(self):
    """
    Public method to report whether the filter text is a header line.

    A header is a filter line beginning with "[Adblock".

    @return flag indicating a header
    @rtype bool
    """
    filterText = self.__filter
    return filterText.startswith("[Adblock")
874 882
def isSlow(self):
    """
    Public method to report whether the rule is a slow rule.

    A rule counts as slow, if it carries a regular expression.

    @return flag indicating a slow rule
    @rtype bool
    """
    hasRegExp = self.__regExp is not None
    return hasRegExp
883 891
def isInternalDisabled(self):
    """
    Public method to report whether the rule was disabled internally.

    @return flag indicating an internally disabled rule
    @rtype bool
    """
    internalFlag = self.__isInternalDisabled
    return internalFlag
892 900
893 def __convertPatternToRegExp(self, wildcardPattern): 901 def __convertPatternToRegExp(self, wildcardPattern):
894 """ 902 """
895 Private method to convert a wildcard pattern to a regular expression. 903 Private method to convert a wildcard pattern to a regular expression.
896 904
897 @param wildcardPattern string containing the wildcard pattern 905 @param wildcardPattern string containing the wildcard pattern
898 @type str 906 @type str
899 @return string containing a regular expression 907 @return string containing a regular expression
900 @rtype string 908 @rtype string
901 """ 909 """
902 pattern = wildcardPattern 910 pattern = wildcardPattern
903 911
904 # remove multiple wildcards 912 # remove multiple wildcards
905 pattern = re.sub(r"\*+", "*", pattern) 913 pattern = re.sub(r"\*+", "*", pattern)
906 # remove anchors following separator placeholder 914 # remove anchors following separator placeholder
907 pattern = re.sub(r"\^\|$", "^", pattern) 915 pattern = re.sub(r"\^\|$", "^", pattern)
908 # remove leading wildcards 916 # remove leading wildcards
910 # remove trailing wildcards 918 # remove trailing wildcards
911 pattern = re.sub(r"(\*)$", "", pattern) 919 pattern = re.sub(r"(\*)$", "", pattern)
912 # escape special symbols 920 # escape special symbols
913 pattern = re.sub(r"(\W)", r"\\\1", pattern) 921 pattern = re.sub(r"(\W)", r"\\\1", pattern)
914 # process extended anchor at expression start 922 # process extended anchor at expression start
915 pattern = re.sub( 923 pattern = re.sub(r"^\\\|\\\|", r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
916 r"^\\\|\\\|",
917 r"^[\\w\-]+:\/+(?!\/)(?:[^\/]+\.)?", pattern)
918 # process separator placeholders 924 # process separator placeholders
919 pattern = re.sub(r"\\\^", r"(?:[^\\w\\d\-.%]|$)", pattern) 925 pattern = re.sub(r"\\\^", r"(?:[^\\w\\d\-.%]|$)", pattern)
920 # process anchor at expression start 926 # process anchor at expression start
921 pattern = re.sub(r"^\\\|", "^", pattern) 927 pattern = re.sub(r"^\\\|", "^", pattern)
922 # process anchor at expression end 928 # process anchor at expression end
923 pattern = re.sub(r"\\\|$", "$", pattern) 929 pattern = re.sub(r"\\\|$", "$", pattern)
924 # replace wildcards by .* 930 # replace wildcards by .*
925 pattern = re.sub(r"\\\*", ".*", pattern) 931 pattern = re.sub(r"\\\*", ".*", pattern)
926 932
927 return pattern 933 return pattern
928 934
929 def __hasOption(self, opt): 935 def __hasOption(self, opt):
930 """ 936 """
931 Private method to check, if the given option has been set. 937 Private method to check, if the given option has been set.
932 938
933 @param opt option to check for 939 @param opt option to check for
934 @type AdBlockRuleOption 940 @type AdBlockRuleOption
935 @return flag indicating the state of the option 941 @return flag indicating the state of the option
936 @rtype bool 942 @rtype bool
937 """ 943 """
938 return bool(self.__options & opt) 944 return bool(self.__options & opt)
939 945
def setOption(self, opt):
    """
    Public method to add the given option to the option bit mask.

    @param opt option to be set
    @type AdBlockRuleOption
    """
    self.__options = self.__options | opt
948 954
949 def __hasException(self, opt): 955 def __hasException(self, opt):
950 """ 956 """
951 Private method to check, if the given option has been set as an 957 Private method to check, if the given option has been set as an
952 exception. 958 exception.
953 959
954 @param opt option to check for 960 @param opt option to check for
955 @type AdBlockRuleOption 961 @type AdBlockRuleOption
956 @return flag indicating the exception state of the option 962 @return flag indicating the exception state of the option
957 @rtype bool 963 @rtype bool
958 """ 964 """
959 return bool(self.__exceptions & opt) 965 return bool(self.__exceptions & opt)
960 966
961 def __setException(self, opt, on): 967 def __setException(self, opt, on):
962 """ 968 """
963 Private method to set the given option as an exception. 969 Private method to set the given option as an exception.
964 970
965 @param opt option to be set 971 @param opt option to be set
966 @type AdBlockRuleOption 972 @type AdBlockRuleOption
967 @param on flag indicating to set or unset the exception 973 @param on flag indicating to set or unset the exception
968 @type bool 974 @type bool
969 """ 975 """
970 if on: 976 if on:
971 self.__exceptions |= opt 977 self.__exceptions |= opt
972 else: 978 else:
973 self.__exceptions &= ~opt 979 self.__exceptions &= ~opt
974 980
975 def __filterIsOnlyDomain(self, filterString): 981 def __filterIsOnlyDomain(self, filterString):
976 """ 982 """
977 Private method to check, if the given filter is a domain only filter. 983 Private method to check, if the given filter is a domain only filter.
978 984
979 @param filterString filter string to be checked 985 @param filterString filter string to be checked
980 @type str 986 @type str
981 @return flag indicating a domain only filter 987 @return flag indicating a domain only filter
982 @rtype bool 988 @rtype bool
983 """ 989 """
984 if not filterString.endswith("^") or not filterString.startswith("||"): 990 if not filterString.endswith("^") or not filterString.startswith("||"):
985 return False 991 return False
986 992
987 return all(filterChar not in ["/", ":", "?", "=", "&", "*"] 993 return all(
988 for filterChar in filterString) 994 filterChar not in ["/", ":", "?", "=", "&", "*"]
989 995 for filterChar in filterString
996 )
997
990 def __filterIsOnlyEndsMatch(self, filterString): 998 def __filterIsOnlyEndsMatch(self, filterString):
991 """ 999 """
992 Private method to check, if the given filter is to match against the 1000 Private method to check, if the given filter is to match against the
993 end of a string. 1001 end of a string.
994 1002
995 @param filterString filter string to be checked 1003 @param filterString filter string to be checked
996 @type str 1004 @type str
997 @return flag indicating a end of string match filter 1005 @return flag indicating a end of string match filter
998 @rtype bool 1006 @rtype bool
999 """ 1007 """
1001 # __IGNORE_WARNING_Y111__ 1009 # __IGNORE_WARNING_Y111__
1002 if filterChar in ["^", "*"]: 1010 if filterChar in ["^", "*"]:
1003 return False 1011 return False
1004 elif filterChar == "|": 1012 elif filterChar == "|":
1005 return index == len(filterString) - 1 1013 return index == len(filterString) - 1
1006 1014
1007 return False 1015 return False
1008 1016
1009 def __isMatchingDomain(self, domain, filterString): 1017 def __isMatchingDomain(self, domain, filterString):
1010 """ 1018 """
1011 Private method to check, if a given domain matches the given filter 1019 Private method to check, if a given domain matches the given filter
1012 string. 1020 string.
1013 1021
1014 @param domain domain to be checked 1022 @param domain domain to be checked
1015 @type str 1023 @type str
1016 @param filterString filter string to check against 1024 @param filterString filter string to check against
1017 @type str 1025 @type str
1018 @return flag indicating a match 1026 @return flag indicating a match
1019 @rtype bool 1027 @rtype bool
1020 """ 1028 """
1021 if filterString == domain: 1029 if filterString == domain:
1022 return True 1030 return True
1023 1031
1024 if not domain.endswith(filterString): 1032 if not domain.endswith(filterString):
1025 return False 1033 return False
1026 1034
1027 index = domain.find(filterString) 1035 index = domain.find(filterString)
1028 1036
1029 return bool(index > 0 and domain[index - 1] == ".") 1037 return bool(index > 0 and domain[index - 1] == ".")
1030 1038
1031 def __isMatchingRegExpStrings(self, url): 1039 def __isMatchingRegExpStrings(self, url):
1032 """ 1040 """
1033 Private method to check the given URL against the fixed parts of 1041 Private method to check the given URL against the fixed parts of
1034 the regexp. 1042 the regexp.
1035 1043
1036 @param url URL to be checked 1044 @param url URL to be checked
1037 @type str 1045 @type str
1038 @return flag indicating a match 1046 @return flag indicating a match
1039 @rtype bool 1047 @rtype bool
1040 """ 1048 """
1041 if self.__regExp is not None: 1049 if self.__regExp is not None:
1042 return all(matcher in url for matcher in self.__stringMatchers) 1050 return all(matcher in url for matcher in self.__stringMatchers)
1043 1051
1044 return True 1052 return True
1045 1053
1046 def __parseRegExpFilter(self, filterString): 1054 def __parseRegExpFilter(self, filterString):
1047 """ 1055 """
1048 Private method to split the given regular expression into strings that 1056 Private method to split the given regular expression into strings that
1049 can be used with 'in'. 1057 can be used with 'in'.
1050 1058
1051 @param filterString regexp filter string to be parsed 1059 @param filterString regexp filter string to be parsed
1052 @type str 1060 @type str
1053 @return fixed string parts of the filter 1061 @return fixed string parts of the filter
1054 @rtype list of str 1062 @rtype list of str
1055 """ 1063 """
1056 matchers = [] 1064 matchers = []
1057 1065
1058 startPos = -1 1066 startPos = -1
1059 for index in range(len(filterString)): 1067 for index in range(len(filterString)):
1060 filterChar = filterString[index] 1068 filterChar = filterString[index]
1061 if filterChar in ["|", "*", "^"]: 1069 if filterChar in ["|", "*", "^"]:
1062 sub = filterString[startPos:index] 1070 sub = filterString[startPos:index]
1063 if len(sub) > 1: 1071 if len(sub) > 1:
1064 matchers.append(sub) 1072 matchers.append(sub)
1065 startPos = index + 1 1073 startPos = index + 1
1066 1074
1067 sub = filterString[startPos:] 1075 sub = filterString[startPos:]
1068 if len(sub) > 1: 1076 if len(sub) > 1:
1069 matchers.append(sub) 1077 matchers.append(sub)
1070 1078
1071 return list(set(matchers)) 1079 return list(set(matchers))
1072 1080
def ruleType(self):
    """
    Public method to get the type of the rule.

    @return rule type
    @rtype AdBlockRuleType
    """
    currentType = self.__type
    return currentType
1081 1089
def ruleOptions(self):
    """
    Public method to get the option bit mask of the rule.

    @return rule options
    @rtype AdBlockRuleOption
    """
    currentOptions = self.__options
    return currentOptions
1090 1098
def ruleExceptions(self):
    """
    Public method to get the exception bit mask of the rule.

    @return rule exceptions
    @rtype AdBlockRuleOption
    """
    currentExceptions = self.__exceptions
    return currentExceptions
1099 1107
def matchString(self):
    """
    Public method to get the string the rule matches against.

    @return match string
    @rtype str
    """
    currentMatchString = self.__matchString
    return currentMatchString
1108 1116
def caseSensitivity(self):
    """
    Public method to get the case sensitivity setting of the rule.

    @return case sensitivity
    @rtype Qt.CaseSensitivity
    """
    sensitivity = self.__caseSensitivity
    return sensitivity
1117 1125
def allowedDomains(self):
    """
    Public method to get the allowed domains.

    The returned list is a copy; changing it does not alter the rule.

    @return list of allowed domains
    @rtype list of str
    """
    domainsCopy = list(self.__allowedDomains)
    return domainsCopy
1126 1134
def blockedDomains(self):
    """
    Public method to get the blocked domains.

    The returned list is a copy; changing it does not alter the rule.

    @return list of blocked domains
    @rtype list of str
    """
    domainsCopy = list(self.__blockedDomains)
    return domainsCopy
1135 1143
def addBlockedDomains(self, domains):
    """
    Public method to extend the list of blocked domains.

    A list is merged element by element, any other value is added as a
    single entry.

    @param domains list of domains to be added
    @type str or list of str
    """
    if not isinstance(domains, list):
        self.__blockedDomains.append(domains)
        return

    self.__blockedDomains.extend(domains)
1147 1155
def getRegExpAndMatchers(self):
    """
    Public method to get the regular expression and associated string
    matchers.

    The regular expression is recompiled from the pattern together with
    the flags of the stored one, so a case insensitive expression stays
    case insensitive. The list of string matchers is a copy.

    @return tuple containing the regular expression and the list of
        string matchers
    @rtype tuple of (re.Pattern, list of str)
    """
    if self.__regExp is None:
        return (None, [])

    # Pass the flags along; compiling the bare pattern would silently
    # drop e.g. re.IGNORECASE and change the matching behavior.
    regExpCopy = re.compile(self.__regExp.pattern, self.__regExp.flags)
    return (regExpCopy, self.__stringMatchers[:])
1162 1169
1163 def copyFrom(self, other): 1170 def copyFrom(self, other):
1164 """ 1171 """
1165 Public method to copy another AdBlock rule. 1172 Public method to copy another AdBlock rule.
1166 1173
1167 @param other reference to the AdBlock rule to copy from 1174 @param other reference to the AdBlock rule to copy from
1168 @type AdBlockRule 1175 @type AdBlockRule
1169 """ 1176 """
1170 self.__subscription = other.subscription() 1177 self.__subscription = other.subscription()
1171 self.__type = other.ruleType() 1178 self.__type = other.ruleType()

eric ide

mercurial