|
# -*- coding: utf-8 -*-

# Copyright (c) 2017 Detlev Offenbach <detlev@die-offenbachs.de>
#

"""
Module implementing the AdBlock matcher.
"""
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 from PyQt5.QtCore import QObject |
|
13 |
|
14 from .AdBlockSearchTree import AdBlockSearchTree |
|
15 from .AdBlockRule import AdBlockRule, AdBlockRuleOption |
|
16 |
|
17 |
|
class AdBlockMatcher(QObject):
    """
    Class implementing the AdBlock matcher.

    The matcher caches the rules of all enabled subscriptions of the
    AdBlock manager, sorted into network block rules, network exception
    rules, rules switching off AdBlock or element hiding for a URL and
    CSS element hiding rules. The cache is rebuilt by calling update().
    """
    # Number of selectors combined into one generated CSS rule. The
    # counting logic historically used by this class closes a rule with
    # the selector following the 1.000th one, i.e. a full rule carries
    # 1.001 selectors. The value is kept to leave the generated style
    # sheets unchanged.
    __SelectorsPerCssRule = 1001

    def __init__(self, manager):
        """
        Constructor

        @param manager reference to the AdBlock manager object
        @type AdBlockManager
        """
        super(AdBlockMatcher, self).__init__(manager)

        self.__manager = manager

        # rules created by update() itself (copies of CSS rules extended
        # by the domains of a CSS exception rule)
        self.__createdRules = []
        # network rules, that could not be added to the search trees
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        # CSS rules, that are restricted to certain domains
        self.__domainRestrictedCssRules = []
        # rules switching off AdBlock for a URL
        self.__documentRules = []
        # rules switching off element hiding for a URL
        self.__elemhideRules = []

        # CSS text for all element hiding rules without domain restriction
        self.__elementHidingRules = ""
        self.__networkBlockTree = AdBlockSearchTree()
        self.__networkExceptionTree = AdBlockSearchTree()

    def match(self, request, urlDomain, urlString):
        """
        Public method to match a request.

        Exception rules are checked first. If one of them matches, the
        request is not blocked and no rule is returned.

        @param request URL request to be matched
        @type QWebEngineUrlRequestInfo
        @param urlDomain domain of the URL
        @type str
        @param urlString requested URL as a lowercase string
        @type str
        @return reference to the matched block rule or None, if the request
            is not blocked
        @rtype AdBlockRule
        """
        # exception rules
        if self.__networkExceptionTree.find(request, urlDomain, urlString):
            return None

        for rule in self.__networkExceptionRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return None

        # block rules
        rule = self.__networkBlockTree.find(request, urlDomain, urlString)
        if rule:
            return rule

        for rule in self.__networkBlockRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return rule

        return None

    def adBlockDisabledForUrl(self, url):
        """
        Public method to check, if AdBlock is disabled for the given URL.

        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        for rule in self.__documentRules:
            if rule.urlMatch(url):
                return True

        return False

    def elemHideDisabledForUrl(self, url):
        """
        Public method to check, if element hiding is disabled for the given
        URL.

        Element hiding is disabled as well, if AdBlock as a whole is
        disabled for the URL.

        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        if self.adBlockDisabledForUrl(url):
            return True

        for rule in self.__elemhideRules:
            if rule.urlMatch(url):
                return True

        return False

    def elementHidingRules(self):
        """
        Public method to get the element hiding rules.

        @return element hiding rules (CSS text generated by update() for
            all rules without domain restriction)
        @rtype str
        """
        return self.__elementHidingRules

    def elementHidingRulesForDomain(self, domain):
        """
        Public method to get the element hiding rules for the given domain.

        @param domain domain name
        @type str
        @return element hiding rules (CSS text, one generated rule per line)
        @rtype str
        """
        selectors = [
            rule.cssSelector()
            for rule in self.__domainRestrictedCssRules
            if rule.matchDomain(domain)
        ]
        return self.__combineSelectors(selectors, "\n")

    def update(self):
        """
        Public slot to update the internal state.

        All cached data is discarded and rebuilt from the rules of the
        enabled subscriptions of the AdBlock manager.
        """
        self.clear()

        cssRulesDict = {}
        exceptionCssRules = []

        for subscription in self.__manager.subscriptions():
            if not subscription.isEnabled():
                continue

            for rule in subscription.allRules():
                # Don't add internally disabled rules to the cache
                if rule.isInternalDisabled():
                    continue

                if rule.isCSSRule():
                    # Only enabled CSS rules are added to the cache because
                    # there is no enabled/disabled check on match. They are
                    # directly embedded to pages.
                    if not rule.isEnabled():
                        continue

                    if rule.isException():
                        exceptionCssRules.append(rule)
                    else:
                        cssRulesDict[rule.cssSelector()] = rule
                elif rule.isDocument():
                    self.__documentRules.append(rule)
                elif rule.isElementHiding():
                    self.__elemhideRules.append(rule)
                elif rule.isException():
                    # Exception rules not accepted by the search tree must
                    # be kept in the exception list; putting them into the
                    # block list would turn them into blocking rules.
                    if not self.__networkExceptionTree.add(rule):
                        self.__networkExceptionRules.append(rule)
                else:
                    if not self.__networkBlockTree.add(rule):
                        self.__networkBlockRules.append(rule)

        for rule in exceptionCssRules:
            selector = rule.cssSelector()
            try:
                originalRule = cssRulesDict[selector]
            except KeyError:
                # If there is no such selector, the exception does nothing.
                continue

            # Replace the hiding rule by a domain restricted copy, that is
            # blocked for the domains the exception rule allows.
            copiedRule = AdBlockRule()
            copiedRule.copyFrom(originalRule)
            copiedRule.setOption(AdBlockRuleOption.DomainRestrictedOption)
            copiedRule.addBlockedDomains(rule.allowedDomains())

            cssRulesDict[selector] = copiedRule
            self.__createdRules.append(copiedRule)

        # Excessive amount of selectors for one CSS rule is not what the
        # rendering engine likes. So split them up into several rules.
        globalSelectors = []
        for rule in cssRulesDict.values():
            if rule.isDomainRestricted():
                self.__domainRestrictedCssRules.append(rule)
            else:
                globalSelectors.append(rule.cssSelector())

        self.__elementHidingRules = self.__combineSelectors(
            globalSelectors, " ")

    def clear(self):
        """
        Public slot to clear the internal structures.
        """
        self.__createdRules = []
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []

        self.__elementHidingRules = ""
        self.__networkBlockTree.clear()
        self.__networkExceptionTree.clear()

    def __combineSelectors(self, selectors, terminator):
        """
        Private method to combine CSS selectors into element hiding rules.

        The selectors are joined by commas in batches, each batch followed
        by the 'display:none' declaration and the given terminator. An
        empty list of selectors yields an empty string.

        @param selectors list of CSS selectors
        @type list of str
        @param terminator text to be appended to each generated rule
        @type str
        @return generated CSS rules
        @rtype str
        """
        batchSize = self.__SelectorsPerCssRule
        declaration = "{display:none !important;}" + terminator
        return "".join(
            ",".join(selectors[start:start + batchSize]) + declaration
            for start in range(0, len(selectors), batchSize)
        )