|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2017 - 2021 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock matcher. |
|
8 """ |
|
9 |
|
10 from PyQt5.QtCore import QObject |
|
11 |
|
12 from .AdBlockSearchTree import AdBlockSearchTree |
|
13 from .AdBlockRule import AdBlockRule, AdBlockRuleOption |
|
14 |
|
15 |
|
class AdBlockMatcher(QObject):
    """
    Class implementing the AdBlock matcher.

    The matcher caches the rules of all enabled subscriptions of the
    AdBlock manager in lists and search trees, so that requests can be
    matched and element hiding CSS can be generated from the cache.
    """
    # Maximum number of selectors sharing one CSS declaration block
    # (1000 comma terminated selectors plus the one closing the group).
    # An excessive amount of selectors for one CSS rule is not what the
    # rendering engine likes.
    _MaxSelectorsPerCssRule = 1001

    def __init__(self, manager):
        """
        Constructor

        @param manager reference to the AdBlock manager object
        @type AdBlockManager
        """
        super().__init__(manager)

        self.__manager = manager

        # rules created by update() for CSS exception handling
        self.__createdRules = []
        # network rules, that could not be stored in the search trees
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        # CSS rules and rules disabling AdBlock or element hiding
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []

        # pre-built CSS of all CSS rules not restricted to a domain
        self.__elementHidingRules = ""
        self.__networkBlockTree = AdBlockSearchTree()
        self.__networkExceptionTree = AdBlockSearchTree()

    def match(self, request, urlDomain, urlString):
        """
        Public method to match a request.

        Exception rules are checked first; a matching exception rule
        prevents the request from being blocked.

        @param request URL request to be matched
        @type QWebEngineUrlRequestInfo
        @param urlDomain domain of the URL
        @type str
        @param urlString requested URL as a lowercase string
        @type str
        @return reference to the matched block rule or None, if an
            exception rule matched or no block rule matched
        @rtype AdBlockRule
        """
        # exception rules
        if self.__networkExceptionTree.find(request, urlDomain, urlString):
            return None

        for rule in self.__networkExceptionRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return None

        # block rules
        rule = self.__networkBlockTree.find(request, urlDomain, urlString)
        if rule:
            return rule

        for rule in self.__networkBlockRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return rule

        return None

    def adBlockDisabledForUrl(self, url):
        """
        Public method to check, if AdBlock is disabled for the given URL.

        @param url URL to check
        @type QUrl
        @return flag indicating disabled state (i.e. one of the cached
            document rules matches the URL)
        @rtype bool
        """
        return any(rule.urlMatch(url) for rule in self.__documentRules)

    def elemHideDisabledForUrl(self, url):
        """
        Public method to check, if element hiding is disabled for the given
        URL.

        Element hiding is disabled, if AdBlock as a whole is disabled for
        the URL or if one of the cached element hiding rules matches it.

        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        if self.adBlockDisabledForUrl(url):
            return True

        return any(rule.urlMatch(url) for rule in self.__elemhideRules)

    def elementHidingRules(self):
        """
        Public method to get the element hiding rules.

        @return element hiding rules (CSS text built by update() from the
            CSS rules not restricted to a domain)
        @rtype str
        """
        return self.__elementHidingRules

    def elementHidingRulesForDomain(self, domain):
        """
        Public method to get the element hiding rules for the given domain.

        @param domain domain name
        @type str
        @return element hiding rules (CSS text of all domain restricted
            CSS rules matching the domain; empty, if none matches)
        @rtype str
        """
        selectors = [
            rule.cssSelector()
            for rule in self.__domainRestrictedCssRules
            if rule.matchDomain(domain)
        ]
        return self.__buildHidingCss(selectors, "\n")

    def update(self):
        """
        Public slot to update the internal state.

        The cache is cleared and rebuilt from the rules of all enabled
        subscriptions of the AdBlock manager.
        """
        self.clear()

        cssRulesDict = {}
        exceptionCssRules = []

        for subscription in self.__manager.subscriptions():
            if not subscription.isEnabled():
                continue

            for rule in subscription.allRules():
                # Don't add internally disabled rules to the cache
                if rule.isInternalDisabled():
                    continue

                if rule.isCSSRule():
                    # Only enabled CSS rules are added to the cache because
                    # there is no enabled/disabled check on match. They are
                    # directly embedded to pages.
                    if not rule.isEnabled():
                        continue

                    if rule.isException():
                        exceptionCssRules.append(rule)
                    else:
                        cssRulesDict[rule.cssSelector()] = rule
                elif rule.isDocument():
                    self.__documentRules.append(rule)
                elif rule.isElementHiding():
                    self.__elemhideRules.append(rule)
                elif rule.isException():
                    # Exception rules not accepted by the search tree must
                    # go to the exception list. Storing them in the block
                    # list would make match() block the requests they are
                    # meant to allow.
                    if not self.__networkExceptionTree.add(rule):
                        self.__networkExceptionRules.append(rule)
                else:
                    if not self.__networkBlockTree.add(rule):
                        self.__networkBlockRules.append(rule)

        for rule in exceptionCssRules:
            selector = rule.cssSelector()
            try:
                originalRule = cssRulesDict[selector]
            except KeyError:
                # If there is no such selector, the exception does nothing.
                continue

            # Replace the hiding rule by a domain restricted copy, that is
            # blocked for the domains the exception rule allows.
            copiedRule = AdBlockRule()
            copiedRule.copyFrom(originalRule)
            copiedRule.setOption(AdBlockRuleOption.DomainRestrictedOption)
            copiedRule.addBlockedDomains(rule.allowedDomains())

            cssRulesDict[selector] = copiedRule
            self.__createdRules.append(copiedRule)

        # Domain restricted rules are evaluated per domain on request; all
        # others are combined into the global element hiding CSS once.
        globalSelectors = []
        for rule in cssRulesDict.values():
            if rule.isDomainRestricted():
                self.__domainRestrictedCssRules.append(rule)
            else:
                globalSelectors.append(rule.cssSelector())

        self.__elementHidingRules = self.__buildHidingCss(
            globalSelectors, " ")

    def clear(self):
        """
        Public slot to clear the internal structures.
        """
        self.__createdRules = []
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []

        self.__elementHidingRules = ""
        self.__networkBlockTree.clear()
        self.__networkExceptionTree.clear()

    def __buildHidingCss(self, selectors, terminator):
        """
        Private method to build the CSS text hiding the given selectors.

        The selectors are split into groups of at most
        _MaxSelectorsPerCssRule entries. Each group is joined by commas
        and followed by the 'display:none' declaration and the terminator.

        @param selectors CSS selectors to be hidden
        @type list of str
        @param terminator text to be appended after each declaration block
        @type str
        @return CSS text (empty, if no selectors were given)
        @rtype str
        """
        groupSize = self._MaxSelectorsPerCssRule
        declaration = "{display:none !important;}" + terminator
        return "".join(
            ",".join(selectors[start:start + groupSize]) + declaration
            for start in range(0, len(selectors), groupSize)
        )
225 self.__networkExceptionTree.clear() |