WebBrowser/AdBlock/AdBlockMatcher.py

changeset 6028
859f6894eed9
child 6048
82ad8ec9548c
equal deleted inserted replaced
6027:d056a536670e 6028:859f6894eed9
1 # -*- coding: utf-8 -*-
2
3 # Copyright (c) 2017 Detlev Offenbach <detlev@die-offenbachs.de>
4 #
5
6 """
7 Module implementing the AdBlock matcher.
8 """
9
10 from __future__ import unicode_literals
11
12 from PyQt5.QtCore import QObject
13
14 from .AdBlockSearchTree import AdBlockSearchTree
15 from .AdBlockRule import AdBlockRule, AdBlockRuleOption
16
17
class AdBlockMatcher(QObject):
    """
    Class implementing the AdBlock matcher.

    The matcher caches the rules of all enabled subscriptions of the
    AdBlock manager, sorted by their kind, and answers the matching
    queries against this cache. The cache is rebuilt by calling update().
    """
    def __init__(self, manager):
        """
        Constructor

        @param manager reference to the AdBlock manager object
        @type AdBlockManager
        """
        super(AdBlockMatcher, self).__init__(manager)

        self.__manager = manager

        # rule copies created by update() for CSS exception handling
        self.__createdRules = []
        # network rules, that could not be stored in the search trees
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        # CSS rules, that apply to certain domains only
        self.__domainRestrictedCssRules = []
        # rules disabling AdBlock / element hiding for matching URLs
        self.__documentRules = []
        self.__elemhideRules = []

        # ready made CSS text hiding the not domain restricted selectors
        self.__elementHidingRules = ""
        self.__networkBlockTree = AdBlockSearchTree()
        self.__networkExceptionTree = AdBlockSearchTree()

    def match(self, request, urlDomain, urlString):
        """
        Public method to match a request.

        Exception rules take precedence over blocking rules, i.e. a request
        matched by an exception rule is never reported as blocked.

        @param request URL request to be matched
        @type QWebEngineUrlRequestInfo
        @param urlDomain domain of the URL
        @type str
        @param urlString requested URL as a lowercase string
        @type str
        @return reference to the matched blocking rule or None, if the
            request is not blocked
        @rtype AdBlockRule
        """
        # exception rules
        if self.__networkExceptionTree.find(request, urlDomain, urlString):
            return None

        if any(rule.networkMatch(request, urlDomain, urlString)
               for rule in self.__networkExceptionRules):
            return None

        # block rules
        rule = self.__networkBlockTree.find(request, urlDomain, urlString)
        if rule:
            return rule

        for rule in self.__networkBlockRules:
            if rule.networkMatch(request, urlDomain, urlString):
                return rule

        return None

    def adBlockDisabledForUrl(self, url):
        """
        Public method to check, if AdBlock is disabled for the given URL.

        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        return any(rule.urlMatch(url) for rule in self.__documentRules)

    def elemHideDisabledForUrl(self, url):
        """
        Public method to check, if element hiding is disabled for the given
        URL.

        Element hiding is disabled as well, if AdBlock as a whole is
        disabled for the URL.

        @param url URL to check
        @type QUrl
        @return flag indicating disabled state
        @rtype bool
        """
        if self.adBlockDisabledForUrl(url):
            return True

        return any(rule.urlMatch(url) for rule in self.__elemhideRules)

    def elementHidingRules(self):
        """
        Public method to get the element hiding rules.

        @return element hiding rules
        @rtype str
        """
        return self.__elementHidingRules

    def elementHidingRulesForDomain(self, domain):
        """
        Public method to get the element hiding rules for the given domain.

        @param domain domain name
        @type str
        @return element hiding rules (empty string, if no domain restricted
            rule matches the domain)
        @rtype str
        """
        selectors = [
            rule.cssSelector()
            for rule in self.__domainRestrictedCssRules
            if rule.matchDomain(domain)
        ]
        return self.__buildHidingCss(
            selectors, "{display:none !important;}\n")

    def update(self):
        """
        Public slot to update the internal state.

        The cache is cleared and rebuilt from the rules of all enabled
        subscriptions of the AdBlock manager.
        """
        self.clear()

        cssRulesDict = {}
        exceptionCssRules = []

        for subscription in self.__manager.subscriptions():
            if not subscription.isEnabled():
                continue

            for rule in subscription.allRules():
                # Don't add internally disabled rules to the cache
                if rule.isInternalDisabled():
                    continue

                if rule.isCSSRule():
                    # Only enabled CSS rules are added to the cache because
                    # there is no enabled/disabled check on match. They are
                    # directly embedded to pages.
                    if not rule.isEnabled():
                        continue

                    if rule.isException():
                        exceptionCssRules.append(rule)
                    else:
                        cssRulesDict[rule.cssSelector()] = rule
                elif rule.isDocument():
                    self.__documentRules.append(rule)
                elif rule.isElementHiding():
                    self.__elemhideRules.append(rule)
                elif rule.isException():
                    # Rules not accepted by the search tree are matched one
                    # by one. Exception rules must be kept in the exception
                    # list; in the block list match() would treat them as
                    # blocking rules.
                    if not self.__networkExceptionTree.add(rule):
                        self.__networkExceptionRules.append(rule)
                else:
                    if not self.__networkBlockTree.add(rule):
                        self.__networkBlockRules.append(rule)

        for rule in exceptionCssRules:
            selector = rule.cssSelector()
            originalRule = cssRulesDict.get(selector)
            if originalRule is None:
                # If there is no such selector, the exception does nothing.
                continue

            # Replace the hiding rule by a domain restricted copy, that is
            # blocked for the domains of the exception rule.
            copiedRule = AdBlockRule()
            copiedRule.copyFrom(originalRule)
            copiedRule.setOption(AdBlockRuleOption.DomainRestrictedOption)
            copiedRule.addBlockedDomains(rule.allowedDomains())

            cssRulesDict[selector] = copiedRule
            self.__createdRules.append(copiedRule)

        # Domain restricted rules are evaluated per domain on request; the
        # selectors of all other rules are merged into one CSS text.
        globalSelectors = []
        for rule in cssRulesDict.values():
            if rule.isDomainRestricted():
                self.__domainRestrictedCssRules.append(rule)
            else:
                globalSelectors.append(rule.cssSelector())

        self.__elementHidingRules = self.__buildHidingCss(
            globalSelectors, "{display:none !important;} ")

    def clear(self):
        """
        Public slot to clear the internal structures.
        """
        self.__createdRules = []
        self.__networkExceptionRules = []
        self.__networkBlockRules = []
        self.__domainRestrictedCssRules = []
        self.__documentRules = []
        self.__elemhideRules = []

        self.__elementHidingRules = ""
        self.__networkBlockTree.clear()
        self.__networkExceptionTree.clear()

    @staticmethod
    def __buildHidingCss(selectors, declaration):
        """
        Private method to assemble CSS rules hiding the given selectors.

        An excessive amount of selectors for one CSS rule is not what the
        rendering engine likes. The selectors are therefore distributed
        over several rules of at most 1.001 comma separated selectors each.

        @param selectors CSS selectors to be hidden
        @type list of str
        @param declaration declaration block (including any trailing
            separator) to be appended to each group of selectors
        @type str
        @return generated CSS rules (empty string, if there are no
            selectors)
        @rtype str
        """
        groupSize = 1001
        return "".join(
            ",".join(selectors[start:start + groupSize]) + declaration
            for start in range(0, len(selectors), groupSize)
        )

eric ide

mercurial