|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2017 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the AdBlock search tree. |
|
8 """ |
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 from .AdBlockRule import AdBlockRuleType |
|
13 |
|
14 |
|
15 class AdBlockSearchTreeNode(object): |
|
16 """ |
|
17 Class implementing the AdBlock search tree node. |
|
18 """ |
|
19 def __init__(self): |
|
20 """ |
|
21 Constructor |
|
22 """ |
|
23 self.char = '' |
|
24 self.rule = None |
|
25 self.children = {} |
|
26 |
|
27 |
|
28 class AdBlockSearchTree(object): |
|
29 """ |
|
30 Class implementing the AdBlock search tree. |
|
31 """ |
|
32 def __init__(self): |
|
33 """ |
|
34 Constructor |
|
35 """ |
|
36 self.__root = AdBlockSearchTreeNode() |
|
37 |
|
38 def clear(self): |
|
39 """ |
|
40 Public method to clear the search tree. |
|
41 """ |
|
42 self.__deleteNode(self.__root) |
|
43 self.__root = AdBlockSearchTreeNode() |
|
44 |
|
45 def add(self, rule): |
|
46 """ |
|
47 Public method to add a rule to the search tree. |
|
48 |
|
49 @param rule rule to be added |
|
50 @type AdBlockRule |
|
51 @return flag indicating a successful addition |
|
52 @rtype bool |
|
53 """ |
|
54 if rule.ruleType() != AdBlockRuleType.StringContainsMatchRule: |
|
55 return False |
|
56 |
|
57 filterString = rule.matchString() |
|
58 |
|
59 if len(filterString) <= 0: |
|
60 return False |
|
61 |
|
62 node = self.__root |
|
63 |
|
64 for filterChar in filterString: |
|
65 try: |
|
66 nextNode = node.children[filterChar] |
|
67 except KeyError: |
|
68 nextNode = AdBlockSearchTreeNode() |
|
69 nextNode.char = filterChar |
|
70 node.children[filterChar] = nextNode |
|
71 node = nextNode |
|
72 |
|
73 node.rule = rule |
|
74 |
|
75 return True |
|
76 |
|
77 def find(self, request, domain, urlString): |
|
78 """ |
|
79 Public method to find a matching rule. |
|
80 |
|
81 @param request URL request to be matched |
|
82 @type QWebEngineUrlRequestInfo |
|
83 @param domain domain of the URL |
|
84 @type str |
|
85 @param urlString requested URL as a lowercase string |
|
86 @type str |
|
87 @return reference to the matched rule |
|
88 @rtype AdBlockRule |
|
89 """ |
|
90 length = len(urlString) |
|
91 |
|
92 if length <= 0: |
|
93 return None |
|
94 |
|
95 for index in range(length): |
|
96 rule = self.__prefixSearch(request, domain, urlString, |
|
97 urlString[index:], length - index) |
|
98 if rule: |
|
99 return rule |
|
100 |
|
101 return None |
|
102 |
|
103 def __deleteNode(self, node): |
|
104 """ |
|
105 Private method to delete a search tree node. |
|
106 |
|
107 @param node reference to the node to be deleted |
|
108 @type AdBlockSearchTreeNode |
|
109 """ |
|
110 if not node: |
|
111 return |
|
112 |
|
113 for key in node.children.keys(): |
|
114 self.__deleteNode(node.children[key]) |
|
115 |
|
116 node.children = {} |
|
117 node = None |
|
118 |
|
119 def __prefixSearch(self, request, domain, urlString, string, length): |
|
120 """ |
|
121 Private method to perform a prefix search. |
|
122 |
|
123 @param request URL request to be matched |
|
124 @type QWebEngineUrlRequestInfo |
|
125 @param domain domain of the URL |
|
126 @type str |
|
127 @param urlString requested URL as a lowercase string |
|
128 @type str |
|
129 @param string prefix string to search for |
|
130 @type str |
|
131 @param length length to be considered |
|
132 @type int |
|
133 @return reference to the matched rule |
|
134 @rtype AdBlockRule |
|
135 """ |
|
136 if length <= 0: |
|
137 return None |
|
138 |
|
139 char = string[0] |
|
140 |
|
141 try: |
|
142 node = self.__root.children[char] |
|
143 except KeyError: |
|
144 return None |
|
145 |
|
146 for char in string[1:]: |
|
147 if node.rule and \ |
|
148 node.rule.networkMatch(request, domain, urlString): |
|
149 return node.rule |
|
150 |
|
151 try: |
|
152 node = node.children[char] |
|
153 except KeyError: |
|
154 return None |
|
155 |
|
156 if node.rule and node.rule.networkMatch(request, domain, urlString): |
|
157 return node.rule |
|
158 |
|
159 return None |