|
1 # -*- coding: utf-8 -*- |
|
2 |
|
3 # Copyright (c) 2012 - 2019 Detlev Offenbach <detlev@die-offenbachs.de> |
|
4 # |
|
5 |
|
6 """ |
|
7 Module implementing the GreaseMonkey URL matcher. |
|
8 """ |
|
9 |
|
10 from __future__ import unicode_literals |
|
11 |
|
12 import re |
|
13 |
|
14 from PyQt5.QtCore import Qt, QRegExp |
|
15 |
|
16 |
|
def wildcardMatch(string, pattern):
    """
    Module function implementing a special wildcard matcher.

    The pattern is a simple glob in which "*" stands for any (possibly
    empty) sequence of characters. All other characters are matched
    literally. The pattern has to cover the complete string, i.e. it is
    anchored at the start unless it begins with "*" and anchored at the
    end unless it ends with "*".

    @param string string to match (string)
    @param pattern pattern to be used (string)
    @return flag indicating a successful match (boolean)
    """
    parts = pattern.split("*")
    if len(parts) == 1:
        # no wildcard at all: only an exact match is acceptable
        return string == pattern

    head = parts[0]
    tail = parts[-1]

    # The text before the first wildcard must be a prefix of the string
    # (an empty head, i.e. a leading "*", is a prefix of everything).
    if not string.startswith(head):
        return False

    # The text after the last wildcard must be a suffix of the string and
    # must not overlap the prefix matched above.
    pos = len(head)
    end = len(string) - len(tail)
    if end < pos or not string.endswith(tail):
        return False

    # The remaining parts have to appear in order between head and tail;
    # the search position advances past every matched part so that parts
    # can never overlap each other.
    for part in parts[1:-1]:
        index = string.find(part, pos, end)
        if index == -1:
            return False
        pos = index + len(part)

    return True
|
47 |
|
48 |
|
class GreaseMonkeyUrlMatcher(object):
    """
    Class implementing the GreaseMonkey URL matcher.

    Depending on the pattern given to the constructor, URLs are matched
    either with a regular expression (patterns enclosed in "/" or patterns
    containing ".tld") or with the module function wildcardMatch().
    """
    def __init__(self, pattern):
        """
        Constructor

        @param pattern pattern to be used for the matching (string)
        """
        self.__pattern = pattern
        self.__matchString = ""
        self.__regExp = QRegExp()
        self.__useRegExp = False

        self.__parsePattern(self.__pattern)

    def pattern(self):
        """
        Public method to get the match pattern.

        @return match pattern as given to the constructor (string)
        """
        return self.__pattern

    def match(self, urlString):
        """
        Public method to match the given URL.

        @param urlString URL to match (string)
        @return flag indicating a successful match (boolean)
        """
        if self.__useRegExp:
            # a hit anywhere in the URL counts as a match
            return self.__regExp.indexIn(urlString) != -1
        else:
            return wildcardMatch(urlString, self.__matchString)

    def __parsePattern(self, pattern):
        """
        Private method to parse the match pattern.

        Patterns of the form "/.../" are used as a regular expression
        as is (without the enclosing slashes). Patterns containing ".tld"
        are converted to a regular expression. All other patterns are
        stored unchanged for the wildcard matcher.

        @param pattern match pattern to be used (string)
        """
        if pattern.startswith("/") and pattern.endswith("/"):
            # strip the enclosing slashes to get the bare expression
            pattern = pattern[1:-1]

            self.__regExp = QRegExp(pattern, Qt.CaseInsensitive)
            self.__useRegExp = True
        elif ".tld" in pattern:
            # escape special symbols
            pattern = re.sub(r"(\W)", r"\\\1", pattern)
            # remove multiple wildcards
            pattern = re.sub(r"\*+", "*", pattern)
            # process anchor at expression start
            pattern = re.sub(r"^\\\|", "^", pattern)
            # process anchor at expression end
            pattern = re.sub(r"\\\|$", "$", pattern)
            # replace wildcards by .*
            pattern = re.sub(r"\\\*", ".*", pattern)
            # replace domain pattern; at this point ".tld" has been escaped
            # to "\.tld", so the escaped text is replaced literally in order
            # not to leave a stray backslash in front of the replacement
            pattern = pattern.replace(r"\.tld", r"\.[a-z.]{2,6}")

            self.__useRegExp = True
            self.__regExp = QRegExp(pattern, Qt.CaseInsensitive)
        else:
            self.__matchString = pattern