Wed, 30 Dec 2020 11:00:05 +0100
Updated copyright for 2021.
4971
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
1 | # -*- coding: utf-8 -*- |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
2 | |
7923
91e843545d9a
Updated copyright for 2021.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7775
diff
changeset
|
3 | # Copyright (c) 2016 - 2021 Detlev Offenbach <detlev@die-offenbachs.de> |
4971
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
4 | # |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
5 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
6 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
7 | Module implementing the TLD Extractor. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
8 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
9 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
10 | # |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
11 | # This is a Python port of the TLDExtractor of Qupzilla |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
12 | # Copyright (C) 2014 Razi Alavizadeh <s.r.alavizadeh@gmail.com> |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
13 | # |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
14 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
15 | import collections |
7717
f32d7965a17e
Changed the code to not rely on the Qt Resource system anymore (no .qrc files and no use of pyrcc5 anymore).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7716
diff
changeset
|
16 | import os |
7775
4a1db75550bd
Changed code to not use deprecated 'QRegExp' anymore.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7717
diff
changeset
|
17 | import re |
4971
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
18 | |
7775
4a1db75550bd
Changed code to not use deprecated 'QRegExp' anymore.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7717
diff
changeset
|
19 | from PyQt5.QtCore import QObject, QUrl, QFile, QFileInfo, qWarning |
4971
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
20 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
21 | from E5Gui import E5MessageBox |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
22 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
23 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
class E5TldHostParts(object):
    """
    Class implementing the host parts helper.

    It is a plain record holding the individual parts of a host name.
    All parts start out as empty strings.
    """
    def __init__(self):
        """
        Constructor
        """
        # complete host name
        self.host = ""
        # top level domain part
        self.tld = ""
        # domain part
        self.domain = ""
        # registrable domain part
        self.registrableDomain = ""
        # subdomain part
        self.subdomain = ""
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
37 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
38 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
39 | class E5TldExtractor(QObject): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
40 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
41 | Class implementing the TLD Extractor. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
42 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
43 | Note: The module function instance() should be used to get a reference |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
44 | to a global object to avoid overhead. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
45 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def __init__(self, withPrivate=False, parent=None):
    """
    Constructor

    @param withPrivate flag indicating to load private TLDs as well
    @type bool
    @param parent reference to the parent object
    @type QObject
    """
    super(E5TldExtractor, self).__init__(parent)

    self.__withPrivate = withPrivate
    self.__dataFileName = ""
    self.__dataSearchPaths = []

    # dict with list of str as values; a missing key yields an empty
    # list because of the defaultdict
    self.__tldDict = collections.defaultdict(list)

    # called without arguments here; the method is defined elsewhere
    # in the class
    self.setDataSearchPaths()
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
65 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def isDataLoaded(self):
    """
    Public method to check, if the TLD data is already loaded.

    The data counts as loaded as soon as the TLD dictionary contains
    at least one entry.

    @return flag indicating data is loaded
    @rtype bool
    """
    return bool(self.__tldDict)
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
74 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def tld(self, host):
    """
    Public method to get the top level domain for a host.

    An empty host or a host starting with a dot yields an empty string.
    Otherwise the rules stored for the last label of the host are
    checked and the matching rule with the most labels determines the
    TLD. Rules starting with "!" are treated as exceptions and rules
    starting with "*" as wildcards.

    @param host host name to get TLD for
    @type str
    @return TLD for host
    @rtype str
    """
    if not host or host.startswith("."):
        return ""

    cleanHost = self.__normalizedHost(host)

    # start with the last label; it is also the key into the rules dict
    tldPart = cleanHost[cleanHost.rfind(".") + 1:]
    # rules are compared against the ACE form of the host
    cleanHost = bytes(QUrl.toAce(cleanHost)).decode("utf-8")

    # NOTE(review): presumably fills self.__tldDict on first use;
    # the method is defined elsewhere in the class
    self.__loadData()

    if tldPart not in self.__tldDict:
        return tldPart

    # work on a copy so the stored rule list is not modified below
    tldRules = self.__tldDict[tldPart][:]

    if tldPart not in tldRules:
        tldRules.append(tldPart)

    maxLabelCount = 0
    isWildcardTLD = False
    # does not depend on the rule, so it is built once
    testUrl = "." + cleanHost

    for rule in tldRules:
        # counted before any "!" or "*" prefix is stripped
        labelCount = rule.count(".") + 1

        if rule.startswith("!"):
            rule = rule[1:]

            rule = bytes(QUrl.toAce(rule)).decode("utf-8")

            # matches with exception TLD; the TLD is the rule without
            # its first label
            if cleanHost.endswith(rule):
                tldPart = rule[rule.find(".") + 1:]
                break

        if rule.startswith("*"):
            rule = rule[1:]

            if rule.startswith("."):
                rule = rule[1:]

            isWildcardTLD = True
        else:
            isWildcardTLD = False

        rule = bytes(QUrl.toAce(rule)).decode("utf-8")
        # leading dots make sure only complete labels match
        testRule = "." + rule

        # the rule with the most labels wins
        if labelCount > maxLabelCount and testUrl.endswith(testRule):
            tldPart = rule
            maxLabelCount = labelCount

            if isWildcardTLD:
                # the wildcard stands for the host label directly in
                # front of the matched rule
                temp = cleanHost
                temp = temp[:temp.rfind(tldPart)]

                if temp.endswith("."):
                    temp = temp[:-1]

                temp = temp[temp.rfind(".") + 1:]

                if temp:
                    tldPart = temp + "." + rule
                else:
                    tldPart = rule

    # take the same number of trailing labels from the normalized
    # (not ACE converted) host
    temp = self.__normalizedHost(host)
    tldPart = ".".join(
        temp.split(".")[temp.count(".") - tldPart.count("."):])

    return tldPart
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
155 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def domain(self, host):
    """
    Public method to get the domain for a host.

    The TLD of the host is determined first and passed on to the
    private domain helper together with the host.

    @param host host name to get the domain for
    @type str
    @return domain for host
    @rtype str
    """
    tldPart = self.tld(host)

    return self.__domainHelper(host, tldPart)
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
168 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def registrableDomain(self, host):
    """
    Public method to determine the registrable domain of a host.

    The TLD of the host is determined first; the domain part is derived
    from host and TLD and is then combined with the TLD by the
    registrable domain helper.

    @param host host name to get the registrable domain for
    @type str
    @return registrable domain for host
    @rtype str
    """
    suffix = self.tld(host)
    domainPart = self.__domainHelper(host, suffix)
    return self.__registrableDomainHelper(domainPart, suffix)
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
182 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def subdomain(self, host):
    """
    Public method to determine the subdomain of a host.

    @param host host name to get the subdomain for
    @type str
    @return subdomain for host
    @rtype str
    """
    # the subdomain helper works on the host and its registrable domain
    registrable = self.registrableDomain(host)
    return self.__subdomainHelper(host, registrable)
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
193 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def splitParts(self, host):
    """
    Public method to split a host address into its parts.

    The result carries the host itself, its TLD, domain, registrable
    domain and subdomain.

    @param host host address to be split
    @type str
    @return split host address
    @rtype E5TldHostParts
    """
    parts = E5TldHostParts()

    # each part is derived from the ones determined before it
    tldPart = self.tld(host)
    domainPart = self.__domainHelper(host, tldPart)
    registrablePart = self.__registrableDomainHelper(domainPart, tldPart)
    subdomainPart = self.__subdomainHelper(host, registrablePart)

    parts.host = host
    parts.tld = tldPart
    parts.domain = domainPart
    parts.registrableDomain = registrablePart
    parts.subdomain = subdomainPart
    return parts
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
213 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def dataSearchPaths(self):
    """
    Public method to get the search paths for the TLD data file.

    A copy is returned, so modifying the result does not change the
    search paths stored in this object.

    @return search paths for the TLD data file
    @rtype list of str
    """
    pathsCopy = list(self.__dataSearchPaths)
    return pathsCopy
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
222 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def setDataSearchPaths(self, searchPaths=None):
    """
    Public method to set the search paths for the TLD data file.

    The given paths take precedence over the default search paths, which
    are always appended. Duplicate entries are removed, keeping the first
    occurrence, so the search order is preserved.

    @param searchPaths search paths for the TLD data file or None,
        if the default search paths shall be set (tuples and other
        iterables of str are accepted as well; a single str is treated
        as one path)
    @type list of str
    """
    if isinstance(searchPaths, str):
        # a single path given as a plain string must not be split into
        # its characters by the list conversion below
        searchPaths = [searchPaths]

    # list() makes a private copy and also accepts tuples or other
    # iterables, which have no extend() method
    candidates = list(searchPaths) if searchPaths else []
    candidates.extend(self.__defaultDataSearchPaths())

    # remove duplicates while keeping the order of first occurrence
    uniquePaths = []
    for path in candidates:
        if path not in uniquePaths:
            uniquePaths.append(path)
    self.__dataSearchPaths = uniquePaths
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
243 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
244 | def __defaultDataSearchPaths(self): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
245 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
246 | Private method to get the default search paths for the TLD data file. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
247 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
248 | @return default search paths for the TLD data file |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
249 | @rtype list of str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
250 | """ |
7717
f32d7965a17e
Changed the code to not rely on the Qt Resource system anymore (no .qrc files and no use of pyrcc5 anymore).
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7716
diff
changeset
|
251 | return [os.path.join(os.path.dirname(__file__), "data")] |
4971
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
252 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def getTldDownloadUrl(self):
    """
    Public method to get the URL the TLD data file can be downloaded from.

    @return download URL
    @rtype QUrl
    """
    address = (
        "http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/"
        "effective_tld_names.dat?raw=1"
    )
    return QUrl(address)
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
263 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def __loadData(self):
    """
    Private method to load the TLD data.

    Nothing is done if isDataLoaded() reports that the data is already
    available. Otherwise the search paths are scanned in order for a file
    named 'effective_tld_names.dat' and the first existing one is parsed.
    If no such file exists, the user is informed via a message box and
    no data is loaded.
    """
    if self.isDataLoaded():
        return

    for path in self.__dataSearchPaths:
        dataFileName = QFileInfo(
            path + "/effective_tld_names.dat").absoluteFilePath()
        if QFileInfo(dataFileName).exists():
            break
    else:
        # no search path (or none at all) contains the data file; the
        # download link is taken from getTldDownloadUrl() so that both
        # places always show the same address
        E5MessageBox.information(
            None,
            self.tr("TLD Data File not found"),
            self.tr("""<p>The file 'effective_tld_names.dat' was not"""
                    """ found!<br/>You can download it from """
                    """'<a href="{0}"><b>here</b></a>' to one of the"""
                    """ following paths:</p><ul>{1}</ul>""").format(
                self.getTldDownloadUrl().toString(),
                "".join(["<li>{0}</li>".format(p)
                         for p in self.__dataSearchPaths]))
        )
        return

    self.__dataFileName = dataFileName
    # a False result is only reported as a warning; it is not raised
    if not self.__parseData(dataFileName,
                            loadPrivateDomains=self.__withPrivate):
        qWarning(
            "E5TldExtractor: There are some parse errors for file: {0}"
            .format(dataFileName))
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
306 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
307 | def __parseData(self, dataFile, loadPrivateDomains=False): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
308 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
309 | Private method to parse TLD data. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
310 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
311 | @param dataFile name of the file containing the TLD data |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
312 | @type str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
313 | @param loadPrivateDomains flag indicating to load private domains |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
314 | @type bool |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
315 | @return flag indicating success |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
316 | @rtype bool |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
317 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
318 | # start with a fresh dictionary |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
319 | self.__tldDict = collections.defaultdict(list) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
320 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
321 | file = QFile(dataFile) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
322 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
323 | if not file.open(QFile.ReadOnly | QFile.Text): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
324 | return False |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
325 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
326 | seekToEndOfPrivateDomains = False |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
327 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
328 | while not file.atEnd(): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
329 | line = bytes(file.readLine()).decode("utf-8").strip() |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
330 | if not line: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
331 | continue |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
332 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
333 | if line.startswith("."): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
334 | line = line[1:] |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
335 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
336 | if line.startswith("//"): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
337 | if "===END PRIVATE DOMAINS===" in line: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
338 | seekToEndOfPrivateDomains = False |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
339 | |
7253
50dbe65a1334
Continued to resolve code style issue M841.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7229
diff
changeset
|
340 | if ( |
50dbe65a1334
Continued to resolve code style issue M841.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7229
diff
changeset
|
341 | not loadPrivateDomains and |
50dbe65a1334
Continued to resolve code style issue M841.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7229
diff
changeset
|
342 | "===BEGIN PRIVATE DOMAINS===" in line |
50dbe65a1334
Continued to resolve code style issue M841.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7229
diff
changeset
|
343 | ): |
4971
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
344 | seekToEndOfPrivateDomains = True |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
345 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
346 | continue |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
347 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
348 | if seekToEndOfPrivateDomains: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
349 | continue |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
350 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
351 | # only data up to the first whitespace is used |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
352 | line = line.split(None, 1)[0] |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
353 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
354 | if "." not in line: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
355 | self.__tldDict[line].append(line) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
356 | else: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
357 | key = line[line.rfind(".") + 1:] |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
358 | self.__tldDict[key].append(line) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
359 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
360 | return self.isDataLoaded() |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
361 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def __domainHelper(self, host, tldPart):
    """
    Private method to get the domain name without TLD.

    Host and TLD part are both normalized (lowercased) before they are
    compared. If the TLD part does not occur in the host, no domain can
    be determined and an empty string is returned.

    @param host host address
    @type str
    @param tldPart TLD part of the host address
    @type str
    @return domain name (i.e. the label directly in front of the TLD
        part) or an empty string, if it could not be determined
    @rtype str
    """
    if not host or not tldPart:
        return ""

    normalizedHost = self.__normalizedHost(host)
    tldIndex = normalizedHost.rfind(self.__normalizedHost(tldPart))
    if tldIndex < 0:
        # str.rfind() reports "not found" as -1; slicing with that value
        # would silently cut off the last character of the host and
        # yield a bogus domain name
        return ""

    # keep everything in front of the last occurrence of the TLD part
    # and drop the dot separating it from the TLD
    temp = normalizedHost[:tldIndex]
    if temp.endswith("."):
        temp = temp[:-1]

    # the domain is the last remaining label; without any dot left
    # rfind() gives -1 and the slice covers the whole string
    return temp[temp.rfind(".") + 1:]
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
383 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
384 | def __registrableDomainHelper(self, domainPart, tldPart): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
385 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
386 | Private method to get the registrable domain (i.e. domain plus TLD). |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
387 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
388 | @param domainPart domain part of a host address |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
389 | @type str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
390 | @param tldPart TLD part of a host address |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
391 | @type str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
392 | @return registrable domain name |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
393 | @rtype str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
394 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
395 | if not tldPart or not domainPart: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
396 | return "" |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
397 | else: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
398 | return "{0}.{1}".format(domainPart, tldPart) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
399 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def __subdomainHelper(self, host, registrablePart):
    """
    Private method to get the subdomain of a host address (i.e. domain part
    without the registrable domain name).

    Host and registrable part are both normalized (lowercased) before
    they are compared. If the registrable part does not occur in the
    host, no subdomain can be determined and an empty string is returned.

    @param host host address
    @type str
    @param registrablePart registrable domain part of the host address
    @type str
    @return subdomain name or an empty string, if there is none or it
        could not be determined
    @rtype str
    """
    if not host or not registrablePart:
        return ""

    normalizedHost = self.__normalizedHost(host)
    registrableIndex = normalizedHost.rfind(
        self.__normalizedHost(registrablePart))
    if registrableIndex < 0:
        # str.rfind() reports "not found" as -1; slicing with that value
        # would silently cut off the last character of the host and
        # return the remainder as a bogus subdomain
        return ""

    # everything in front of the registrable domain is the subdomain;
    # drop the dot separating the two
    subdomain = normalizedHost[:registrableIndex]
    if subdomain.endswith("."):
        subdomain = subdomain[:-1]

    return subdomain
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
423 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
424 | def __normalizedHost(self, host): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
425 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
426 | Private method to get the normalized host for a host address. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
427 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
428 | @param host host address to be normalized |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
429 | @type str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
430 | @return normalized host address |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
431 | @rtype str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
432 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
433 | return host.lower() |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
434 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
435 | ################################################################# |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
436 | ## Methods below are for testing purposes |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
437 | ################################################################# |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
438 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def test(self):
    """
    Public method to execute the tests.

    The method sets the private domains flag, loads the TLD data and
    checks every 'checkPublicSuffix(...)' line of the first
    'test_psl.txt' file found in the data search paths. If no such file
    exists, an information box with a download hint is shown. After a
    complete test run the TLD dictionary is reset.

    @return flag indicating the test result (False is also returned, if
        no TLD data is loaded or the test data file is missing or cannot
        be opened)
    @rtype bool
    """
    self.__withPrivate = True
    self.__loadData()
    if not self.__tldDict:
        return False

    testDataFileName = ""
    testDataFileExist = False

    # use the first test data file found in the search paths
    for path in self.__dataSearchPaths:
        testDataFileName = (
            QFileInfo(path + "/test_psl.txt").absoluteFilePath()
        )
        if QFileInfo(testDataFileName).exists():
            testDataFileExist = True
            break

    if not testDataFileExist:
        testFileDownloadLink = (
            "http://mxr.mozilla.org/mozilla-central/source/netwerk/test/"
            "unit/data/test_psl.txt?raw=1"
        )
        E5MessageBox.information(
            None,
            self.tr("TLD Data File not found"),
            self.tr("""<p>The file 'test_psl.txt' was not found!"""
                    """<br/>You can download it from '<a href="{0}">"""
                    """<b>here</b></a>' to one of the following"""
                    """ paths:</p><ul>{1}</ul>""").format(
                testFileDownloadLink,
                "".join(["<li>{0}</li>".format(p)
                         for p in self.__dataSearchPaths]))
        )
        return False

    file = QFile(testDataFileName)

    if not file.open(QFile.ReadOnly | QFile.Text):
        return False

    # groups 2 and 4 capture the quoted host name and registrable name;
    # they are None, if the literal 'null' was given instead
    testRegExp = re.compile(
        "checkPublicSuffix\\(('([^']+)'|null), ('([^']+)'|null)\\);")
    allTestSuccess = True

    try:
        while not file.atEnd():
            line = bytes(file.readLine()).decode("utf-8").strip()
            # skip empty lines and comment lines
            if not line or line.startswith("//"):
                continue

            match = testRegExp.search(line)
            if match is None:
                # a line that is not a recognizable test case counts as
                # a failure
                allTestSuccess = False
            else:
                hostName, registrableName = match.group(2, 4)

                if not self.__checkPublicSuffix(hostName, registrableName):
                    allTestSuccess = False
    finally:
        # release the file handle explicitly instead of relying on the
        # QFile object being garbage collected
        file.close()

    if allTestSuccess:
        qWarning("E5TldExtractor: Test passed successfully.")
    else:
        qWarning("E5TldExtractor: Test finished with some errors!")

    # reset the TLD dictionary
    self.__tldDict = collections.defaultdict(list)

    return allTestSuccess
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
512 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
513 | def __checkPublicSuffix(self, host, registrableName): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
514 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
515 | Private method to test a host name against a registrable name. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
516 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
517 | @param host host name to test |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
518 | @type str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
519 | @param registrableName registrable domain name to test against |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
520 | @type str |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
521 | @return flag indicating the check result |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
522 | @rtype bool |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
523 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
524 | regName = self.registrableDomain(host) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
525 | if regName != registrableName: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
526 | qWarning( |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
527 | "E5TldExtractor Test Error: hostName: {0}\n" |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
528 | " Correct registrableName: {1}\n" |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
529 | " Calculated registrableName: {2}".format( |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
530 | host, registrableName, regName)) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
531 | return False |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
532 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
533 | return True |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
534 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
535 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
536 | _TLDExtractor = None |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
537 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
538 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
539 | def instance(withPrivate=False): |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
540 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
541 | Global function to get a reference to the TLD extractor and create it, if |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
542 | it hasn't been yet. |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
543 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
544 | @param withPrivate flag indicating to load private TLDs as well |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
545 | @type bool |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
546 | @return reference to the TLD extractor object |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
547 | @rtype E5TldExtractor |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
548 | """ |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
549 | global _TLDExtractor |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
550 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
551 | if _TLDExtractor is None: |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
552 | _TLDExtractor = E5TldExtractor(withPrivate=withPrivate) |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
553 | |
0f21662c0d2d
Added a class to extract all top level domains from a file provided by Mozilla.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
554 | return _TLDExtractor |