diff -r e9e7eca7efee -r bf71ee032bb4 src/eric7/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py --- a/src/eric7/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py Wed Jul 13 11:16:20 2022 +0200 +++ b/src/eric7/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py Wed Jul 13 14:55:47 2022 +0200 @@ -22,6 +22,7 @@ """ Class implementing an URL representation suitable for Google Safe Browsing. """ + # # Modeled after the URL class of the gglsbl package. # https://github.com/afilipovich/gglsbl @@ -29,35 +30,36 @@ def __init__(self, url): """ Constructor - + @param url URL to be embedded @type str """ self.__url = url - + def hashes(self): """ Public method to get the hashes of all possible permutations of the URL in canonical form. - + @yield URL hashes @ytype bytes """ for variant in self.permutations(self.canonical()): urlHash = self.digest(variant) yield urlHash - + def canonical(self): """ Public method to convert the URL to the canonical form. - + @return canonical form of the URL @rtype str """ + def fullUnescape(u): """ Method to recursively unescape an URL. - + @param u URL string to unescape @type str @return unescaped URL string @@ -68,25 +70,25 @@ return uu else: return fullUnescape(uu) - + def quote(s): """ Method to quote a string. - + @param string to be quoted @type str @return quoted string @rtype str """ - safeChars = '!"$&\'()*+,-./:;<=>?@[\\]^_`{|}~' + safeChars = "!\"$&'()*+,-./:;<=>?@[\\]^_`{|}~" return urllib.parse.quote(s, safe=safeChars) - + url = self.__url.strip() - url = url.replace('\n', '').replace('\r', '').replace('\t', '') - url = url.split('#', 1)[0] - if url.startswith('//'): + url = url.replace("\n", "").replace("\r", "").replace("\t", "") + url = url.split("#", 1)[0] + if url.startswith("//"): url = Preferences.getWebBrowser("DefaultScheme")[:-3] + url - if len(url.split('://')) <= 1: + if len(url.split("://")) <= 1: url = Preferences.getWebBrowser("DefaultScheme") + url url = quote(fullUnescape(url)) urlParts = urllib.parse.urlsplit(url) @@ -97,65 +99,66 @@ host = fullUnescape(urlParts.hostname) path = fullUnescape(urlParts.path) query = urlParts.query - if not query and '?' not in url: + if not query and "?" not in url: query = None if not path: - path = '/' - path = posixpath.normpath(path).replace('//', '/') - if path[-1] != '/': - path += '/' + path = "/" + path = posixpath.normpath(path).replace("//", "/") + if path[-1] != "/": + path += "/" port = urlParts.port - host = host.strip('.') - host = re.sub(r'\.+', '.', host).lower() + host = host.strip(".") + host = re.sub(r"\.+", ".", host).lower() if host.isdigit(): with contextlib.suppress(Exception): host = socket.inet_ntoa(struct.pack("!I", int(host))) - if host.startswith('0x') and '.' not in host: + if host.startswith("0x") and "." not in host: with contextlib.suppress(Exception): host = socket.inet_ntoa(struct.pack("!I", int(host, 16))) quotedPath = quote(path) quotedHost = quote(host) if port is not None: - quotedHost = '{0}:{1}'.format(quotedHost, port) - canonicalUrl = '{0}://{1}{2}'.format(protocol, quotedHost, quotedPath) + quotedHost = "{0}:{1}".format(quotedHost, port) + canonicalUrl = "{0}://{1}{2}".format(protocol, quotedHost, quotedPath) if query is not None: - canonicalUrl = '{0}?{1}'.format(canonicalUrl, query) + canonicalUrl = "{0}?{1}".format(canonicalUrl, query) return canonicalUrl - + @staticmethod def permutations(url): """ Static method to determine all permutations of host name and path which can be applied to blacklisted URLs. - + @param url URL string to be permuted @type str @yield permutated URL strings @ytype str """ + def hostPermutations(host): """ Method to generate the permutations of the host name. - + @param host host name @type str @yield permutated host names @ytype str """ - if re.match(r'\d+\.\d+\.\d+\.\d+', host): + if re.match(r"\d+\.\d+\.\d+\.\d+", host): yield host return - parts = host.split('.') + parts = host.split(".") partsLen = min(len(parts), 5) if partsLen > 4: yield host for i in range(partsLen - 1): - yield '.'.join(parts[i - partsLen:]) - + yield ".".join(parts[i - partsLen :]) + def pathPermutations(path): """ Method to generate the permutations of the path. - + @param path path to be processed @type str @yield permutated paths @@ -163,25 +166,25 @@ """ yield path query = None - if '?' in path: - path, query = path.split('?', 1) + if "?" in path: + path, query = path.split("?", 1) if query is not None: yield path - pathParts = path.split('/')[0:-1] - curPath = '' + pathParts = path.split("/")[0:-1] + curPath = "" for i in range(min(4, len(pathParts))): - curPath = curPath + pathParts[i] + '/' + curPath = curPath + pathParts[i] + "/" yield curPath - + protocol, addressStr = urllib.parse.splittype(url) host, path = urllib.parse.splithost(addressStr) user, host = urllib.parse.splituser(host) host, port = urllib.parse.splitport(host) - host = host.strip('/') + host = host.strip("/") seenPermutations = set() for h in hostPermutations(host): for p in pathPermutations(path): - u = '{0}{1}'.format(h, p) + u = "{0}{1}".format(h, p) if u not in seenPermutations: yield u seenPermutations.add(u) @@ -190,10 +193,10 @@ def digest(url): """ Static method to calculate the SHA256 digest of an URL string. - + @param url URL string @type str @return SHA256 digest of the URL string @rtype bytes """ - return hashlib.sha256(url.encode('utf-8')).digest() + return hashlib.sha256(url.encode("utf-8")).digest()