# Reconstructed from a whitespace-collapsed Mercurial diff of
# WebBrowser/SafeBrowsing/SafeBrowsingUrl.py (7bf90dcae4e1 -> 5b53c17b7d93).
# That change set
#   * adds "import hashlib" to the module imports,
#   * changes the @rtype note of the URL hash generator docstring to
#     "generator of str (Python2) or bytes (Python3)",
#   * appends the two static methods below to the class body (they are
#     called as self.permutations(...) and self.digest(...); indent them
#     into the class when applying).
# NOTE(review): "re" and "urllib" are used below but are not added by the
# change set, so the target module must already provide them; on Python 3
# the split* helpers presumably come from urllib.parse -- confirm.
import hashlib


@staticmethod
def permutations(url):
    """
    Static method to determine all permutations of host name and path
    which can be applied to blacklisted URLs.

    The combinations are generated host by host; for each host all path
    variants are produced. Duplicates are suppressed while the order of
    first appearance is kept. An URL without a host part yields nothing.

    @param url URL string to be permuted
    @type str
    @return generator of permuted URL strings
    @rtype generator of str
    """
    def hostPermutations(host):
        """
        Method to generate the permutations of the host name.

        The exact host name is always generated first. Unless the host
        is a dotted quad IP address, it is followed by the names built
        from the last (up to) five labels, dropping the leading label
        step by step down to two labels.

        @param host host name
        @type str
        @return generator of permuted host names
        @rtype generator of str
        """
        # The pattern is anchored at the end; otherwise a host name like
        # '1.2.3.4.example.com' would be mistaken for an IP address and
        # lose its domain suffix permutations.
        if re.match(r'\d+\.\d+\.\d+\.\d+\Z', host):
            yield host
            return

        # Always report the exact host, even for single label names
        # (e.g. 'localhost'), which have no shorter suffix.
        yield host
        labels = host.split('.')
        for size in range(min(len(labels), 5), 1, -1):
            suffix = '.'.join(labels[-size:])
            if suffix != host:
                yield suffix

    def pathPermutations(path):
        """
        Method to generate the permutations of the path.

        Generated are the path as given, the path without its query
        string (if it has one) and up to four directory prefixes
        starting at the root, each with a trailing slash.

        @param path path to be processed
        @type str
        @return generator of permuted paths
        @rtype generator of str
        """
        yield path
        if '?' in path:
            path = path.split('?', 1)[0]
            yield path
        prefix = ''
        # The last component (file name) is dropped; only the first four
        # directory levels are considered.
        for component in path.split('/')[:-1][:4]:
            prefix += component + '/'
            yield prefix

    addressStr = urllib.splittype(url)[1]
    host, path = urllib.splithost(addressStr)
    if not host:
        # no network location, hence nothing to look up
        return
    host = urllib.splituser(host)[1]
    host = urllib.splitport(host)[0]
    host = host.strip('/')
    if not host:
        return

    seenPermutations = set()
    for hostVariant in hostPermutations(host):
        for pathVariant in pathPermutations(path):
            candidate = '{0}{1}'.format(hostVariant, pathVariant)
            if candidate not in seenPermutations:
                seenPermutations.add(candidate)
                yield candidate


@staticmethod
def digest(url):
    """
    Static method to calculate the SHA256 digest of an URL string.

    Text is encoded as UTF-8 before hashing. Data that already is a byte
    string (bytes on Python3, str on Python2) is hashed as is, so that
    no implicit decoding can fail.

    @param url URL string
    @type str or bytes
    @return SHA256 digest of the URL string
    @rtype str (Python2) or bytes (Python3)
    """
    if not isinstance(url, bytes):
        url = url.encode('utf-8')
    return hashlib.sha256(url).digest()