WebBrowser/SafeBrowsing/SafeBrowsingUrl.py

branch
safe_browsing
changeset 5809
5b53c17b7d93
parent 5808
7bf90dcae4e1
child 5811
5358a3c7995f
--- a/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py	Sun Jul 16 19:34:54 2017 +0200
+++ b/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py	Mon Jul 17 19:58:37 2017 +0200
@@ -20,6 +20,7 @@
 import posixpath
 import socket
 import struct
+import hashlib
 
 import Preferences
 
@@ -47,7 +48,7 @@
         in canonical form.
         
         @return generator for the URL hashes
-        @rtype generator of str
+        @rtype generator of str (Python2) or bytes (Python3)
         """
         for variant in self.permutations(self.canonical()):
             urlHash = self.digest(variant)
@@ -132,3 +133,79 @@
         if query is not None:
             canonicalUrl = '{0}?{1}'.format(canonicalUrl, query)
         return canonicalUrl
+    
+    @staticmethod
+    def permutations(url):
+        """
+        Static method to generate the host suffix / path prefix combinations
+        of a URL, which have to be looked up in the blacklist.
+        
+        @param url URL string to be permuted
+        @type str
+        @return generator of unique 'host + path' strings (no scheme or port)
+        @rtype generator of str
+        """
+        def hostPermutations(host):
+            """
+            Function to generate the host name and its trailing suffixes.
+            
+            @param host host name or dotted IPv4 address (yielded as is)
+            @type str
+            @return generator of host names (may contain the host twice)
+            @rtype generator of str
+            """
+            if re.match(r'\d+\.\d+\.\d+\.\d+$', host):
+                yield host
+                return
+            parts = host.split('.')
+            count = min(len(parts), 5)
+            # always offer the exact host; repeats are filtered by the caller
+            yield host
+            for i in range(count - 1):
+                yield '.'.join(parts[i - count:])
+        
+        def pathPermutations(path):
+            """
+            Function to generate the path and its shortened variants.
+            
+            @param path path to be processed (may include a query string)
+            @type str
+            @return generator of permuted paths (may contain duplicates)
+            @rtype generator of str
+            """
+            yield path
+            if '?' in path:
+                # offer the path without its query parameters as well
+                path = path.split('?', 1)[0]
+                yield path
+            # at most four cumulative directory prefixes, shortest first
+            pathParts = path.split('/')[0:-1]
+            curPath = ''
+            for part in pathParts[:4]:
+                curPath = curPath + part + '/'
+                yield curPath
+        
+        protocol, addressStr = urllib.splittype(url)
+        host, path = urllib.splithost(addressStr)
+        user, host = urllib.splituser(host)
+        host, port = urllib.splitport(host)
+        host = host.strip('/')
+        seenPermutations = set()
+        for h in hostPermutations(host):
+            for p in pathPermutations(path):
+                u = '{0}{1}'.format(h, p)
+                if u not in seenPermutations:
+                    yield u
+                    seenPermutations.add(u)
+
+    @staticmethod
+    def digest(url):
+        """
+        Static method to calculate the SHA256 digest of a URL string.
+        
+        @param url URL string (it is UTF-8 encoded before hashing)
+        @type str
+        @return raw (binary) SHA256 digest of the URL string
+        @rtype str (Python2) or bytes (Python3)
+        """
+        return hashlib.sha256(url.encode('utf-8')).digest()

eric ide

mercurial