Done implementing the SafeBrowsingUrl class. safe_browsing

Mon, 17 Jul 2017 19:58:37 +0200

author
Detlev Offenbach <detlev@die-offenbachs.de>
date
Mon, 17 Jul 2017 19:58:37 +0200
branch
safe_browsing
changeset 5809
5b53c17b7d93
parent 5808
7bf90dcae4e1
child 5811
5358a3c7995f

Done implementing the SafeBrowsingUrl class.
Started implementing the SafeBrowsingAPIClient class.

WebBrowser/SafeBrowsing/SafeBrowsingAPIClient.py file | annotate | diff | comparison | revisions
WebBrowser/SafeBrowsing/SafeBrowsingUrl.py file | annotate | diff | comparison | revisions
eric6.e4p file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/WebBrowser/SafeBrowsing/SafeBrowsingAPIClient.py	Mon Jul 17 19:58:37 2017 +0200
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2017 Detlev Offenbach <detlev@die-offenbachs.de>
+#
+
+"""
+Module implementing the low level interface for Google Safe Browsing.
+"""
+
+from __future__ import unicode_literals
+try:
+    str = unicode       # __IGNORE_EXCEPTION__
+except NameError:
+    pass
+
+import json
+import random
+
+from PyQt5.QtCore import pyqtSlot, pyqtSignal, QObject, QDateTime, QTimer, \
+    QUrl
+from PyQt5.QtNetwork import QNetworkRequest, QNetworkReply
+
+from WebBrowser.WebBrowserWindow import WebBrowserWindow
+
+
+class SafeBrowsingAPIClient(QObject):
+    """
+    Class implementing the low level interface for Google Safe Browsing.
+    """
+    ClientId = "eric6_API_client"
+    ClientVersion = "1.0.0"
+    
+    GsbUrlTemplate = "https://safebrowsing.googleapis.com/v4/{0}?key={1}"
+    
+    networkError = pyqtSignal(str)
+    threatLists = pyqtSignal(list)
+    
+    # threatListUpdates:fetch   Content-Type: application/json      POST
+    # fullHashes:find           Content-Type: application/json      POST
+    
+    def __init__(self, apiKey, fairUse=True, parent=None):
+        """
+        Constructor
+        
+        @param apiKey API key to be used
+        @type str
+        @param fairUse flag indicating to follow the fair use policy
+        @type bool
+        @param parent reference to the parent object
+        @type QObject
+        """
+        super(SafeBrowsingAPIClient, self).__init__(parent)
+        self.__apiKey = apiKey
+        self.__fairUse = fairUse
+        
+        self.__nextRequestNoSoonerThan = QDateTime()
+        self.__replies = []
+        self.__failCount = 0
+    
+    def getThreatLists(self):
+        """
+        Public method to request all available threat lists.
+        
+        The lists are delivered via the threatLists signal.
+        """
+        url = QUrl(self.GsbUrlTemplate.format("threatLists", self.__apiKey))
+        req = QNetworkRequest(url)
+        reply = WebBrowserWindow.networkManager().get(req)
+        reply.finished.connect(self.__threatListsReceived)
+        self.__replies.append(reply)
+    
+    @pyqtSlot()
+    def __threatListsReceived(self):
+        """
+        Private slot handling the threat lists.
+        """
+        reply = self.sender()
+        result, hasError = self.__extractData(reply)
+        if hasError:
+            self.networkError.emit(reply.errorString())
+            self.__reschedule(reply.error(), self.getThreatLists)
+        else:
+            self.__setWaitDuration(result.get("minimumWaitDuration"))
+            self.threatLists.emit(result["threatLists"])
+            self.__failCount = 0
+        
+        if reply in self.__replies:
+            self.__replies.remove(reply)
+        reply.deleteLater()
+    
+    def __extractData(self, reply):
+        """
+        Private method to extract the data of a network reply.
+        
+        @param reply reference to the network reply object
+        @type QNetworkReply
+        @return tuple containing the extracted data and an error flag
+        @rtype tuple of (list or dict, bool)
+        """
+        if reply.error() != QNetworkReply.NoError:
+            return None, True
+        
+        return json.loads(str(reply.readAll(), "utf-8")), False
+    
+    def __setWaitDuration(self, minimumWaitDuration):
+        """
+        Private method to set the minimum wait duration.
+        
+        @param minimumWaitDuration duration to be set (e.g. "593.440s")
+        @type str
+        """
+        if not self.__fairUse or minimumWaitDuration is None:
+            self.__nextRequestNoSoonerThan = QDateTime()
+        else:
+            waitDuration = int(float(minimumWaitDuration.rstrip("s")))
+            self.__nextRequestNoSoonerThan = \
+                QDateTime.currentDateTime().addSecs(waitDuration)
+    
+    def __reschedule(self, errorCode, func):
+        """
+        Private method to reschedule an API access.
+        
+        @param errorCode error code returned by the function to be rescheduled
+        @type int
+        @param func function to be rescheduled
+        @type func
+        """
+        # NOTE(review): caller passes reply.error(), not an HTTP status; verify
+        if errorCode >= 500:
+            return
+        
+        self.__failCount += 1
+        waitDuration = min(
+            int(2 ** (self.__failCount - 1) * 15 * 60 * (1 + random.random())),
+            24 * 60 * 60)
+        QTimer.singleShot(waitDuration * 1000, func)
--- a/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py	Sun Jul 16 19:34:54 2017 +0200
+++ b/WebBrowser/SafeBrowsing/SafeBrowsingUrl.py	Mon Jul 17 19:58:37 2017 +0200
@@ -20,6 +20,7 @@
 import posixpath
 import socket
 import struct
+import hashlib
 
 import Preferences
 
@@ -47,7 +48,7 @@
         in canonical form.
         
         @return generator for the URL hashes
-        @rtype generator of str
+        @rtype generator of str (Python2) or bytes (Python3)
         """
         for variant in self.permutations(self.canonical()):
             urlHash = self.digest(variant)
@@ -132,3 +133,79 @@
         if query is not None:
             canonicalUrl = '{0}?{1}'.format(canonicalUrl, query)
         return canonicalUrl
+    
+    @staticmethod
+    def permutations(url):
+        """
+        Static method to determine all permutations of host name and path
+        which can be applied to blacklisted URLs.
+        
+        @param url URL string to be permuted
+        @type str
+        @return generator of permuted URL strings
+        @rtype generator of str
+        """
+        def hostPermutations(host):
+            """
+            Method to generate the permutations of the host name.
+            
+            @param host host name
+            @type str
+            @return generator of permuted host names
+            @rtype generator of str
+            """
+            # only a complete dotted quad is an IP address (never permuted)
+            if re.match(r'\d+\.\d+\.\d+\.\d+$', host):
+                yield host
+                return
+            parts = host.split('.')
+            count = min(len(parts), 5)
+            if count > 4:
+                yield host
+            for i in range(count - 1):
+                yield '.'.join(parts[i - count:])
+        
+        def pathPermutations(path):
+            """
+            Method to generate the permutations of the path.
+            
+            @param path path to be processed
+            @type str
+            @return generator of permuted paths
+            @rtype generator of str
+            """
+            yield path
+            if '?' in path:
+                # the path without its query part is a candidate as well
+                path = path.split('?', 1)[0]
+                yield path
+            pathParts = path.split('/')[0:-1]
+            curPath = ''
+            for i in range(min(4, len(pathParts))):
+                curPath = curPath + pathParts[i] + '/'
+                yield curPath
+        
+        protocol, addressStr = urllib.splittype(url)
+        host, path = urllib.splithost(addressStr)
+        user, host = urllib.splituser(host)
+        host, port = urllib.splitport(host)
+        host = host.strip('/')
+        seenPermutations = set()
+        for h in hostPermutations(host):
+            for p in pathPermutations(path):
+                u = '{0}{1}'.format(h, p)
+                if u not in seenPermutations:
+                    yield u
+                    seenPermutations.add(u)
+
+    @staticmethod
+    def digest(url):
+        """
+        Static method to compute the SHA256 digest of the given URL string.
+        
+        @param url URL string to be hashed
+        @type str
+        @return binary SHA256 digest of the UTF-8 encoded URL string
+        @rtype str (Python2) or bytes (Python3)
+        """
+        return hashlib.new('sha256', url.encode('utf-8')).digest()
--- a/eric6.e4p	Sun Jul 16 19:34:54 2017 +0200
+++ b/eric6.e4p	Mon Jul 17 19:58:37 2017 +0200
@@ -1404,6 +1404,7 @@
     <Source>WebBrowser/QtHelp/QtHelpDocumentationSelectionDialog.py</Source>
     <Source>WebBrowser/QtHelp/QtHelpFiltersDialog.py</Source>
     <Source>WebBrowser/QtHelp/__init__.py</Source>
+    <Source>WebBrowser/SafeBrowsing/SafeBrowsingAPIClient.py</Source>
     <Source>WebBrowser/SafeBrowsing/SafeBrowsingUrl.py</Source>
     <Source>WebBrowser/SafeBrowsing/__init__.py</Source>
     <Source>WebBrowser/SearchWidget.py</Source>
@@ -1999,14 +2000,14 @@
   <Interfaces/>
   <Others>
     <Other>.hgignore</Other>
-    <Other>APIs/Python/zope-2.10.7.api</Other>
-    <Other>APIs/Python/zope-2.11.2.api</Other>
-    <Other>APIs/Python/zope-3.3.1.api</Other>
     <Other>APIs/Python3/PyQt4.bas</Other>
     <Other>APIs/Python3/PyQt5.bas</Other>
     <Other>APIs/Python3/QScintilla2.bas</Other>
     <Other>APIs/Python3/eric6.api</Other>
     <Other>APIs/Python3/eric6.bas</Other>
+    <Other>APIs/Python/zope-2.10.7.api</Other>
+    <Other>APIs/Python/zope-2.11.2.api</Other>
+    <Other>APIs/Python/zope-3.3.1.api</Other>
     <Other>APIs/QSS/qss.api</Other>
     <Other>APIs/Ruby/Ruby-1.8.7.api</Other>
     <Other>APIs/Ruby/Ruby-1.8.7.bas</Other>

eric ide

mercurial