eric7/EricNetwork/EricTldExtractor.py

branch
eric7
changeset 9162
8b75b1668583
parent 9152
8a68afaf1ba2
--- a/eric7/EricNetwork/EricTldExtractor.py	Mon Jun 20 13:25:14 2022 +0200
+++ b/eric7/EricNetwork/EricTldExtractor.py	Mon Jun 20 19:47:39 2022 +0200
@@ -14,9 +14,8 @@
 
 import collections
 import os
-import re
 
-from PyQt6.QtCore import QObject, QUrl, QFile, QIODevice, qWarning
+from PyQt6.QtCore import QObject, QUrl, qWarning
 
 from EricWidgets import EricMessageBox
 
@@ -318,47 +317,44 @@
         # start with a fresh dictionary
         self.__tldDict = collections.defaultdict(list)
         
-        file = QFile(dataFile)
-        
-        if not file.open(QIODevice.OpenModeFlag.ReadOnly |
-                         QIODevice.OpenModeFlag.Text):
-            return False
-        
         seekToEndOfPrivateDomains = False
         
-        while not file.atEnd():
-            line = bytes(file.readLine()).decode("utf-8").strip()
-            if not line:
-                continue
-            
-            if line.startswith("."):
-                line = line[1:]
-            
-            if line.startswith("//"):
-                if "===END PRIVATE DOMAINS===" in line:
-                    seekToEndOfPrivateDomains = False
+        try:
+            with open(dataFile, "r", encoding="utf-8") as f:
+                for line in map(str.strip, f):  # strip: blank lines become ''
+                    if not line:
+                        continue
+                    
+                    if line.startswith("."):
+                        line = line[1:]
+                    
+                    if line.startswith("//"):
+                        if "===END PRIVATE DOMAINS===" in line:
+                            seekToEndOfPrivateDomains = False
+                        
+                        if (
+                            not loadPrivateDomains and
+                            "===BEGIN PRIVATE DOMAINS===" in line
+                        ):
+                            seekToEndOfPrivateDomains = True
+                        
+                        continue
+                    
+                    if seekToEndOfPrivateDomains:
+                        continue
+                    
+                    # only data up to the first whitespace is used
+                    line = line.split(None, 1)[0]
+                    
+                    if "." not in line:
+                        self.__tldDict[line].append(line)
+                    else:
+                        key = line[line.rfind(".") + 1:]
+                        self.__tldDict[key].append(line)
                 
-                if (
-                    not loadPrivateDomains and
-                    "===BEGIN PRIVATE DOMAINS===" in line
-                ):
-                    seekToEndOfPrivateDomains = True
-                
-                continue
-            
-            if seekToEndOfPrivateDomains:
-                continue
-            
-            # only data up to the first whitespace is used
-            line = line.split(None, 1)[0]
-            
-            if "." not in line:
-                self.__tldDict[line].append(line)
-            else:
-                key = line[line.rfind(".") + 1:]
-                self.__tldDict[key].append(line)
-        
-        return self.isDataLoaded()
+                return self.isDataLoaded()
+        except OSError:
+            return False
     
     def __domainHelper(self, host, tldPart):
         """
@@ -432,107 +428,6 @@
         @rtype str
         """
         return host.lower()
-    
-    #################################################################
-    ## Methods below are for testing purposes
-    #################################################################
-    
-    def test(self):
-        """
-        Public method to execute the tests.
-        
-        @return flag indicating the test result
-        @rtype bool
-        """
-        self.__withPrivate = True
-        self.__loadData()
-        if not self.__tldDict:
-            return False
-        
-        testDataFileName = ""
-        testDataFileExist = False
-        
-        for path in self.__dataSearchPaths:
-            testDataFileName = os.path.abspath(
-                os.path.join(path, "test_psl.txt")
-            )
-            if os.path.exists(testDataFileName):
-                testDataFileExist = True
-                break
-        
-        if not testDataFileExist:
-            testFileDownloadLink = (
-                "http://mxr.mozilla.org/mozilla-central/source/netwerk/test/"
-                "unit/data/test_psl.txt?raw=1"
-            )
-            EricMessageBox.information(
-                None,
-                self.tr("TLD Data File not found"),
-                self.tr("""<p>The file 'test_psl.txt' was not found!"""
-                        """<br/>You can download it from '<a href="{0}">"""
-                        """<b>here</b></a>' to one of the following"""
-                        """ paths:</p><ul>{1}</ul>""").format(
-                    testFileDownloadLink,
-                    "".join(["<li>{0}</li>".format(p)
-                             for p in self.__dataSearchPaths]))
-            )
-            return False
-        
-        file = QFile(testDataFileName)
-        
-        if not file.open(QIODevice.OpenModeFlag.ReadOnly |
-                         QIODevice.OpenModeFlag.Text):
-            return False
-        
-        testRegExp = re.compile(
-            "checkPublicSuffix\\(('([^']+)'|null), ('([^']+)'|null)\\);")
-        allTestSuccess = True
-        
-        while not file.atEnd():
-            line = bytes(file.readLine()).decode("utf-8").strip()
-            if not line or line.startswith("//"):
-                continue
-            
-            match = testRegExp.search(line)
-            if match is None:
-                allTestSuccess = False
-            else:
-                hostName, registrableName = match.group(2, 4)
-                
-                if not self.__checkPublicSuffix(hostName, registrableName):
-                    allTestSuccess = False
-        
-        if allTestSuccess:
-            qWarning("EricTldExtractor: Test passed successfully.")
-        else:
-            qWarning("EricTldExtractor: Test finished with some errors!")
-        
-        # reset the TLD dictionary
-        self.__tldDict = collections.defaultdict(list)
-        
-        return allTestSuccess
-    
-    def __checkPublicSuffix(self, host, registrableName):
-        """
-        Private method to test a host name against a registrable name.
-        
-        @param host host name to test
-        @type str
-        @param registrableName registrable domain name to test against
-        @type str
-        @return flag indicating the check result
-        @rtype bool
-        """
-        regName = self.registrableDomain(host)
-        if regName != registrableName:
-            qWarning(
-                "EricTldExtractor Test Error: hostName: {0}\n"
-                "    Correct registrableName:    {1}\n"
-                "    Calculated registrableName: {2}".format(
-                    host, registrableName, regName))
-            return False
-        
-        return True
 
 
 _TLDExtractor = None

eric ide

mercurial