ThirdParty/CharDet/chardet/latin1prober.py

changeset 5714
90c57b50600f
parent 5310
f2b774d78b4a
diff -r 6762afd9f963 -r 90c57b50600f ThirdParty/CharDet/chardet/latin1prober.py
--- a/ThirdParty/CharDet/chardet/latin1prober.py	Tue Apr 25 18:36:38 2017 +0200
+++ b/ThirdParty/CharDet/chardet/latin1prober.py	Tue Apr 25 18:40:46 2017 +0200
@@ -27,8 +27,7 @@
 ######################### END LICENSE BLOCK #########################
 
 from .charsetprober import CharSetProber
-from .constants import eNotMe
-from .compat import wrap_ord
+from .enums import ProbingState
 
 FREQ_CAT_NUM = 4
 
@@ -82,7 +81,7 @@
 # 2 : normal
 # 3 : very likely
 Latin1ClassModel = (
-    # UDF OTH ASC ASS ACV ACO ASV ASO
+# UDF OTH ASC ASS ACV ACO ASV ASO
     0,  0,  0,  0,  0,  0,  0,  0,  # UDF
     0,  3,  3,  3,  3,  3,  3,  3,  # OTH
     0,  3,  3,  3,  3,  3,  3,  3,  # ASC
@@ -96,40 +95,47 @@
 
 class Latin1Prober(CharSetProber):
     def __init__(self):
-        CharSetProber.__init__(self)
+        super(Latin1Prober, self).__init__()
+        self._last_char_class = None
+        self._freq_counter = None
         self.reset()
 
     def reset(self):
-        self._mLastCharClass = OTH
-        self._mFreqCounter = [0] * FREQ_CAT_NUM
+        self._last_char_class = OTH
+        self._freq_counter = [0] * FREQ_CAT_NUM
         CharSetProber.reset(self)
 
-    def get_charset_name(self):
-        return "windows-1252"
+    @property
+    def charset_name(self):
+        return "ISO-8859-1"
+
+    @property
+    def language(self):
+        return ""
 
-    def feed(self, aBuf):
-        aBuf = self.filter_with_english_letters(aBuf)
-        for c in aBuf:
-            charClass = Latin1_CharToClass[wrap_ord(c)]
-            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
-                                    + charClass]
+    def feed(self, byte_str):
+        byte_str = self.filter_with_english_letters(byte_str)
+        for c in byte_str:
+            char_class = Latin1_CharToClass[c]
+            freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
+                                    + char_class]
             if freq == 0:
-                self._mState = eNotMe
+                self._state = ProbingState.NOT_ME
                 break
-            self._mFreqCounter[freq] += 1
-            self._mLastCharClass = charClass
+            self._freq_counter[freq] += 1
+            self._last_char_class = char_class
 
-        return self.get_state()
+        return self.state
 
     def get_confidence(self):
-        if self.get_state() == eNotMe:
+        if self.state == ProbingState.NOT_ME:
             return 0.01
 
-        total = sum(self._mFreqCounter)
+        total = sum(self._freq_counter)
         if total < 0.01:
             confidence = 0.0
         else:
-            confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
+            confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
                           / total)
         if confidence < 0.0:
             confidence = 0.0

eric ide

mercurial