ThirdParty/CharDet/chardet/sjisprober.py

changeset 5714
90c57b50600f
parent 5310
f2b774d78b4a
diff -r 6762afd9f963 -r 90c57b50600f ThirdParty/CharDet/chardet/sjisprober.py
--- a/ThirdParty/CharDet/chardet/sjisprober.py	Tue Apr 25 18:36:38 2017 +0200
+++ b/ThirdParty/CharDet/chardet/sjisprober.py	Tue Apr 25 18:40:46 2017 +0200
@@ -25,67 +25,68 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-import sys
 from .mbcharsetprober import MultiByteCharSetProber
 from .codingstatemachine import CodingStateMachine
 from .chardistribution import SJISDistributionAnalysis
 from .jpcntx import SJISContextAnalysis
-from .mbcssm import SJISSMModel
-from . import constants
+from .mbcssm import SJIS_SM_MODEL
+from .enums import ProbingState, MachineState
 
 
 class SJISProber(MultiByteCharSetProber):
     def __init__(self):
-        MultiByteCharSetProber.__init__(self)
-        self._mCodingSM = CodingStateMachine(SJISSMModel)
-        self._mDistributionAnalyzer = SJISDistributionAnalysis()
-        self._mContextAnalyzer = SJISContextAnalysis()
+        super(SJISProber, self).__init__()
+        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
+        self.distribution_analyzer = SJISDistributionAnalysis()
+        self.context_analyzer = SJISContextAnalysis()
         self.reset()
 
     def reset(self):
-        MultiByteCharSetProber.reset(self)
-        self._mContextAnalyzer.reset()
+        super(SJISProber, self).reset()
+        self.context_analyzer.reset()
 
-    def get_charset_name(self):
-        return self._mContextAnalyzer.get_charset_name()
+    @property
+    def charset_name(self):
+        return self.context_analyzer.charset_name
+
+    @property
+    def language(self):
+        return "Japanese"
 
-    def feed(self, aBuf):
-        aLen = len(aBuf)
-        for i in range(0, aLen):
-            codingState = self._mCodingSM.next_state(aBuf[i])
-            if codingState == constants.eError:
-                if constants._debug:
-                    sys.stderr.write(self.get_charset_name()
-                                     + ' prober hit error at byte ' + str(i)
-                                     + '\n')
-                self._mState = constants.eNotMe
+    def feed(self, byte_str):
+        for i in range(len(byte_str)):
+            coding_state = self.coding_sm.next_state(byte_str[i])
+            if coding_state == MachineState.ERROR:
+                self.logger.debug('%s %s prober hit error at byte %s',
+                                  self.charset_name, self.language, i)
+                self._state = ProbingState.NOT_ME
                 break
-            elif codingState == constants.eItsMe:
-                self._mState = constants.eFoundIt
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
                 break
-            elif codingState == constants.eStart:
-                charLen = self._mCodingSM.get_current_charlen()
+            elif coding_state == MachineState.START:
+                char_len = self.coding_sm.get_current_charlen()
                 if i == 0:
-                    self._mLastChar[1] = aBuf[0]
-                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
-                                                charLen)
-                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
+                    self._last_char[1] = byte_str[0]
+                    self.context_analyzer.feed(self._last_char[2 - char_len:],
+                                               char_len)
+                    self.distribution_analyzer.feed(self._last_char, char_len)
                 else:
-                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
-                                                     - charLen], charLen)
-                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
-                                                     charLen)
+                    self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
+                                                        - char_len], char_len)
+                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
+                                                    char_len)
 
-        self._mLastChar[0] = aBuf[aLen - 1]
+        self._last_char[0] = byte_str[-1]
 
-        if self.get_state() == constants.eDetecting:
-            if (self._mContextAnalyzer.got_enough_data() and
-               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
-                self._mState = constants.eFoundIt
+        if self.state == ProbingState.DETECTING:
+            if (self.context_analyzer.got_enough_data() and
+               (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
+                self._state = ProbingState.FOUND_IT
 
-        return self.get_state()
+        return self.state
 
     def get_confidence(self):
-        contxtCf = self._mContextAnalyzer.get_confidence()
-        distribCf = self._mDistributionAnalyzer.get_confidence()
-        return max(contxtCf, distribCf)
+        context_conf = self.context_analyzer.get_confidence()
+        distrib_conf = self.distribution_analyzer.get_confidence()
+        return max(context_conf, distrib_conf)

eric ide

mercurial