ThirdParty/CharDet/chardet/escprober.py

changeset 12
1d8dd9706f46
parent 0
de9c2efb9d02
child 3537
7662053c3906
equal deleted inserted replaced
11:b0996e4a289e 12:1d8dd9706f46
23 # License along with this library; if not, write to the Free Software 23 # License along with this library; if not, write to the Free Software
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 # 02110-1301 USA 25 # 02110-1301 USA
26 ######################### END LICENSE BLOCK ######################### 26 ######################### END LICENSE BLOCK #########################
27 27
28 import constants, sys 28 from . import constants
29 from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel 29 import sys
30 from charsetprober import CharSetProber 30 from .escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel
31 from codingstatemachine import CodingStateMachine 31 from .charsetprober import CharSetProber
32 from .codingstatemachine import CodingStateMachine
32 33
33 class EscCharSetProber(CharSetProber): 34 class EscCharSetProber(CharSetProber):
34 def __init__(self): 35 def __init__(self):
35 CharSetProber.__init__(self) 36 CharSetProber.__init__(self)
36 self._mCodingSM = [ \ 37 self._mCodingSM = [ \
43 44
44 def reset(self): 45 def reset(self):
45 CharSetProber.reset(self) 46 CharSetProber.reset(self)
46 for codingSM in self._mCodingSM: 47 for codingSM in self._mCodingSM:
47 if not codingSM: continue 48 if not codingSM: continue
48 codingSM.active = constants.True 49 codingSM.active = True
49 codingSM.reset() 50 codingSM.reset()
50 self._mActiveSM = len(self._mCodingSM) 51 self._mActiveSM = len(self._mCodingSM)
51 self._mDetectedCharset = None 52 self._mDetectedCharset = None
52 53
53 def get_charset_name(self): 54 def get_charset_name(self):
59 else: 60 else:
60 return 0.00 61 return 0.00
61 62
62 def feed(self, aBuf): 63 def feed(self, aBuf):
63 for c in aBuf: 64 for c in aBuf:
65 # PY3K: aBuf is a byte array, so c is an int, not a byte
64 for codingSM in self._mCodingSM: 66 for codingSM in self._mCodingSM:
65 if not codingSM: continue 67 if not codingSM: continue
66 if not codingSM.active: continue 68 if not codingSM.active: continue
67 codingState = codingSM.next_state(c) 69 codingState = codingSM.next_state(c)
68 if codingState == constants.eError: 70 if codingState == constants.eError:
69 codingSM.active = constants.False 71 codingSM.active = False
70 self._mActiveSM -= 1 72 self._mActiveSM -= 1
71 if self._mActiveSM <= 0: 73 if self._mActiveSM <= 0:
72 self._mState = constants.eNotMe 74 self._mState = constants.eNotMe
73 return self.get_state() 75 return self.get_state()
74 elif codingState == constants.eItsMe: 76 elif codingState == constants.eItsMe:

eric ide

mercurial