ThirdParty/CharDet/chardet/codingstatemachine.py

changeset 3537
7662053c3906
parent 12
1d8dd9706f46
child 5714
90c57b50600f
Legend: equal, deleted, inserted, replaced
3536:c06338ca892b 3537:7662053c3906
11 # 11 #
12 # This library is free software; you can redistribute it and/or 12 # This library is free software; you can redistribute it and/or
13 # modify it under the terms of the GNU Lesser General Public 13 # modify it under the terms of the GNU Lesser General Public
14 # License as published by the Free Software Foundation; either 14 # License as published by the Free Software Foundation; either
15 # version 2.1 of the License, or (at your option) any later version. 15 # version 2.1 of the License, or (at your option) any later version.
16 # 16 #
17 # This library is distributed in the hope that it will be useful, 17 # This library is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # Lesser General Public License for more details. 20 # Lesser General Public License for more details.
21 # 21 #
22 # You should have received a copy of the GNU Lesser General Public 22 # You should have received a copy of the GNU Lesser General Public
23 # License along with this library; if not, write to the Free Software 23 # License along with this library; if not, write to the Free Software
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 # 02110-1301 USA 25 # 02110-1301 USA
26 ######################### END LICENSE BLOCK ######################### 26 ######################### END LICENSE BLOCK #########################
27 27
28 from .constants import eStart, eError, eItsMe 28 from .constants import eStart
29 from .compat import wrap_ord
30
29 31
30 class CodingStateMachine: 32 class CodingStateMachine:
31 def __init__(self, sm): 33 def __init__(self, sm):
32 self._mModel = sm 34 self._mModel = sm
33 self._mCurrentBytePos = 0 35 self._mCurrentBytePos = 0
39 41
40 def next_state(self, c): 42 def next_state(self, c):
41 # for each byte we get its class 43 # for each byte we get its class
42 # if it is first byte, we also get byte length 44 # if it is first byte, we also get byte length
43 # PY3K: aBuf is a byte stream, so c is an int, not a byte 45 # PY3K: aBuf is a byte stream, so c is an int, not a byte
44 byteCls = self._mModel['classTable'][c] 46 byteCls = self._mModel['classTable'][wrap_ord(c)]
45 if self._mCurrentState == eStart: 47 if self._mCurrentState == eStart:
46 self._mCurrentBytePos = 0 48 self._mCurrentBytePos = 0
47 self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] 49 self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
48 # from byte's class and stateTable, we get its next state 50 # from byte's class and stateTable, we get its next state
49 self._mCurrentState = self._mModel['stateTable'][self._mCurrentState * self._mModel['classFactor'] + byteCls] 51 curr_state = (self._mCurrentState * self._mModel['classFactor']
52 + byteCls)
53 self._mCurrentState = self._mModel['stateTable'][curr_state]
50 self._mCurrentBytePos += 1 54 self._mCurrentBytePos += 1
51 return self._mCurrentState 55 return self._mCurrentState
52 56
53 def get_current_charlen(self): 57 def get_current_charlen(self):
54 return self._mCurrentCharLen 58 return self._mCurrentCharLen

eric ide

mercurial