ThirdParty/CharDet/chardet/sbcharsetprober.py

changeset 12
1d8dd9706f46
parent 0
de9c2efb9d02
child 3537
7662053c3906
equal deleted inserted replaced
11:b0996e4a289e 12:1d8dd9706f46
24 # License along with this library; if not, write to the Free Software 24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 # 02110-1301 USA 26 # 02110-1301 USA
27 ######################### END LICENSE BLOCK ######################### 27 ######################### END LICENSE BLOCK #########################
28 28
29 import constants, sys 29 from . import constants
30 from charsetprober import CharSetProber 30 import sys
31 from .charsetprober import CharSetProber
31 32
32 SAMPLE_SIZE = 64 33 SAMPLE_SIZE = 64
33 SB_ENOUGH_REL_THRESHOLD = 1024 34 SB_ENOUGH_REL_THRESHOLD = 1024
34 POSITIVE_SHORTCUT_THRESHOLD = 0.95 35 POSITIVE_SHORTCUT_THRESHOLD = 0.95
35 NEGATIVE_SHORTCUT_THRESHOLD = 0.05 36 NEGATIVE_SHORTCUT_THRESHOLD = 0.05
37 NUMBER_OF_SEQ_CAT = 4 38 NUMBER_OF_SEQ_CAT = 4
38 POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 39 POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
39 #NEGATIVE_CAT = 0 40 #NEGATIVE_CAT = 0
40 41
41 class SingleByteCharSetProber(CharSetProber): 42 class SingleByteCharSetProber(CharSetProber):
42 def __init__(self, model, reversed=constants.False, nameProber=None): 43 def __init__(self, model, reversed=False, nameProber=None):
43 CharSetProber.__init__(self) 44 CharSetProber.__init__(self)
44 self._mModel = model 45 self._mModel = model
45 self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup 46 self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup
46 self._mNameProber = nameProber # Optional auxiliary prober for name decision 47 self._mNameProber = nameProber # Optional auxiliary prober for name decision
47 self.reset() 48 self.reset()
65 aBuf = self.filter_without_english_letters(aBuf) 66 aBuf = self.filter_without_english_letters(aBuf)
66 aLen = len(aBuf) 67 aLen = len(aBuf)
67 if not aLen: 68 if not aLen:
68 return self.get_state() 69 return self.get_state()
69 for c in aBuf: 70 for c in aBuf:
70 order = self._mModel['charToOrderMap'][ord(c)] 71 order = self._mModel['charToOrderMap'][c]
71 if order < SYMBOL_CAT_ORDER: 72 if order < SYMBOL_CAT_ORDER:
72 self._mTotalChar += 1 73 self._mTotalChar += 1
73 if order < SAMPLE_SIZE: 74 if order < SAMPLE_SIZE:
74 self._mFreqChar += 1 75 self._mFreqChar += 1
75 if self._mLastOrder < SAMPLE_SIZE: 76 if self._mLastOrder < SAMPLE_SIZE:

eric ide

mercurial