24 # License along with this library; if not, write to the Free Software |
24 # License along with this library; if not, write to the Free Software |
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
26 # 02110-1301 USA |
26 # 02110-1301 USA |
27 ######################### END LICENSE BLOCK ######################### |
27 ######################### END LICENSE BLOCK ######################### |
28 |
28 |
29 import constants, sys |
29 from . import constants |
30 from charsetprober import CharSetProber |
30 import sys |
|
31 from .charsetprober import CharSetProber |
31 |
32 |
32 SAMPLE_SIZE = 64 |
33 SAMPLE_SIZE = 64 |
33 SB_ENOUGH_REL_THRESHOLD = 1024 |
34 SB_ENOUGH_REL_THRESHOLD = 1024 |
34 POSITIVE_SHORTCUT_THRESHOLD = 0.95 |
35 POSITIVE_SHORTCUT_THRESHOLD = 0.95 |
35 NEGATIVE_SHORTCUT_THRESHOLD = 0.05 |
36 NEGATIVE_SHORTCUT_THRESHOLD = 0.05 |
37 NUMBER_OF_SEQ_CAT = 4 |
38 NUMBER_OF_SEQ_CAT = 4 |
38 POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 |
39 POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 |
39 #NEGATIVE_CAT = 0 |
40 #NEGATIVE_CAT = 0 |
40 |
41 |
41 class SingleByteCharSetProber(CharSetProber): |
42 class SingleByteCharSetProber(CharSetProber): |
42 def __init__(self, model, reversed=constants.False, nameProber=None): |
43 def __init__(self, model, reversed=False, nameProber=None): |
43 CharSetProber.__init__(self) |
44 CharSetProber.__init__(self) |
44 self._mModel = model |
45 self._mModel = model |
45 self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup |
46 self._mReversed = reversed # TRUE if we need to reverse every pair in the model lookup |
46 self._mNameProber = nameProber # Optional auxiliary prober for name decision |
47 self._mNameProber = nameProber # Optional auxiliary prober for name decision |
47 self.reset() |
48 self.reset() |
65 aBuf = self.filter_without_english_letters(aBuf) |
66 aBuf = self.filter_without_english_letters(aBuf) |
66 aLen = len(aBuf) |
67 aLen = len(aBuf) |
67 if not aLen: |
68 if not aLen: |
68 return self.get_state() |
69 return self.get_state() |
69 for c in aBuf: |
70 for c in aBuf: |
70 order = self._mModel['charToOrderMap'][ord(c)] |
71 order = self._mModel['charToOrderMap'][c] |
71 if order < SYMBOL_CAT_ORDER: |
72 if order < SYMBOL_CAT_ORDER: |
72 self._mTotalChar += 1 |
73 self._mTotalChar += 1 |
73 if order < SAMPLE_SIZE: |
74 if order < SAMPLE_SIZE: |
74 self._mFreqChar += 1 |
75 self._mFreqChar += 1 |
75 if self._mLastOrder < SAMPLE_SIZE: |
76 if self._mLastOrder < SAMPLE_SIZE: |