ThirdParty/CharDet/chardet/latin1prober.py

changeset 12
1d8dd9706f46
parent 0
de9c2efb9d02
child 3537
7662053c3906
equal deleted inserted replaced
11:b0996e4a289e 12:1d8dd9706f46
24 # License along with this library; if not, write to the Free Software 24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 # 02110-1301 USA 26 # 02110-1301 USA
27 ######################### END LICENSE BLOCK ######################### 27 ######################### END LICENSE BLOCK #########################
28 28
29 from charsetprober import CharSetProber 29 from .charsetprober import CharSetProber
30 import constants 30 from . import constants
31 import operator
32 31
33 FREQ_CAT_NUM = 4 32 FREQ_CAT_NUM = 4
34 33
35 UDF = 0 # undefined 34 UDF = 0 # undefined
36 OTH = 1 # other 35 OTH = 1 # other
107 return "windows-1252" 106 return "windows-1252"
108 107
109 def feed(self, aBuf): 108 def feed(self, aBuf):
110 aBuf = self.filter_with_english_letters(aBuf) 109 aBuf = self.filter_with_english_letters(aBuf)
111 for c in aBuf: 110 for c in aBuf:
112 charClass = Latin1_CharToClass[ord(c)] 111 charClass = Latin1_CharToClass[c]
113 freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass] 112 freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]
114 if freq == 0: 113 if freq == 0:
115 self._mState = constants.eNotMe 114 self._mState = constants.eNotMe
116 break 115 break
117 self._mFreqCounter[freq] += 1 116 self._mFreqCounter[freq] += 1
121 120
122 def get_confidence(self): 121 def get_confidence(self):
123 if self.get_state() == constants.eNotMe: 122 if self.get_state() == constants.eNotMe:
124 return 0.01 123 return 0.01
125 124
126 total = reduce(operator.add, self._mFreqCounter) 125 total = sum(self._mFreqCounter)
127 if total < 0.01: 126 if total < 0.01:
128 confidence = 0.0 127 confidence = 0.0
129 else: 128 else:
130 confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total) 129 confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total)
131 if confidence < 0.0: 130 if confidence < 0.0:

eric ide

mercurial