--- a/ThirdParty/CharDet/chardet/latin1prober.py Tue Apr 25 18:36:38 2017 +0200 +++ b/ThirdParty/CharDet/chardet/latin1prober.py Tue Apr 25 18:40:46 2017 +0200 @@ -27,8 +27,7 @@ ######################### END LICENSE BLOCK ######################### from .charsetprober import CharSetProber -from .constants import eNotMe -from .compat import wrap_ord +from .enums import ProbingState FREQ_CAT_NUM = 4 @@ -82,7 +81,7 @@ # 2 : normal # 3 : very likely Latin1ClassModel = ( - # UDF OTH ASC ASS ACV ACO ASV ASO +# UDF OTH ASC ASS ACV ACO ASV ASO 0, 0, 0, 0, 0, 0, 0, 0, # UDF 0, 3, 3, 3, 3, 3, 3, 3, # OTH 0, 3, 3, 3, 3, 3, 3, 3, # ASC @@ -96,40 +95,47 @@ class Latin1Prober(CharSetProber): def __init__(self): - CharSetProber.__init__(self) + super(Latin1Prober, self).__init__() + self._last_char_class = None + self._freq_counter = None self.reset() def reset(self): - self._mLastCharClass = OTH - self._mFreqCounter = [0] * FREQ_CAT_NUM + self._last_char_class = OTH + self._freq_counter = [0] * FREQ_CAT_NUM CharSetProber.reset(self) - def get_charset_name(self): - return "windows-1252" + @property + def charset_name(self): + return "ISO-8859-1" + + @property + def language(self): + return "" - def feed(self, aBuf): - aBuf = self.filter_with_english_letters(aBuf) - for c in aBuf: - charClass = Latin1_CharToClass[wrap_ord(c)] - freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) - + charClass] + def feed(self, byte_str): + byte_str = self.filter_with_english_letters(byte_str) + for c in byte_str: + char_class = Latin1_CharToClass[c] + freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + + char_class] if freq == 0: - self._mState = eNotMe + self._state = ProbingState.NOT_ME break - self._mFreqCounter[freq] += 1 - self._mLastCharClass = charClass + self._freq_counter[freq] += 1 + self._last_char_class = char_class - return self.get_state() + return self.state def get_confidence(self): - if self.get_state() == eNotMe: + if self.state == ProbingState.NOT_ME: return 0.01 - total = sum(self._mFreqCounter) + total = sum(self._freq_counter) if total < 0.01: confidence = 0.0 else: - confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) + confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) / total) if confidence < 0.0: confidence = 0.0