diff -r 6762afd9f963 -r 90c57b50600f ThirdParty/CharDet/chardet/utf8prober.py
--- a/ThirdParty/CharDet/chardet/utf8prober.py	Tue Apr 25 18:36:38 2017 +0200
+++ b/ThirdParty/CharDet/chardet/utf8prober.py	Tue Apr 25 18:40:46 2017 +0200
@@ -25,52 +25,58 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-from . import constants
 from .charsetprober import CharSetProber
+from .enums import ProbingState, MachineState
 from .codingstatemachine import CodingStateMachine
-from .mbcssm import UTF8SMModel
+from .mbcssm import UTF8_SM_MODEL
 
-ONE_CHAR_PROB = 0.5
 
 
 class UTF8Prober(CharSetProber):
+    ONE_CHAR_PROB = 0.5
+
     def __init__(self):
-        CharSetProber.__init__(self)
-        self._mCodingSM = CodingStateMachine(UTF8SMModel)
+        super(UTF8Prober, self).__init__()
+        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
+        self._num_mb_chars = None
         self.reset()
 
     def reset(self):
-        CharSetProber.reset(self)
-        self._mCodingSM.reset()
-        self._mNumOfMBChar = 0
+        super(UTF8Prober, self).reset()
+        self.coding_sm.reset()
+        self._num_mb_chars = 0
 
-    def get_charset_name(self):
+    @property
+    def charset_name(self):
         return "utf-8"
 
-    def feed(self, aBuf):
-        for c in aBuf:
-            codingState = self._mCodingSM.next_state(c)
-            if codingState == constants.eError:
-                self._mState = constants.eNotMe
-                break
-            elif codingState == constants.eItsMe:
-                self._mState = constants.eFoundIt
+    @property
+    def language(self):
+        return ""
+
+    def feed(self, byte_str):
+        for c in byte_str:
+            coding_state = self.coding_sm.next_state(c)
+            if coding_state == MachineState.ERROR:
+                self._state = ProbingState.NOT_ME
                 break
-            elif codingState == constants.eStart:
-                if self._mCodingSM.get_current_charlen() >= 2:
-                    self._mNumOfMBChar += 1
+            elif coding_state == MachineState.ITS_ME:
+                self._state = ProbingState.FOUND_IT
+                break
+            elif coding_state == MachineState.START:
+                if self.coding_sm.get_current_charlen() >= 2:
+                    self._num_mb_chars += 1
 
-        if self.get_state() == constants.eDetecting:
-            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
-                self._mState = constants.eFoundIt
+        if self.state == ProbingState.DETECTING:
+            if self.get_confidence() > self.SHORTCUT_THRESHOLD:
+                self._state = ProbingState.FOUND_IT
 
-        return self.get_state()
+        return self.state
 
     def get_confidence(self):
         unlike = 0.99
-        if self._mNumOfMBChar < 6:
-            for i in range(0, self._mNumOfMBChar):
-                unlike = unlike * ONE_CHAR_PROB
+        if self._num_mb_chars < 6:
+            unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
             return 1.0 - unlike
         else:
             return unlike