ThirdParty/CharDet/chardet/sbcsgroupprober.py

changeset 5714
90c57b50600f
parent 3537
7662053c3906
equal deleted inserted replaced
5713:6762afd9f963 5714:90c57b50600f
31 from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 31 from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 Latin5CyrillicModel, MacCyrillicModel, 32 Latin5CyrillicModel, MacCyrillicModel,
33 Ibm866Model, Ibm855Model) 33 Ibm866Model, Ibm855Model)
34 from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 34 from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 35 from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 36 # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 from .langthaimodel import TIS620ThaiModel 37 from .langthaimodel import TIS620ThaiModel
38 from .langhebrewmodel import Win1255HebrewModel 38 from .langhebrewmodel import Win1255HebrewModel
39 from .hebrewprober import HebrewProber 39 from .hebrewprober import HebrewProber
40 from .langturkishmodel import Latin5TurkishModel
40 41
41 42
42 class SBCSGroupProber(CharSetGroupProber): 43 class SBCSGroupProber(CharSetGroupProber):
43 def __init__(self): 44 def __init__(self):
44 CharSetGroupProber.__init__(self) 45 super(SBCSGroupProber, self).__init__()
45 self._mProbers = [ 46 self.probers = [
46 SingleByteCharSetProber(Win1251CyrillicModel), 47 SingleByteCharSetProber(Win1251CyrillicModel),
47 SingleByteCharSetProber(Koi8rModel), 48 SingleByteCharSetProber(Koi8rModel),
48 SingleByteCharSetProber(Latin5CyrillicModel), 49 SingleByteCharSetProber(Latin5CyrillicModel),
49 SingleByteCharSetProber(MacCyrillicModel), 50 SingleByteCharSetProber(MacCyrillicModel),
50 SingleByteCharSetProber(Ibm866Model), 51 SingleByteCharSetProber(Ibm866Model),
51 SingleByteCharSetProber(Ibm855Model), 52 SingleByteCharSetProber(Ibm855Model),
52 SingleByteCharSetProber(Latin7GreekModel), 53 SingleByteCharSetProber(Latin7GreekModel),
53 SingleByteCharSetProber(Win1253GreekModel), 54 SingleByteCharSetProber(Win1253GreekModel),
54 SingleByteCharSetProber(Latin5BulgarianModel), 55 SingleByteCharSetProber(Latin5BulgarianModel),
55 SingleByteCharSetProber(Win1251BulgarianModel), 56 SingleByteCharSetProber(Win1251BulgarianModel),
56 SingleByteCharSetProber(Latin2HungarianModel), 57 # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
57 SingleByteCharSetProber(Win1250HungarianModel), 58 # after we retrain model.
59 # SingleByteCharSetProber(Latin2HungarianModel),
60 # SingleByteCharSetProber(Win1250HungarianModel),
58 SingleByteCharSetProber(TIS620ThaiModel), 61 SingleByteCharSetProber(TIS620ThaiModel),
62 SingleByteCharSetProber(Latin5TurkishModel),
59 ] 63 ]
60 hebrewProber = HebrewProber() 64 hebrew_prober = HebrewProber()
61 logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, 65 logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
62 False, hebrewProber) 66 False, hebrew_prober)
63 visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, 67 visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
64 hebrewProber) 68 hebrew_prober)
65 hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) 69 hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
66 self._mProbers.extend([hebrewProber, logicalHebrewProber, 70 self.probers.extend([hebrew_prober, logical_hebrew_prober,
67 visualHebrewProber]) 71 visual_hebrew_prober])
68 72
69 self.reset() 73 self.reset()

eric ide

mercurial