31 from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, |
31 from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, |
32 Latin5CyrillicModel, MacCyrillicModel, |
32 Latin5CyrillicModel, MacCyrillicModel, |
33 Ibm866Model, Ibm855Model) |
33 Ibm866Model, Ibm855Model) |
34 from .langgreekmodel import Latin7GreekModel, Win1253GreekModel |
34 from .langgreekmodel import Latin7GreekModel, Win1253GreekModel |
35 from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel |
35 from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel |
36 from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel |
36 # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel |
37 from .langthaimodel import TIS620ThaiModel |
37 from .langthaimodel import TIS620ThaiModel |
38 from .langhebrewmodel import Win1255HebrewModel |
38 from .langhebrewmodel import Win1255HebrewModel |
39 from .hebrewprober import HebrewProber |
39 from .hebrewprober import HebrewProber |
|
40 from .langturkishmodel import Latin5TurkishModel |
40 |
41 |
41 |
42 |
42 class SBCSGroupProber(CharSetGroupProber): |
43 class SBCSGroupProber(CharSetGroupProber): |
43 def __init__(self): |
44 def __init__(self): |
44 CharSetGroupProber.__init__(self) |
45 super(SBCSGroupProber, self).__init__() |
45 self._mProbers = [ |
46 self.probers = [ |
46 SingleByteCharSetProber(Win1251CyrillicModel), |
47 SingleByteCharSetProber(Win1251CyrillicModel), |
47 SingleByteCharSetProber(Koi8rModel), |
48 SingleByteCharSetProber(Koi8rModel), |
48 SingleByteCharSetProber(Latin5CyrillicModel), |
49 SingleByteCharSetProber(Latin5CyrillicModel), |
49 SingleByteCharSetProber(MacCyrillicModel), |
50 SingleByteCharSetProber(MacCyrillicModel), |
50 SingleByteCharSetProber(Ibm866Model), |
51 SingleByteCharSetProber(Ibm866Model), |
51 SingleByteCharSetProber(Ibm855Model), |
52 SingleByteCharSetProber(Ibm855Model), |
52 SingleByteCharSetProber(Latin7GreekModel), |
53 SingleByteCharSetProber(Latin7GreekModel), |
53 SingleByteCharSetProber(Win1253GreekModel), |
54 SingleByteCharSetProber(Win1253GreekModel), |
54 SingleByteCharSetProber(Latin5BulgarianModel), |
55 SingleByteCharSetProber(Latin5BulgarianModel), |
55 SingleByteCharSetProber(Win1251BulgarianModel), |
56 SingleByteCharSetProber(Win1251BulgarianModel), |
56 SingleByteCharSetProber(Latin2HungarianModel), |
57 # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) |
57 SingleByteCharSetProber(Win1250HungarianModel), |
58 # after we retrain model. |
|
59 # SingleByteCharSetProber(Latin2HungarianModel), |
|
60 # SingleByteCharSetProber(Win1250HungarianModel), |
58 SingleByteCharSetProber(TIS620ThaiModel), |
61 SingleByteCharSetProber(TIS620ThaiModel), |
|
62 SingleByteCharSetProber(Latin5TurkishModel), |
59 ] |
63 ] |
60 hebrewProber = HebrewProber() |
64 hebrew_prober = HebrewProber() |
61 logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, |
65 logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, |
62 False, hebrewProber) |
66 False, hebrew_prober) |
63 visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, |
67 visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, |
64 hebrewProber) |
68 hebrew_prober) |
65 hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) |
69 hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) |
66 self._mProbers.extend([hebrewProber, logicalHebrewProber, |
70 self.probers.extend([hebrew_prober, logical_hebrew_prober, |
67 visualHebrewProber]) |
71 visual_hebrew_prober]) |
68 |
72 |
69 self.reset() |
73 self.reset() |