ThirdParty/CharDet/chardet/langhungarianmodel.py

changeset 3537
7662053c3906
parent 12
1d8dd9706f46
child 5714
90c57b50600f
equal deleted inserted replaced
3536:c06338ca892b 3537:7662053c3906
11 # 11 #
12 # This library is free software; you can redistribute it and/or 12 # This library is free software; you can redistribute it and/or
13 # modify it under the terms of the GNU Lesser General Public 13 # modify it under the terms of the GNU Lesser General Public
14 # License as published by the Free Software Foundation; either 14 # License as published by the Free Software Foundation; either
15 # version 2.1 of the License, or (at your option) any later version. 15 # version 2.1 of the License, or (at your option) any later version.
16 # 16 #
17 # This library is distributed in the hope that it will be useful, 17 # This library is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # Lesser General Public License for more details. 20 # Lesser General Public License for more details.
21 # 21 #
22 # You should have received a copy of the GNU Lesser General Public 22 # You should have received a copy of the GNU Lesser General Public
23 # License along with this library; if not, write to the Free Software 23 # License along with this library; if not, write to the Free Software
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 # 02110-1301 USA 25 # 02110-1301 USA
26 ######################### END LICENSE BLOCK ######################### 26 ######################### END LICENSE BLOCK #########################
27 27
28 from . import constants
29
30 # 255: Control characters that usually does not exist in any text 28 # 255: Control characters that usually does not exist in any text
31 # 254: Carriage/Return 29 # 254: Carriage/Return
32 # 253: symbol (punctuation) that does not belong to word 30 # 253: symbol (punctuation) that does not belong to word
33 # 252: 0 - 9 31 # 252: 0 - 9
34 32
35 # Character Mapping Table: 33 # Character Mapping Table:
36 Latin2_HungarianCharToOrderMap = ( \ 34 Latin2_HungarianCharToOrderMap = (
37 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 35 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
38 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 36 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
39 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 37 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
40 252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 38 252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
41 253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, 39 253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
50 232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241, 48 232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
51 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85, 49 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
52 245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253, 50 245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
53 ) 51 )
54 52
55 win1250HungarianCharToOrderMap = ( \ 53 win1250HungarianCharToOrderMap = (
56 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00 54 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
57 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10 55 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, # 10
58 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20 56 253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, # 20
59 252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30 57 252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, # 30
60 253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47, 58 253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
69 232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241, 67 232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
70 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87, 68 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
71 245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253, 69 245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
72 ) 70 )
73 71
74 # Model Table: 72 # Model Table:
75 # total sequences: 100% 73 # total sequences: 100%
76 # first 512 sequences: 94.7368% 74 # first 512 sequences: 94.7368%
77 # first 1024 sequences:5.2623% 75 # first 1024 sequences:5.2623%
78 # rest sequences: 0.8894% 76 # rest sequences: 0.8894%
79 # negative sequences: 0.0009% 77 # negative sequences: 0.0009%
80 HungarianLangModel = ( \ 78 HungarianLangModel = (
81 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 79 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
82 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2, 80 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
83 3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1, 81 3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
84 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, 82 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
85 3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1, 83 3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
206 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 204 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
207 1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0, 205 1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
208 0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0, 206 0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
209 ) 207 )
210 208
211 Latin2HungarianModel = { \ 209 Latin2HungarianModel = {
212 'charToOrderMap': Latin2_HungarianCharToOrderMap, 210 'charToOrderMap': Latin2_HungarianCharToOrderMap,
213 'precedenceMatrix': HungarianLangModel, 211 'precedenceMatrix': HungarianLangModel,
214 'mTypicalPositiveRatio': 0.947368, 212 'mTypicalPositiveRatio': 0.947368,
215 'keepEnglishLetter': True, 213 'keepEnglishLetter': True,
216 'charsetName': "ISO-8859-2" 214 'charsetName': "ISO-8859-2"
217 } 215 }
218 216
219 Win1250HungarianModel = { \ 217 Win1250HungarianModel = {
220 'charToOrderMap': win1250HungarianCharToOrderMap, 218 'charToOrderMap': win1250HungarianCharToOrderMap,
221 'precedenceMatrix': HungarianLangModel, 219 'precedenceMatrix': HungarianLangModel,
222 'mTypicalPositiveRatio': 0.947368, 220 'mTypicalPositiveRatio': 0.947368,
223 'keepEnglishLetter': True, 221 'keepEnglishLetter': True,
224 'charsetName': "windows-1250" 222 'charsetName': "windows-1250"
225 } 223 }
224
225 # flake8: noqa

eric ide

mercurial