|
1 ######################## BEGIN LICENSE BLOCK ######################## |
|
2 # The Original Code is mozilla.org code. |
|
3 # |
|
4 # The Initial Developer of the Original Code is |
|
5 # Netscape Communications Corporation. |
|
6 # Portions created by the Initial Developer are Copyright (C) 1998 |
|
7 # the Initial Developer. All Rights Reserved. |
|
8 # |
|
9 # Contributor(s): |
|
10 # Mark Pilgrim - port to Python |
|
11 # |
|
12 # This library is free software; you can redistribute it and/or |
|
13 # modify it under the terms of the GNU Lesser General Public |
|
14 # License as published by the Free Software Foundation; either |
|
15 # version 2.1 of the License, or (at your option) any later version. |
|
16 # |
|
17 # This library is distributed in the hope that it will be useful, |
|
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
20 # Lesser General Public License for more details. |
|
21 # |
|
22 # You should have received a copy of the GNU Lesser General Public |
|
23 # License along with this library; if not, write to the Free Software |
|
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|
25 # 02110-1301 USA |
|
26 ######################### END LICENSE BLOCK ######################### |
|
27 |
|
28 from constants import eStart, eError, eItsMe |
|
29 |
|
30 # BIG5 |
|
31 |
|
# Byte-class table for Big5: one class number (0-4) for every possible byte
# value, laid out eight entries per row.  The trailing comment on each row
# gives the hexadecimal byte range that the row covers.
BIG5_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07 (0x00 is accepted as a legal value)
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    4, 4, 4, 4, 4, 4, 4, 4,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    4, 3, 3, 3, 3, 3, 3, 3,  # a0 - a7
    3, 3, 3, 3, 3, 3, 3, 3,  # a8 - af
    3, 3, 3, 3, 3, 3, 3, 3,  # b0 - b7
    3, 3, 3, 3, 3, 3, 3, 3,  # b8 - bf
    3, 3, 3, 3, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0,  # f8 - ff
)
|
65 |
|
# Flattened transition table for the Big5 state machine (the 'stateTable' of
# Big5SMModel).  Every entry is eStart, eError, eItsMe or the number of an
# intermediate state; the trailing comments give the flat index range in hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
BIG5_st = (
    eError, eStart, eStart, 3, eError, eError, eError, eError,  # 00-07
    eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError,  # 08-0f
    eError, eStart, eStart, eStart, eStart, eStart, eStart, eStart,  # 10-17
)
|
70 |
|
# Character length for each Big5 byte class; the tuple has one entry per
# class number used in BIG5_cls (0-4).
Big5CharLenTable = (0, 1, 1, 2, 0)

# Bundle of the Big5 tables plus the charset name, keyed by the names the
# state-machine code looks up.
Big5SMModel = {
    'classTable': BIG5_cls,
    'classFactor': 5,
    'stateTable': BIG5_st,
    'charLenTable': Big5CharLenTable,
    'name': 'Big5',
}
|
78 |
|
79 # EUC-JP |
|
80 |
|
# Byte-class table for EUC-JP: one class number (0-5) for every possible byte
# value, laid out eight entries per row.  The trailing comment on each row
# gives the hexadecimal byte range that the row covers.
EUCJP_cls = (
    4, 4, 4, 4, 4, 4, 4, 4,  # 00 - 07
    4, 4, 4, 4, 4, 4, 5, 5,  # 08 - 0f
    4, 4, 4, 4, 4, 4, 4, 4,  # 10 - 17
    4, 4, 4, 5, 4, 4, 4, 4,  # 18 - 1f
    4, 4, 4, 4, 4, 4, 4, 4,  # 20 - 27
    4, 4, 4, 4, 4, 4, 4, 4,  # 28 - 2f
    4, 4, 4, 4, 4, 4, 4, 4,  # 30 - 37
    4, 4, 4, 4, 4, 4, 4, 4,  # 38 - 3f
    4, 4, 4, 4, 4, 4, 4, 4,  # 40 - 47
    4, 4, 4, 4, 4, 4, 4, 4,  # 48 - 4f
    4, 4, 4, 4, 4, 4, 4, 4,  # 50 - 57
    4, 4, 4, 4, 4, 4, 4, 4,  # 58 - 5f
    4, 4, 4, 4, 4, 4, 4, 4,  # 60 - 67
    4, 4, 4, 4, 4, 4, 4, 4,  # 68 - 6f
    4, 4, 4, 4, 4, 4, 4, 4,  # 70 - 77
    4, 4, 4, 4, 4, 4, 4, 4,  # 78 - 7f
    5, 5, 5, 5, 5, 5, 5, 5,  # 80 - 87
    5, 5, 5, 5, 5, 5, 1, 3,  # 88 - 8f
    5, 5, 5, 5, 5, 5, 5, 5,  # 90 - 97
    5, 5, 5, 5, 5, 5, 5, 5,  # 98 - 9f
    5, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 0, 5,  # f8 - ff
)
|
114 |
|
# Flattened transition table for the EUC-JP state machine (the 'stateTable'
# of EUCJPSMModel).  Every entry is eStart, eError, eItsMe or the number of an
# intermediate state; the trailing comments give the flat index range in hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
EUCJP_st = (
    3, 4, 3, 5, eStart, eError, eError, eError,  # 00-07
    eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe,  # 08-0f
    eItsMe, eItsMe, eStart, eError, eStart, eError, eError, eError,  # 10-17
    eError, eError, eStart, eError, eError, eError, 3, eError,  # 18-1f
    3, eError, eError, eError, eStart, eStart, eStart, eStart,  # 20-27
)
|
121 |
|
# Character length for each EUC-JP byte class; the tuple has one entry per
# class number used in EUCJP_cls (0-5).
EUCJPCharLenTable = (2, 2, 2, 3, 1, 0)

# Bundle of the EUC-JP tables plus the charset name, keyed by the names the
# state-machine code looks up.
EUCJPSMModel = {
    'classTable': EUCJP_cls,
    'classFactor': 6,
    'stateTable': EUCJP_st,
    'charLenTable': EUCJPCharLenTable,
    'name': 'EUC-JP',
}
|
129 |
|
130 # EUC-KR |
|
131 |
|
# Byte-class table for EUC-KR: one class number (0-3) for every possible byte
# value, laid out eight entries per row.  The trailing comment on each row
# gives the hexadecimal byte range that the row covers.
EUCKR_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 3, 3, 3,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 3, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    2, 2, 2, 2, 2, 2, 2, 2,  # e0 - e7
    2, 2, 2, 2, 2, 2, 2, 2,  # e8 - ef
    2, 2, 2, 2, 2, 2, 2, 2,  # f0 - f7
    2, 2, 2, 2, 2, 2, 2, 0,  # f8 - ff
)
|
165 |
|
# Flattened transition table for the EUC-KR state machine (the 'stateTable'
# of EUCKRSMModel).  Every entry is eStart, eError, eItsMe or the number of an
# intermediate state; the trailing comments give the flat index range in hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
EUCKR_st = (
    eError, eStart, 3, eError, eError, eError, eError, eError,  # 00-07
    eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart,  # 08-0f
)
|
169 |
|
# Character length for each EUC-KR byte class; the tuple has one entry per
# class number used in EUCKR_cls (0-3).
EUCKRCharLenTable = (0, 1, 2, 0)

# Bundle of the EUC-KR tables plus the charset name, keyed by the names the
# state-machine code looks up.
EUCKRSMModel = {
    'classTable': EUCKR_cls,
    'classFactor': 4,
    'stateTable': EUCKR_st,
    'charLenTable': EUCKRCharLenTable,
    'name': 'EUC-KR',
}
|
177 |
|
178 # EUC-TW |
|
179 |
|
# Byte-class table for EUC-TW: one class number (0-6) for every possible byte
# value, laid out eight entries per row.  The trailing comment on each row
# gives the hexadecimal byte range that the row covers.
EUCTW_cls = (
    2, 2, 2, 2, 2, 2, 2, 2,  # 00 - 07
    2, 2, 2, 2, 2, 2, 0, 0,  # 08 - 0f
    2, 2, 2, 2, 2, 2, 2, 2,  # 10 - 17
    2, 2, 2, 0, 2, 2, 2, 2,  # 18 - 1f
    2, 2, 2, 2, 2, 2, 2, 2,  # 20 - 27
    2, 2, 2, 2, 2, 2, 2, 2,  # 28 - 2f
    2, 2, 2, 2, 2, 2, 2, 2,  # 30 - 37
    2, 2, 2, 2, 2, 2, 2, 2,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 2,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 6, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 3, 4, 4, 4, 4, 4, 4,  # a0 - a7
    5, 5, 1, 1, 1, 1, 1, 1,  # a8 - af
    1, 1, 1, 1, 1, 1, 1, 1,  # b0 - b7
    1, 1, 1, 1, 1, 1, 1, 1,  # b8 - bf
    1, 1, 3, 1, 3, 3, 3, 3,  # c0 - c7
    3, 3, 3, 3, 3, 3, 3, 3,  # c8 - cf
    3, 3, 3, 3, 3, 3, 3, 3,  # d0 - d7
    3, 3, 3, 3, 3, 3, 3, 3,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 3, 3, 3,  # e8 - ef
    3, 3, 3, 3, 3, 3, 3, 3,  # f0 - f7
    3, 3, 3, 3, 3, 3, 3, 0,  # f8 - ff
)
|
213 |
|
# Flattened transition table for the EUC-TW state machine (the 'stateTable'
# of EUCTWSMModel).  Every entry is eStart, eError, eItsMe or the number of an
# intermediate state; the trailing comments give the flat index range in hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
EUCTW_st = (
    eError, eError, eStart, 3, 3, 3, 4, eError,  # 00-07
    eError, eError, eError, eError, eError, eError, eItsMe, eItsMe,  # 08-0f
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eStart, eError,  # 10-17
    eStart, eStart, eStart, eError, eError, eError, eError, eError,  # 18-1f
    5, eError, eError, eError, eStart, eError, eStart, eStart,  # 20-27
    eStart, eError, eStart, eStart, eStart, eStart, eStart, eStart,  # 28-2f
)
|
221 |
|
# Character length for each EUC-TW byte class; the tuple has one entry per
# class number used in EUCTW_cls (0-6).
EUCTWCharLenTable = (0, 0, 1, 2, 2, 2, 3)

# Bundle of the EUC-TW tables plus the charset name, keyed by the names the
# state-machine code looks up.
EUCTWSMModel = {
    'classTable': EUCTW_cls,
    'classFactor': 7,
    'stateTable': EUCTW_st,
    'charLenTable': EUCTWCharLenTable,
    'name': 'x-euc-tw',
}
|
229 |
|
230 # GB2312 |
|
231 |
|
# Byte-class table for GB2312: one class number (0-6) for every possible byte
# value, laid out eight entries per row.  The trailing comment on each row
# gives the hexadecimal byte range that the row covers.
GB2312_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    3, 3, 3, 3, 3, 3, 3, 3,  # 30 - 37
    3, 3, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 4,  # 78 - 7f
    5, 6, 6, 6, 6, 6, 6, 6,  # 80 - 87
    6, 6, 6, 6, 6, 6, 6, 6,  # 88 - 8f
    6, 6, 6, 6, 6, 6, 6, 6,  # 90 - 97
    6, 6, 6, 6, 6, 6, 6, 6,  # 98 - 9f
    6, 6, 6, 6, 6, 6, 6, 6,  # a0 - a7
    6, 6, 6, 6, 6, 6, 6, 6,  # a8 - af
    6, 6, 6, 6, 6, 6, 6, 6,  # b0 - b7
    6, 6, 6, 6, 6, 6, 6, 6,  # b8 - bf
    6, 6, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    6, 6, 6, 6, 6, 6, 6, 6,  # e0 - e7
    6, 6, 6, 6, 6, 6, 6, 6,  # e8 - ef
    6, 6, 6, 6, 6, 6, 6, 6,  # f0 - f7
    6, 6, 6, 6, 6, 6, 6, 0,  # f8 - ff
)
|
265 |
|
# Flattened transition table for the GB2312 state machine (the 'stateTable'
# of GB2312SMModel).  Every entry is eStart, eError, eItsMe or the number of
# an intermediate state; the trailing comments give the flat index range in
# hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
GB2312_st = (
    eError, eStart, eStart, eStart, eStart, eStart, 3, eError,  # 00-07
    eError, eError, eError, eError, eError, eError, eItsMe, eItsMe,  # 08-0f
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart,  # 10-17
    4, eError, eStart, eStart, eError, eError, eError, eError,  # 18-1f
    eError, eError, 5, eError, eError, eError, eItsMe, eError,  # 20-27
    eError, eError, eStart, eStart, eStart, eStart, eStart, eStart,  # 28-2f
)
|
273 |
|
# Character length for each GB2312 byte class (one entry per class, 0-6).
# Strictly speaking, a class 6 character can be either 2 or 4 bytes long.
# Telling the two apart is unnecessary here: the length is only used for
# frequency analysis, and each code range is validated there as well, so it
# is safe to record 2.
GB2312CharLenTable = (0, 1, 1, 1, 1, 1, 2)

# Bundle of the GB2312 tables plus the charset name, keyed by the names the
# state-machine code looks up.
GB2312SMModel = {
    'classTable': GB2312_cls,
    'classFactor': 7,
    'stateTable': GB2312_st,
    'charLenTable': GB2312CharLenTable,
    'name': 'GB2312',
}
|
286 |
|
287 # Shift_JIS |
|
288 |
|
# Byte-class table for Shift_JIS: one class number (0-4) for every possible
# byte value, laid out eight entries per row.  The trailing comment on each
# row gives the hexadecimal byte range that the row covers.
SJIS_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    2, 2, 2, 2, 2, 2, 2, 2,  # 40 - 47
    2, 2, 2, 2, 2, 2, 2, 2,  # 48 - 4f
    2, 2, 2, 2, 2, 2, 2, 2,  # 50 - 57
    2, 2, 2, 2, 2, 2, 2, 2,  # 58 - 5f
    2, 2, 2, 2, 2, 2, 2, 2,  # 60 - 67
    2, 2, 2, 2, 2, 2, 2, 2,  # 68 - 6f
    2, 2, 2, 2, 2, 2, 2, 2,  # 70 - 77
    2, 2, 2, 2, 2, 2, 2, 1,  # 78 - 7f
    3, 3, 3, 3, 3, 3, 3, 3,  # 80 - 87
    3, 3, 3, 3, 3, 3, 3, 3,  # 88 - 8f
    3, 3, 3, 3, 3, 3, 3, 3,  # 90 - 97
    3, 3, 3, 3, 3, 3, 3, 3,  # 98 - 9f
    # 0xa0 is illegal in the Shift_JIS encoding, but some pages do contain
    # that byte, so the table is deliberately forgiving about it.
    2, 2, 2, 2, 2, 2, 2, 2,  # a0 - a7
    2, 2, 2, 2, 2, 2, 2, 2,  # a8 - af
    2, 2, 2, 2, 2, 2, 2, 2,  # b0 - b7
    2, 2, 2, 2, 2, 2, 2, 2,  # b8 - bf
    2, 2, 2, 2, 2, 2, 2, 2,  # c0 - c7
    2, 2, 2, 2, 2, 2, 2, 2,  # c8 - cf
    2, 2, 2, 2, 2, 2, 2, 2,  # d0 - d7
    2, 2, 2, 2, 2, 2, 2, 2,  # d8 - df
    3, 3, 3, 3, 3, 3, 3, 3,  # e0 - e7
    3, 3, 3, 3, 3, 4, 4, 4,  # e8 - ef
    4, 4, 4, 4, 4, 4, 4, 4,  # f0 - f7
    4, 4, 4, 4, 4, 0, 0, 0,  # f8 - ff
)
|
324 |
|
# Flattened transition table for the Shift_JIS state machine (the
# 'stateTable' of SJISSMModel).  Every entry is eStart, eError, eItsMe or the
# number of an intermediate state; the trailing comments give the flat index
# range in hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
SJIS_st = (
    eError, eStart, eStart, 3, eError, eError, eError, eError,  # 00-07
    eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe,  # 08-0f
    eItsMe, eItsMe, eError, eError, eStart, eStart, eStart, eStart,  # 10-17
)
|
329 |
|
# Character length per Shift_JIS byte class.  The tuple has six entries,
# matching 'classFactor' below, although SJIS_cls only uses classes 0-4.
SJISCharLenTable = (0, 1, 1, 2, 0, 0)

# Bundle of the Shift_JIS tables plus the charset name, keyed by the names
# the state-machine code looks up.
SJISSMModel = {
    'classTable': SJIS_cls,
    'classFactor': 6,
    'stateTable': SJIS_st,
    'charLenTable': SJISCharLenTable,
    'name': 'Shift_JIS',
}
|
337 |
|
338 # UCS2-BE |
|
339 |
|
# Byte-class table for UCS2-BE: one class number (0-5) for every possible
# byte value, laid out eight entries per row.  The trailing comment on each
# row gives the hexadecimal byte range that the row covers.  Almost every
# byte is class 0; only a handful of values get a class of their own.
UCS2BE_cls = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5,  # f8 - ff
)
|
373 |
|
# Flattened transition table for the UCS2-BE state machine (the 'stateTable'
# of UCS2BESMModel).  Every entry is eStart, eError, eItsMe or the number of
# an intermediate state; the trailing comments give the flat index range in
# hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
UCS2BE_st = (
    5, 7, 7, eError, 4, 3, eError, eError,  # 00-07
    eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe,  # 08-0f
    eItsMe, eItsMe, 6, 6, 6, 6, eError, eError,  # 10-17
    6, 6, 6, 6, 6, eItsMe, 6, 6,  # 18-1f
    6, 6, 6, 6, 5, 7, 7, eError,  # 20-27
    5, 8, 6, 6, eError, 6, 6, 6,  # 28-2f
    6, 6, 6, 6, eError, eError, eStart, eStart,  # 30-37
)
|
382 |
|
# Character length for each UCS2-BE byte class; the tuple has one entry per
# class number used in UCS2BE_cls (0-5).
UCS2BECharLenTable = (2, 2, 2, 0, 2, 2)

# Bundle of the UCS2-BE tables, keyed by the names the state-machine code
# looks up.  The charset is reported under the name 'UTF-16BE'.
UCS2BESMModel = {
    'classTable': UCS2BE_cls,
    'classFactor': 6,
    'stateTable': UCS2BE_st,
    'charLenTable': UCS2BECharLenTable,
    'name': 'UTF-16BE',
}
|
390 |
|
391 # UCS2-LE |
|
392 |
|
# Byte-class table for UCS2-LE: one class number (0-5) for every possible
# byte value, laid out eight entries per row.  The trailing comment on each
# row gives the hexadecimal byte range that the row covers.  Almost every
# byte is class 0; only a handful of values get a class of their own.
UCS2LE_cls = (
    0, 0, 0, 0, 0, 0, 0, 0,  # 00 - 07
    0, 0, 1, 0, 0, 2, 0, 0,  # 08 - 0f
    0, 0, 0, 0, 0, 0, 0, 0,  # 10 - 17
    0, 0, 0, 3, 0, 0, 0, 0,  # 18 - 1f
    0, 0, 0, 0, 0, 0, 0, 0,  # 20 - 27
    0, 3, 3, 3, 3, 3, 0, 0,  # 28 - 2f
    0, 0, 0, 0, 0, 0, 0, 0,  # 30 - 37
    0, 0, 0, 0, 0, 0, 0, 0,  # 38 - 3f
    0, 0, 0, 0, 0, 0, 0, 0,  # 40 - 47
    0, 0, 0, 0, 0, 0, 0, 0,  # 48 - 4f
    0, 0, 0, 0, 0, 0, 0, 0,  # 50 - 57
    0, 0, 0, 0, 0, 0, 0, 0,  # 58 - 5f
    0, 0, 0, 0, 0, 0, 0, 0,  # 60 - 67
    0, 0, 0, 0, 0, 0, 0, 0,  # 68 - 6f
    0, 0, 0, 0, 0, 0, 0, 0,  # 70 - 77
    0, 0, 0, 0, 0, 0, 0, 0,  # 78 - 7f
    0, 0, 0, 0, 0, 0, 0, 0,  # 80 - 87
    0, 0, 0, 0, 0, 0, 0, 0,  # 88 - 8f
    0, 0, 0, 0, 0, 0, 0, 0,  # 90 - 97
    0, 0, 0, 0, 0, 0, 0, 0,  # 98 - 9f
    0, 0, 0, 0, 0, 0, 0, 0,  # a0 - a7
    0, 0, 0, 0, 0, 0, 0, 0,  # a8 - af
    0, 0, 0, 0, 0, 0, 0, 0,  # b0 - b7
    0, 0, 0, 0, 0, 0, 0, 0,  # b8 - bf
    0, 0, 0, 0, 0, 0, 0, 0,  # c0 - c7
    0, 0, 0, 0, 0, 0, 0, 0,  # c8 - cf
    0, 0, 0, 0, 0, 0, 0, 0,  # d0 - d7
    0, 0, 0, 0, 0, 0, 0, 0,  # d8 - df
    0, 0, 0, 0, 0, 0, 0, 0,  # e0 - e7
    0, 0, 0, 0, 0, 0, 0, 0,  # e8 - ef
    0, 0, 0, 0, 0, 0, 0, 0,  # f0 - f7
    0, 0, 0, 0, 0, 0, 4, 5,  # f8 - ff
)
|
426 |
|
# Flattened transition table for the UCS2-LE state machine (the 'stateTable'
# of UCS2LESMModel).  Every entry is eStart, eError, eItsMe or the number of
# an intermediate state; the trailing comments give the flat index range in
# hex.
# NOTE(review): presumably indexed by state * classFactor + byte class --
# confirm against the code that consumes the model.
UCS2LE_st = (
    6, 6, 7, 6, 4, 3, eError, eError,  # 00-07
    eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe,  # 08-0f
    eItsMe, eItsMe, 5, 5, 5, eError, eItsMe, eError,  # 10-17
    5, 5, 5, eError, 5, eError, 6, 6,  # 18-1f
    7, 6, 8, 8, 5, 5, 5, eError,  # 20-27
    5, 5, 5, eError, eError, eError, 5, 5,  # 28-2f
    5, 5, 5, eError, 5, eError, eStart, eStart,  # 30-37
)
|
435 |
|
# Character length for each UCS2-LE byte class; the tuple has one entry per
# class number used in UCS2LE_cls (0-5), and every class is 2 bytes.
UCS2LECharLenTable = (2, 2, 2, 2, 2, 2)

# Bundle of the UCS2-LE tables, keyed by the names the state-machine code
# looks up.  The charset is reported under the name 'UTF-16LE'.
UCS2LESMModel = {
    'classTable': UCS2LE_cls,
    'classFactor': 6,
    'stateTable': UCS2LE_st,
    'charLenTable': UCS2LECharLenTable,
    'name': 'UTF-16LE',
}
|
443 |
|
444 # UTF-8 |
|
445 |
|
# Byte-class table for UTF-8: one class number (0-15) for every possible byte
# value, laid out eight entries per row.  The trailing comment on each row
# gives the hexadecimal byte range that the row covers.
UTF8_cls = (
    1, 1, 1, 1, 1, 1, 1, 1,  # 00 - 07 (0x00 is accepted as a legal value)
    1, 1, 1, 1, 1, 1, 0, 0,  # 08 - 0f
    1, 1, 1, 1, 1, 1, 1, 1,  # 10 - 17
    1, 1, 1, 0, 1, 1, 1, 1,  # 18 - 1f
    1, 1, 1, 1, 1, 1, 1, 1,  # 20 - 27
    1, 1, 1, 1, 1, 1, 1, 1,  # 28 - 2f
    1, 1, 1, 1, 1, 1, 1, 1,  # 30 - 37
    1, 1, 1, 1, 1, 1, 1, 1,  # 38 - 3f
    1, 1, 1, 1, 1, 1, 1, 1,  # 40 - 47
    1, 1, 1, 1, 1, 1, 1, 1,  # 48 - 4f
    1, 1, 1, 1, 1, 1, 1, 1,  # 50 - 57
    1, 1, 1, 1, 1, 1, 1, 1,  # 58 - 5f
    1, 1, 1, 1, 1, 1, 1, 1,  # 60 - 67
    1, 1, 1, 1, 1, 1, 1, 1,  # 68 - 6f
    1, 1, 1, 1, 1, 1, 1, 1,  # 70 - 77
    1, 1, 1, 1, 1, 1, 1, 1,  # 78 - 7f
    2, 2, 2, 2, 3, 3, 3, 3,  # 80 - 87
    4, 4, 4, 4, 4, 4, 4, 4,  # 88 - 8f
    4, 4, 4, 4, 4, 4, 4, 4,  # 90 - 97
    4, 4, 4, 4, 4, 4, 4, 4,  # 98 - 9f
    5, 5, 5, 5, 5, 5, 5, 5,  # a0 - a7
    5, 5, 5, 5, 5, 5, 5, 5,  # a8 - af
    5, 5, 5, 5, 5, 5, 5, 5,  # b0 - b7
    5, 5, 5, 5, 5, 5, 5, 5,  # b8 - bf
    0, 0, 6, 6, 6, 6, 6, 6,  # c0 - c7
    6, 6, 6, 6, 6, 6, 6, 6,  # c8 - cf
    6, 6, 6, 6, 6, 6, 6, 6,  # d0 - d7
    6, 6, 6, 6, 6, 6, 6, 6,  # d8 - df
    7, 8, 8, 8, 8, 8, 8, 8,  # e0 - e7
    8, 8, 8, 8, 8, 9, 8, 8,  # e8 - ef
    10, 11, 11, 11, 11, 11, 11, 11,  # f0 - f7
    12, 13, 13, 13, 14, 15, 0, 0,  # f8 - ff
)
|
479 |
|
# Flattened transition table for the UTF-8 state machine (the 'stateTable' of
# UTF8SMModel).  Every entry is eStart, eError, eItsMe or the number of an
# intermediate state; the trailing comments give the flat index range in hex.
# NOTE(review): presumably indexed by state * classFactor + byte class, which
# with a classFactor of 16 would make every pair of rows below one state --
# confirm against the code that consumes the model.
UTF8_st = (
    eError, eStart, eError, eError, eError, eError, 12, 10,  # 00-07
    9, 11, 8, 7, 6, 5, 4, 3,  # 08-0f
    eError, eError, eError, eError, eError, eError, eError, eError,  # 10-17
    eError, eError, eError, eError, eError, eError, eError, eError,  # 18-1f
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # 20-27
    eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe,  # 28-2f
    eError, eError, 5, 5, 5, 5, eError, eError,  # 30-37
    eError, eError, eError, eError, eError, eError, eError, eError,  # 38-3f
    eError, eError, eError, 5, 5, 5, eError, eError,  # 40-47
    eError, eError, eError, eError, eError, eError, eError, eError,  # 48-4f
    eError, eError, 7, 7, 7, 7, eError, eError,  # 50-57
    eError, eError, eError, eError, eError, eError, eError, eError,  # 58-5f
    eError, eError, eError, eError, 7, 7, eError, eError,  # 60-67
    eError, eError, eError, eError, eError, eError, eError, eError,  # 68-6f
    eError, eError, 9, 9, 9, 9, eError, eError,  # 70-77
    eError, eError, eError, eError, eError, eError, eError, eError,  # 78-7f
    eError, eError, eError, eError, eError, 9, eError, eError,  # 80-87
    eError, eError, eError, eError, eError, eError, eError, eError,  # 88-8f
    eError, eError, 12, 12, 12, 12, eError, eError,  # 90-97
    eError, eError, eError, eError, eError, eError, eError, eError,  # 98-9f
    eError, eError, eError, eError, eError, 12, eError, eError,  # a0-a7
    eError, eError, eError, eError, eError, eError, eError, eError,  # a8-af
    eError, eError, 12, 12, 12, eError, eError, eError,  # b0-b7
    eError, eError, eError, eError, eError, eError, eError, eError,  # b8-bf
    eError, eError, eStart, eStart, eStart, eStart, eError, eError,  # c0-c7
    eError, eError, eError, eError, eError, eError, eError, eError,  # c8-cf
)
|
507 |
|
# Character length for each UTF-8 byte class; the tuple has one entry per
# class number used in UTF8_cls (0-15).
UTF8CharLenTable = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)

# Bundle of the UTF-8 tables plus the charset name, keyed by the names the
# state-machine code looks up.
UTF8SMModel = {
    'classTable': UTF8_cls,
    'classFactor': 16,
    'stateTable': UTF8_st,
    'charLenTable': UTF8CharLenTable,
    'name': 'UTF-8',
}