|
1 ######################## BEGIN LICENSE BLOCK ######################## |
|
2 # The Original Code is Mozilla Communicator client code. |
|
3 # |
|
4 # The Initial Developer of the Original Code is |
|
5 # Netscape Communications Corporation. |
|
6 # Portions created by the Initial Developer are Copyright (C) 1998 |
|
7 # the Initial Developer. All Rights Reserved. |
|
8 # |
|
9 # Contributor(s): |
|
10 # Mark Pilgrim - port to Python |
|
11 # |
|
12 # This library is free software; you can redistribute it and/or |
|
13 # modify it under the terms of the GNU Lesser General Public |
|
14 # License as published by the Free Software Foundation; either |
|
15 # version 2.1 of the License, or (at your option) any later version. |
|
16 # |
|
17 # This library is distributed in the hope that it will be useful, |
|
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
20 # Lesser General Public License for more details. |
|
21 # |
|
22 # You should have received a copy of the GNU Lesser General Public |
|
23 # License along with this library; if not, write to the Free Software |
|
24 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
|
25 # 02110-1301 USA |
|
26 ######################### END LICENSE BLOCK ######################### |
|
27 |
|
28 import constants, sys |
|
29 from charsetprober import CharSetProber |
|
30 |
|
class CharSetGroupProber(CharSetProber):
    """Prober that delegates to a list of child probers.

    The child probers live in ``self._mProbers``.  This class only creates
    the empty list; it is presumably filled in by subclasses (not visible
    here -- confirm against the concrete group probers).  Falsy entries in
    the list are skipped everywhere.

    ``self.get_state()`` and ``self._mState`` are not defined in this class;
    they are assumed to be provided by ``CharSetProber``.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        # Number of child probers currently flagged ``active``.
        self._mActiveNum = 0
        # Child probers that input is fanned out to.
        self._mProbers = []
        # Child prober currently considered the best match, or None.
        self._mBestGuessProber = None

    def reset(self):
        """Reset this prober and every child, marking each child active."""
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for prober in self._mProbers:
            if prober:
                prober.reset()
                # Built-in True instead of ``constants.True``: True is a
                # reserved keyword on Python 3, so the attribute form does
                # not even parse there.
                prober.active = True
                self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        """Return the charset name reported by the best-guess child prober.

        If no best guess has been recorded yet, ``get_confidence()`` is
        called first because it selects one as a side effect.  Returns
        ``None`` when there is still no best guess afterwards.
        """
        if not self._mBestGuessProber:
            self.get_confidence()
            if not self._mBestGuessProber:
                return None
        return self._mBestGuessProber.get_charset_name()

    def feed(self, aBuf):
        """Pass ``aBuf`` to each active child prober and update group state.

        A child that returns ``constants.eFoundIt`` becomes the best guess,
        the group state is set to ``eFoundIt`` and feeding stops.  A child
        that returns ``constants.eNotMe`` is deactivated; once no active
        children remain the group state becomes ``eNotMe``.

        Returns the value of ``self.get_state()`` after processing.
        """
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                continue
            st = prober.feed(aBuf)
            if not st:
                # Falsy status: nothing to act on for this child.
                continue
            if st == constants.eFoundIt:
                self._mBestGuessProber = prober
                # Record the positive result on the group as well.  Without
                # this the returned state never reflects the match and the
                # eFoundIt branch of get_confidence() is unreachable.
                self._mState = constants.eFoundIt
                return self.get_state()
            elif st == constants.eNotMe:
                prober.active = False
                self._mActiveNum -= 1
                if self._mActiveNum <= 0:
                    self._mState = constants.eNotMe
                    return self.get_state()
        return self.get_state()

    def get_confidence(self):
        """Return the group's confidence and select the best-guess child.

        Returns 0.99 when the group state is ``constants.eFoundIt`` and 0.01
        when it is ``constants.eNotMe``; in both cases the current best
        guess is left untouched.  Otherwise the best guess is recomputed as
        the active child with the highest confidence, and that confidence
        is returned (0.0 if no active child reports a confidence above 0.0).
        """
        st = self.get_state()
        if st == constants.eFoundIt:
            return 0.99
        elif st == constants.eNotMe:
            return 0.01
        bestConf = 0.0
        self._mBestGuessProber = None
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                if constants._debug:
                    # %-formatting so a child whose name is None cannot
                    # raise TypeError from string concatenation.
                    sys.stderr.write('%s not active\n' % prober.get_charset_name())
                continue
            cf = prober.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' % (prober.get_charset_name(), cf))
            if bestConf < cf:
                bestConf = cf
                self._mBestGuessProber = prober
        if not self._mBestGuessProber:
            return 0.0
        return bestConf