ThirdParty/CharDet/chardet/charsetprober.py

changeset 12
1d8dd9706f46
parent 0
de9c2efb9d02
child 3537
7662053c3906
equal deleted inserted replaced
11:b0996e4a289e 12:1d8dd9706f46
24 # License along with this library; if not, write to the Free Software 24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 # 02110-1301 USA 26 # 02110-1301 USA
27 ######################### END LICENSE BLOCK ######################### 27 ######################### END LICENSE BLOCK #########################
28 28
29 import constants, re 29 from . import constants
30 import re
30 31
31 class CharSetProber: 32 class CharSetProber:
32 def __init__(self): 33 def __init__(self):
33 pass 34 pass
34 35
46 47
47 def get_confidence(self): 48 def get_confidence(self):
48 return 0.0 49 return 0.0
49 50
50 def filter_high_bit_only(self, aBuf): 51 def filter_high_bit_only(self, aBuf):
51 aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf) 52 aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf)
52 return aBuf 53 return aBuf
53 54
54 def filter_without_english_letters(self, aBuf): 55 def filter_without_english_letters(self, aBuf):
55 aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf) 56 aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf)
56 return aBuf 57 return aBuf
57 58
58 def filter_with_english_letters(self, aBuf): 59 def filter_with_english_letters(self, aBuf):
59 # TODO 60 # TODO
60 return aBuf 61 return aBuf

eric ide

mercurial