24 # License along with this library; if not, write to the Free Software |
24 # License along with this library; if not, write to the Free Software |
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA |
26 # 02110-1301 USA |
26 # 02110-1301 USA |
27 ######################### END LICENSE BLOCK ######################### |
27 ######################### END LICENSE BLOCK ######################### |
28 |
28 |
29 import constants, re |
29 from . import constants |
|
30 import re |
30 |
31 |
31 class CharSetProber: |
32 class CharSetProber: |
32 def __init__(self): |
33 def __init__(self): |
33 pass |
34 pass |
34 |
35 |
46 |
47 |
47 def get_confidence(self): |
48 def get_confidence(self): |
48 return 0.0 |
49 return 0.0 |
49 |
50 |
50 def filter_high_bit_only(self, aBuf): |
51 def filter_high_bit_only(self, aBuf): |
51 aBuf = re.sub(r'([\x00-\x7F])+', ' ', aBuf) |
52 aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) |
52 return aBuf |
53 return aBuf |
53 |
54 |
54 def filter_without_english_letters(self, aBuf): |
55 def filter_without_english_letters(self, aBuf): |
55 aBuf = re.sub(r'([A-Za-z])+', ' ', aBuf) |
56 aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) |
56 return aBuf |
57 return aBuf |
57 |
58 |
58 def filter_with_english_letters(self, aBuf): |
59 def filter_with_english_letters(self, aBuf): |
59 # TODO |
60 # TODO |
60 return aBuf |
61 return aBuf |