ThirdParty/Pygments/pygments/lexers/special.py

changeset 4172:4f20dba37ab6
parent    3145:a9de05d4a22f
child     4697:c2e9bf425554

diff -r 8bc578136279 -r 4f20dba37ab6 ThirdParty/Pygments/pygments/lexers/special.py
--- a/ThirdParty/Pygments/pygments/lexers/special.py
+++ b/ThirdParty/Pygments/pygments/lexers/special.py
@@ -3,22 +3,19 @@
     pygments.lexers.special
     ~~~~~~~~~~~~~~~~~~~~~~~

     Special lexers.

-    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """

-from __future__ import unicode_literals
-
 import re
-import io

 from pygments.lexer import Lexer
 from pygments.token import Token, Error, Text
-from pygments.util import get_choice_opt, b
+from pygments.util import get_choice_opt, text_type, BytesIO


 __all__ = ['TextLexer', 'RawTokenLexer']

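The import hunk tracks the switch from a separate Python 3 port of Pygments (which needed `unicode_literals` and the `pygments.util.b()` bytes helper) to upstream's single 2/3 codebase, where `pygments.util` ships small compatibility shims and native `b'...'` literals work on Python 2.6+ and 3.x alike. A minimal sketch of what the two imported names amount to (the exact upstream definitions may differ slightly):

import sys
from io import BytesIO    # bytes-backed file object on Python 2.6+ and 3.x

if sys.version_info[0] >= 3:
    text_type = str       # Unicode text type on Python 3
else:
    text_type = unicode   # Unicode text type on Python 2

# b'...' literals are valid on both lines, so the old b() wrapper that
# turned str into bytes is no longer needed:
assert isinstance(b'.*?\n', bytes)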
@@ -35,11 +32,12 @@
         yield 0, Text, text


 _ttype_cache = {}

-line_re = re.compile(b('.*?\n'))
+line_re = re.compile(b'.*?\n')
+

 class RawTokenLexer(Lexer):
     """
     Recreate a token stream formatted with the `RawTokenFormatter`. This
     lexer raises exceptions during parsing if the token stream in the
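The pattern becomes a plain bytes literal because the raw token stream is handled as bytes throughout. Each line that `line_re.finditer()` yields holds one tab-separated pair, dotted token type then value repr, as emitted by `RawTokenFormatter`. A small sketch of that format and of the split performed further down (the sample stream is made up):

import re

line_re = re.compile(b'.*?\n')             # same pattern as in the hunk above
raw = (b"Token.Keyword\t'def'\n"           # assumed RawTokenFormatter output
       b"Token.Text\t' '\n"
       b"Token.Name.Function\t'f'\n")

for match in line_re.finditer(raw):
    ttypestr, val = match.group().split(b'\t', 1)
    print(ttypestr, val)                   # e.g. b'Token.Keyword' b"'def'\n"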
@@ -60,34 +58,34 @@
         self.compress = get_choice_opt(options, 'compress',
                                        ['', 'none', 'gz', 'bz2'], '')
         Lexer.__init__(self, **options)

     def get_tokens(self, text):
-        if isinstance(text, str):
+        if isinstance(text, text_type):
             # raw token stream never has any non-ASCII characters
             text = text.encode('ascii')
         if self.compress == 'gz':
             import gzip
-            gzipfile = gzip.GzipFile('', 'rb', 9, io.StringIO(text))
+            gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
             text = gzipfile.read()
         elif self.compress == 'bz2':
             import bz2
             text = bz2.decompress(text)

         # do not call Lexer.get_tokens() because we do not want Unicode
         # decoding to occur, and stripping is not optional.
-        text = text.strip(b('\n')) + b('\n')
+        text = text.strip(b'\n') + b'\n'
         for i, t, v in self.get_tokens_unprocessed(text):
             yield t, v

     def get_tokens_unprocessed(self, text):
         length = 0
         for match in line_re.finditer(text):
             try:
-                ttypestr, val = match.group().split(b('\t'), 1)
+                ttypestr, val = match.group().split(b'\t', 1)
             except ValueError:
-                val = match.group().decode(self.encoding)
+                val = match.group().decode('ascii', 'replace')
                 ttype = Error
             else:
                 ttype = _ttype_cache.get(ttypestr)
                 if not ttype:
                     ttype = Token
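After this hunk, `get_tokens()` encodes text input to ASCII bytes, optionally decompresses it, and on a malformed line falls back to `decode('ascii', 'replace')` instead of the lexer's configured encoding, so error recovery itself can no longer raise. A round-trip usage sketch built on the standard Pygments API (the input string is just placeholder material):

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.lexers.special import RawTokenLexer
from pygments.formatters import RawTokenFormatter

# Format source code into a gzip-compressed raw token stream (bytes),
# then recover the (tokentype, value) pairs with RawTokenLexer.
raw = highlight('print("hi")', PythonLexer(),
                RawTokenFormatter(compress='gz'))
for ttype, value in RawTokenLexer(compress='gz').get_tokens(raw):
    print(ttype, repr(value))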

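The hunk cuts off right after the cache miss. In the corresponding Pygments sources the token type is then rebuilt from its dotted name by attribute access starting at `Token` and stored in `_ttype_cache`; a sketch of that reconstruction, equivalent in effect to `pygments.token.string_to_tokentype` (`ttype_from_string` is a hypothetical name used here for illustration):

from pygments.token import Token

def ttype_from_string(ttypestr):
    # Walk attribute access from the root Token; subtypes such as
    # Token.Name.Function are created and cached on first access.
    ttype = Token
    for part in ttypestr.split('.')[1:]:
        ttype = getattr(ttype, part)
    return ttype

assert ttype_from_string('Token.Name.Function') is Token.Name.Function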