ThirdParty/Pygments/pygments/lexers/special.py

changeset 0:de9c2efb9d02
child 12:1d8dd9706f46
diff -1:000000000000 -> 0:de9c2efb9d02 (file added in this changeset)
# -*- coding: utf-8 -*-
"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
import cStringIO

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text
from pygments.util import get_choice_opt, b


__all__ = ['TextLexer', 'RawTokenLexer']


class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text
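
# A minimal usage sketch: TextLexer hands the whole input back as a
# single ``Text`` token, so highlighting plain text is a pass-through.
#
#     >>> list(TextLexer().get_tokens_unprocessed(u'hello\n'))
#     [(0, Token.Text, u'hello\n')]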

_ttype_cache = {}

line_re = re.compile(b('.*?\n'))

class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`. This
    lexer raises exceptions during parsing if the token stream in the
    file is malformed.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = ['raw']
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if isinstance(text, unicode):
            # raw token stream never has any non-ASCII characters
            text = text.encode('ascii')
        if self.compress == 'gz':
            import gzip
            gzipfile = gzip.GzipFile('', 'rb', 9, cStringIO.StringIO(text))
            text = gzipfile.read()
        elif self.compress == 'bz2':
            import bz2
            text = bz2.decompress(text)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        text = text.strip(b('\n')) + b('\n')
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v
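
    # A rough round-trip sketch for the ``compress`` option, assuming
    # ``code`` holds some Python source as a string:
    #
    #     >>> from pygments import highlight
    #     >>> from pygments.lexers import PythonLexer
    #     >>> from pygments.formatters import RawTokenFormatter
    #     >>> raw = highlight(code, PythonLexer(),
    #     ...                 RawTokenFormatter(compress='gz'))
    #     >>> tokens = list(RawTokenLexer(compress='gz').get_tokens(raw))
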
    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                # each line is "<dotted token type>\t<repr of the value>"
                ttypestr, val = match.group().split(b('\t'), 1)
            except ValueError:
                val = match.group().decode(self.encoding)
                ttype = Error
            else:
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    # resolve e.g. "Token.Name.Builtin" attribute by
                    # attribute, starting from the root Token
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                # strip the enclosing u'...' plus the newline and undo
                # the escaping applied by repr()
                val = val[2:-2].decode('unicode-escape')
            yield length, ttype, val
            length += len(val)
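
# Sketch of the line format consumed by ``get_tokens_unprocessed()`` (and
# produced by ``RawTokenFormatter``): one token per line -- the dotted
# token type, a tab, then the repr() of the token value.
#
#     >>> data = b("Token.Keyword\tu'def'\n" "Token.Text\tu' '\n")
#     >>> for pos, ttype, val in RawTokenLexer().get_tokens_unprocessed(data):
#     ...     print pos, ttype, repr(val)
#     0 Token.Keyword u'def'
#     3 Token.Text u' '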
