eric6/ThirdParty/Pygments/pygments/lexers/special.py

changeset 6942:2602857055c5
parent    5713:6762afd9f963
child     7547:21b0534faebc
# -*- coding: utf-8 -*-
"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text
from pygments.util import get_choice_opt, text_type, BytesIO


__all__ = ['TextLexer', 'RawTokenLexer']


class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        return TextLexer.priority

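# Illustrative usage sketch (assumes the standard Pygments 2.x API; a
# sketch, not a definitive reference): the "null" lexer above yields its
# entire input as a single ``Text`` token.
#
#     >>> list(TextLexer().get_tokens_unprocessed('no markup\n'))
#     [(0, Token.Text, 'no markup\n')]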
_ttype_cache = {}

line_re = re.compile(b'.*?\n')


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`. This
    lexer raises exceptions during parsing if the token stream in the
    file is malformed.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = ['raw']
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if isinstance(text, text_type):
            # raw token stream never has any non-ASCII characters
            text = text.encode('ascii')
        if self.compress == 'gz':
            import gzip
            gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
            text = gzipfile.read()
        elif self.compress == 'bz2':
            import bz2
            text = bz2.decompress(text)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        text = text.strip(b'\n') + b'\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v
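
    # Usage sketch (illustrative; ``gzipped_bytes`` is a hypothetical
    # variable holding the output of ``RawTokenFormatter(compress='gz')``):
    #
    #     for ttype, value in RawTokenLexer(compress='gz').get_tokens(gzipped_bytes):
    #         ...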

    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().split(b'\t', 1)
            except ValueError:
                val = match.group().decode('ascii', 'replace')
                ttype = Error
            else:
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    # token type names are ASCII; decode so the attribute
                    # lookup below also works on Python 3, where
                    # ``ttypestr`` is a bytes object
                    ttypes = ttypestr.decode('ascii').split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                # strip the repr() quoting around the value; Python 2
                # writes a leading "u" before the opening quote
                if val.startswith(b'u'):
                    val = val[1:]
                val = val[1:-2].decode('unicode-escape')
            yield length, ttype, val
            length += len(val)
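
For reference, a round-trip sketch using the public Pygments API (the choice
of PythonLexer and TerminalFormatter here is illustrative, not taken from the
file above):

    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.lexers.special import RawTokenLexer
    from pygments.formatters import RawTokenFormatter, TerminalFormatter

    # dump a token stream in the raw "<tokentype>\t<repr(value)>" format
    raw = highlight(u'print("hi")\n', PythonLexer(), RawTokenFormatter())
    # ...then recolour it later without re-lexing the original source
    print(highlight(raw, RawTokenLexer(), TerminalFormatter()))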
