eric6/ThirdParty/Pygments/pygments/lexers/special.py

changeset 8258:82b608e352ec
# -*- coding: utf-8 -*-
"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from io import BytesIO

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text
from pygments.util import get_choice_opt


__all__ = ['TextLexer', 'RawTokenLexer']

class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        return TextLexer.priority

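# A minimal usage sketch (illustrative, not part of the original module):
# TextLexer yields its whole input as a single Text token, so highlighting
# with it is effectively a pass-through.
#
#     >>> from pygments import highlight
#     >>> from pygments.formatters import NullFormatter
#     >>> highlight('no markup here\n', TextLexer(), NullFormatter())
#     'no markup here\n'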

_ttype_cache = {}

line_re = re.compile('.*?\n')


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`. This
    lexer raises exceptions during parsing if the token stream in the
    file is malformed.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = ['raw']
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

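    # Usage sketch (illustrative): the `compress` option must match how the
    # stream was written, e.g. RawTokenLexer(compress='gz') for input
    # produced with RawTokenFormatter(compress='gz').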
    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            if self.compress == 'gz':
                import gzip
                gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
                text = gzipfile.read()
            elif self.compress == 'bz2':
                import bz2
                text = bz2.decompress(text)
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

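    # A hedged round-trip sketch (not part of the original module): output
    # of pygments.formatters.RawTokenFormatter is latin-1 encodable bytes,
    # which this lexer can consume again after decoding.
    #
    #     >>> from pygments import highlight
    #     >>> from pygments.formatters import RawTokenFormatter
    #     >>> from pygments.lexers.python import PythonLexer
    #     >>> raw = highlight('x = 1\n', PythonLexer(), RawTokenFormatter())
    #     >>> next(RawTokenLexer().get_tokens(raw.decode('latin1')))
    #     (Token.Name, 'x')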
    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
            except ValueError:
                val = match.group()
                ttype = Error
            else:
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = val[1:-1].encode().decode('unicode-escape')
            yield length, ttype, val
            length += len(val)
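
# Illustrative note, grounded in the split('\t', 1) parser above: each line
# of a raw token stream is a fully qualified token type, a tab, and the
# repr() of the token value, e.g.
#
#     Token.Keyword	'def'
#     Token.Text	' '
#     Token.Name.Function	'greet'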
