Tue, 15 Sep 2020 19:09:05 +0200
Pygments: updated to 2.7.0.
7701
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
1 | # -*- coding: utf-8 -*- |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
2 | """ |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
3 | pygments.lexers.special |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
4 | ~~~~~~~~~~~~~~~~~~~~~~~ |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
5 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
6 | Special lexers. |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
7 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
8 | :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
9 | :license: BSD, see LICENSE for details. |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
10 | """ |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
11 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
12 | import re |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
13 | from io import BytesIO |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
14 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
15 | from pygments.lexer import Lexer |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
16 | from pygments.token import Token, Error, Text |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
17 | from pygments.util import get_choice_opt |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
18 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
19 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
20 | __all__ = ['TextLexer', 'RawTokenLexer'] |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
21 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
22 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
class TextLexer(Lexer):
    """
    "Null" lexer: performs no highlighting and hands the input back
    unchanged as a single `Text` token.
    """

    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        """Yield the whole of `text` as one ``(0, Text, text)`` triple."""
        yield (0, Text, text)

    # NOTE(review): defined without `self`; presumably the `Lexer` base
    # machinery turns this into a static analyser -- confirm in pygments.lexer.
    def analyse_text(text):
        """Return the class-level `priority`, whatever `text` contains."""
        return TextLexer.priority
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
38 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
39 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
# Cache from the raw token-type name read out of a token stream to the
# token type it resolves to, so each distinct name is resolved only once.
_ttype_cache = {}

# Matches one line of a bytes token stream, up to and including its
# terminating newline.
line_re = re.compile(b'.*?\n')
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
43 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
44 | |
25f42e208e08
Pygments: updated to 2.7.0.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7547
diff
changeset
|
class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Every input line is expected to consist of a dotted token type name
    (e.g. ``Token.Name.Function``), a tab, and the token value written as
    a Python string literal.  A line without a tab, or whose value is not a
    valid string literal, is yielded as an `Error` token carrying the raw
    line.  A malformed token type name raises `ValueError`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = ['raw']
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        """Read the `compress` option, then initialise the base `Lexer`."""
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        """
        Yield ``(tokentype, value)`` pairs recreated from `text`.

        `text` may be `str` (encoded to ASCII first) or `bytes`.  It is
        decompressed according to ``self.compress`` before being parsed.
        """
        if isinstance(text, str):
            # raw token stream never has any non-ASCII characters
            text = text.encode('ascii')
        if self.compress == 'gz':
            import gzip
            # Context manager so the GzipFile is closed once it is read.
            with gzip.GzipFile(fileobj=BytesIO(text), mode='rb') as gzipfile:
                text = gzipfile.read()
        elif self.compress == 'bz2':
            import bz2
            text = bz2.decompress(text)

        # do not call Lexer.get_tokens() because we do not want Unicode
        # decoding to occur, and stripping is not optional.
        text = text.strip(b'\n') + b'\n'
        for _index, ttype, value in self.get_tokens_unprocessed(text):
            yield ttype, value

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples parsed from `text`.

        `text` must be `bytes` with every line newline-terminated.  `index`
        is the running total of the lengths of the values yielded so far.

        Raises `ValueError` if a token type name is malformed.
        """
        import ast

        length = 0
        for match in line_re.finditer(text):
            line = match.group()
            ttypestr, sep, rawval = line.partition(b'\t')
            if not sep:
                # No tab separator: the whole line is an error token.
                ttype = Error
                val = line.decode('ascii', 'replace')
            else:
                ttype = _ttype_cache.get(ttypestr)
                # `is None`, not truthiness, so that a cached token type
                # that happens to be falsy still counts as a cache hit.
                if ttype is None:
                    # The stream is bytes, but attribute lookup on `Token`
                    # needs `str` names, so decode before splitting.
                    try:
                        names = ttypestr.decode('ascii').split('.')[1:]
                    except UnicodeDecodeError:
                        raise ValueError('malformed token name') from None
                    ttype = Token
                    for ttype_ in names:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                try:
                    # The value is a string repr: 'x', "x" or the legacy
                    # u'x'.  Parse it as a literal rather than slicing off a
                    # fixed-width prefix.  Latin-1 decoding never fails and
                    # matches how the `unicode-escape` codec used previously
                    # treated non-escape bytes.
                    val = ast.literal_eval(rawval.decode('latin-1').strip())
                    if not isinstance(val, str):
                        raise ValueError('token value is not a string')
                except (SyntaxError, ValueError):
                    # Unparsable value: report the raw line as an error,
                    # like the no-tab case above.
                    ttype = Error
                    val = line.decode('ascii', 'replace')
            yield length, ttype, val
            length += len(val)