# -*- coding: utf-8 -*-
"""
    pygments.formatters.other
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Other formatters: NullFormatter, RawTokenFormatter, TestcaseFormatter.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.formatter import Formatter
from pygments.util import get_choice_opt
from pygments.token import Token
from pygments.console import colorize

__all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']


class NullFormatter(Formatter):
    """
    Output the text unchanged without any formatting.
    """
    name = 'Text only'
    aliases = ['text', 'null']
    filenames = ['*.txt']

    def format(self, tokensource, outfile):
        enc = self.encoding
        for ttype, value in tokensource:
            if enc:
                outfile.write(value.encode(enc))
            else:
                outfile.write(value)
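
# A minimal usage sketch (illustrative, not part of this module): because
# NullFormatter echoes every token value unchanged, highlighting with it
# round-trips the input, assuming the input ends with a newline (lexers
# normalize one onto the token stream).
#
#     from pygments import highlight
#     from pygments.lexers import PythonLexer
#     from pygments.formatters import NullFormatter
#
#     code = 'print("hi")\n'
#     assert highlight(code, PythonLexer(), NullFormatter()) == code
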
|
class RawTokenFormatter(Formatter):
    r"""
    Format tokens as a raw representation for storing token streams.

    The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
    be converted to a token stream with the `RawTokenLexer`, described in the
    :doc:`lexer list <lexers>`.

    Only two options are accepted:

    `compress`
        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
        compression algorithm after encoding (default: ``''``).
    `error_color`
        If set to a color name, highlight error tokens using that color. If
        set but with no value, defaults to ``'red'``.

    .. versionadded:: 0.11
    """
    name = 'Raw tokens'
    aliases = ['raw', 'tokens']
    filenames = ['*.raw']

    unicodeoutput = False

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        # We ignore self.encoding if it is set, since it gets set for lexer
        # and formatter if given with -Oencoding on the command line.
        # The RawTokenFormatter outputs only ASCII. Override here.
        self.encoding = 'ascii'  # let pygments.format() do the right thing
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        self.error_color = options.get('error_color', None)
        if self.error_color is True:
            self.error_color = 'red'
        if self.error_color is not None:
            try:
                colorize(self.error_color, '')
            except KeyError:
                raise ValueError("Invalid color %r specified" %
                                 self.error_color)

    def format(self, tokensource, outfile):
        try:
            outfile.write(b'')
        except TypeError:
            raise TypeError('The raw tokens formatter needs a binary '
                            'output file')
        if self.compress == 'gz':
            import gzip
            outfile = gzip.GzipFile('', 'wb', 9, outfile)

            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush
        elif self.compress == 'bz2':
            import bz2
            compressor = bz2.BZ2Compressor(9)

            def write(text):
                outfile.write(compressor.compress(text.encode()))

            def flush():
                outfile.write(compressor.flush())
                outfile.flush()
        else:
            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush

        if self.error_color:
            for ttype, value in tokensource:
                line = "%s\t%r\n" % (ttype, value)
                if ttype is Token.Error:
                    write(colorize(self.error_color, line))
                else:
                    write(line)
        else:
            for ttype, value in tokensource:
                write("%s\t%r\n" % (ttype, value))
        flush()
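
# A minimal usage sketch (illustrative, not part of this module). Because
# ``unicodeoutput`` is False, ``highlight()`` returns bytes here; the exact
# token names depend on the lexer version and are only indicative.
#
#     from pygments import highlight
#     from pygments.lexers import PythonLexer
#     from pygments.formatters import RawTokenFormatter
#
#     raw = highlight('x = 1\n', PythonLexer(), RawTokenFormatter())
#     # one line per token, e.g. b"Token.Name\t'x'"
#
#     # the compressed form round-trips to the uncompressed output:
#     import bz2
#     packed = highlight('x = 1\n', PythonLexer(),
#                        RawTokenFormatter(compress='bz2'))
#     assert bz2.decompress(packed) == raw
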
|
TESTCASE_BEFORE = '''\
    def testNeedsName(lexer):
        fragment = %r
        tokens = [
'''
TESTCASE_AFTER = '''\
        ]
        assert list(lexer.get_tokens(fragment)) == tokens
'''


class TestcaseFormatter(Formatter):
    """
    Format tokens as appropriate for a new testcase.

    .. versionadded:: 2.0
    """
    name = 'Testcase'
    aliases = ['testcase']

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        if self.encoding is not None and self.encoding != 'utf-8':
            raise ValueError("Only None and utf-8 are allowed encodings.")

    def format(self, tokensource, outfile):
        indentation = ' ' * 12
        rawbuf = []
        outbuf = []
        for ttype, value in tokensource:
            rawbuf.append(value)
            outbuf.append('%s(%s, %r),\n' % (indentation, ttype, value))

        before = TESTCASE_BEFORE % (''.join(rawbuf),)
        during = ''.join(outbuf)
        after = TESTCASE_AFTER
        if self.encoding is None:
            outfile.write(before + during + after)
        else:
            outfile.write(before.encode('utf-8'))
            outfile.write(during.encode('utf-8'))
            outfile.write(after.encode('utf-8'))
        outfile.flush()
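
# A minimal usage sketch (illustrative, not part of this module): printing
# the result yields a ready-to-paste test function. The testNeedsName stub
# in TESTCASE_BEFORE is a placeholder to rename before committing.
#
#     from pygments import highlight
#     from pygments.lexers import PythonLexer
#     from pygments.formatters import TestcaseFormatter
#
#     print(highlight('x = 1\n', PythonLexer(), TestcaseFormatter()))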
|