|
# -*- coding: utf-8 -*-
"""
    pygments.formatters.other
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Other formatters: NullFormatter, RawTokenFormatter.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
|
11 |
|
12 from pygments.formatter import Formatter |
|
13 from pygments.util import OptionError, get_choice_opt |
|
14 from pygments.token import Token |
|
15 from pygments.console import colorize |
|
16 |
|
17 __all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter'] |
|
18 |
|
19 |
|
class NullFormatter(Formatter):
    """
    Output the text unchanged without any formatting.
    """
    name = 'Text only'
    aliases = ['text', 'null']
    filenames = ['*.txt']

    def format(self, tokensource, outfile):
        # Pass every token's text straight through; encode only when an
        # output encoding was requested on the formatter.
        encoding = self.encoding
        for _ttype, text in tokensource:
            outfile.write(text.encode(encoding) if encoding else text)
|
35 |
|
36 |
|
class RawTokenFormatter(Formatter):
    r"""
    Format tokens as a raw representation for storing token streams.

    Each token is written as ``tokentype<TAB>repr(tokenstring)\n``; the
    `RawTokenLexer`, described in the :doc:`lexer list <lexers>`, can turn
    such output back into a token stream.

    Only two options are accepted:

    `compress`
        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
        compression algorithm after encoding (default: ``''``).
    `error_color`
        If set to a color name, highlight error tokens using that color.  If
        set but with no value, defaults to ``'red'``.

    .. versionadded:: 0.11

    """
    name = 'Raw tokens'
    aliases = ['raw', 'tokens']
    filenames = ['*.raw']

    unicodeoutput = False

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        # ``-Oencoding`` on the command line sets self.encoding on both the
        # lexer and the formatter, but this formatter only ever emits ASCII;
        # override it here so pygments.format() does the right thing.
        self.encoding = 'ascii'
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        self.error_color = options.get('error_color', None)
        if self.error_color is True:
            self.error_color = 'red'
        if self.error_color is not None:
            # Validate the color name eagerly instead of failing mid-stream.
            try:
                colorize(self.error_color, '')
            except KeyError:
                raise ValueError("Invalid color %r specified" %
                                 self.error_color)

    def format(self, tokensource, outfile):
        # The raw token format is a byte stream; reject text-mode files early.
        try:
            outfile.write(b'')
        except TypeError:
            raise TypeError('The raw tokens formatter needs a binary '
                            'output file')

        # Pick a (write, flush) pair according to the compression setting.
        if self.compress == 'bz2':
            import bz2
            compressor = bz2.BZ2Compressor(9)

            def write(text):
                outfile.write(compressor.compress(text.encode()))

            def flush():
                # Drain the compressor before flushing the underlying file.
                outfile.write(compressor.flush())
                outfile.flush()
        elif self.compress == 'gz':
            import gzip
            outfile = gzip.GzipFile('', 'wb', 9, outfile)

            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush
        else:
            def write(text):
                outfile.write(text.encode())
            flush = outfile.flush

        error_color = self.error_color
        for ttype, value in tokensource:
            row = "%s\t%r\n" % (ttype, value)
            if error_color and ttype is Token.Error:
                row = colorize(error_color, row)
            write(row)
        flush()
|
117 |
|
# Boilerplate wrapped around the token list that TestcaseFormatter emits.
# TESTCASE_BEFORE receives the raw input fragment via its %r placeholder;
# the generated ``(tokentype, value)`` tuples (12-space indented, matching
# TestcaseFormatter.format) are written between the two templates.
TESTCASE_BEFORE = u'''\
    def testNeedsName(self):
        fragment = %r
        tokens = [
'''
TESTCASE_AFTER = u'''\
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
'''
|
127 |
|
128 |
|
class TestcaseFormatter(Formatter):
    """
    Format tokens as appropriate for a new testcase.

    .. versionadded:: 2.0
    """
    name = 'Testcase'
    aliases = ['testcase']

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        # The generated testcase is Python source itself, so only UTF-8
        # (or unencoded text output) makes sense.
        if self.encoding not in (None, 'utf-8'):
            raise ValueError("Only None and utf-8 are allowed encodings.")

    def format(self, tokensource, outfile):
        indent = ' ' * 12
        raw_parts = []      # the original text, to rebuild the fragment
        token_lines = []    # one ``(tokentype, value),`` line per token
        for ttype, value in tokensource:
            raw_parts.append(value)
            token_lines.append('%s(%s, %r),\n' % (indent, ttype, value))

        pieces = (TESTCASE_BEFORE % (u''.join(raw_parts),),
                  u''.join(token_lines),
                  TESTCASE_AFTER)
        if self.encoding is None:
            outfile.write(u''.join(pieces))
        else:
            for piece in pieces:
                outfile.write(piece.encode('utf-8'))
        outfile.flush()