|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.formatters.latex |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Formatter for LaTeX fancyvrb output. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 from __future__ import division |
|
13 |
|
14 from pygments.formatter import Formatter |
|
15 from pygments.lexer import Lexer |
|
16 from pygments.token import Token, STANDARD_TYPES |
|
17 from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \ |
|
18 iteritems |
|
19 |
|
20 |
|
21 __all__ = ['LatexFormatter'] |
|
22 |
|
23 |
|
def escape_tex(text, commandprefix):
    """
    Return `text` with LaTeX-special characters replaced by escape macros.

    Each special character becomes ``\\<commandprefix>Z??{}``, where ``??``
    is a two-letter code for the character (``bs`` for the backslash,
    ``us`` for the underscore, and so on).
    """
    # Pass 1: park backslash and braces on control characters, because the
    # macros inserted in pass 2 themselves contain those three characters.
    for special, marker in (('\\', '\x00'), ('{', '\x01'), ('}', '\x02')):
        text = text.replace(special, marker)

    # Pass 2: substitute the escape macros.  The order is significant and
    # mirrors the markers first, then the remaining special characters.
    macro_templates = (
        ('\x00', r'\%sZbs{}'),
        ('\x01', r'\%sZob{}'),
        ('\x02', r'\%sZcb{}'),
        ('^', r'\%sZca{}'),
        ('_', r'\%sZus{}'),
        ('&', r'\%sZam{}'),
        ('<', r'\%sZlt{}'),
        ('>', r'\%sZgt{}'),
        ('#', r'\%sZsh{}'),
        ('%', r'\%sZpc{}'),
        ('$', r'\%sZdl{}'),
        ('-', r'\%sZhy{}'),
        ("'", r'\%sZsq{}'),
        ('"', r'\%sZdq{}'),
        ('~', r'\%sZti{}'),
    )
    for target, template in macro_templates:
        text = text.replace(target, template % commandprefix)
    return text
|
43 |
|
44 |
|
# Skeleton of the standalone LaTeX document written by
# LatexFormatter.format_unencoded() when the ``full`` option is enabled.
# The %(...)s placeholders receive the document class, the inputenc
# encoding name, extra preamble text, the style definitions, the title and
# the already formatted code.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
|
61 |
|
62 ## Small explanation of the mess below :) |
|
63 # |
|
64 # The previous version of the LaTeX formatter just assigned a command to |
|
65 # each token type defined in the current style. That obviously is |
|
66 # problematic if the highlighted code is produced for a different style |
|
67 # than the style commands themselves. |
|
68 # |
|
69 # This version works much like the HTML formatter which assigns multiple |
|
70 # CSS classes to each <span> tag, from the most specific to the least |
|
71 # specific token type, thus falling back to the parent token type if one |
|
72 # is not defined. Here, the classes are there too and use the same short |
|
73 # forms given in token.STANDARD_TYPES. |
|
74 # |
|
75 # Highlighted code now only uses one custom command, which by default is |
|
76 # \PY and selectable by the commandprefix option (and in addition the |
|
77 # escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for |
|
78 # backwards compatibility purposes). |
|
79 # |
|
80 # \PY has two arguments: the classes, separated by +, and the text to |
|
81 # render in that style. The classes are resolved into the respective |
|
82 # style commands by magic, which serves to ignore unknown classes. |
|
83 # |
|
84 # The magic macros are: |
|
85 # * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text |
|
86 # to render in \PY@do. Their definition determines the style. |
|
87 # * \PY@reset resets \PY@it etc. to do nothing. |
|
88 # * \PY@toks parses the list of classes, using magic inspired by the |
|
89 # keyval package (but modified to use plusses instead of commas |
|
90 # because fancyvrb redefines commas inside its environments). |
|
91 # * \PY@tok processes one class, calling the \PY@tok@classname command |
|
92 # if it exists. |
|
93 # * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style |
|
94 # for its class. |
|
95 # * \PY resets the style, parses the classnames and then calls \PY@do. |
|
96 # |
|
97 # Tip: to read this code, print it out in substituted form using e.g. |
|
98 # >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''}) |
|
99 |
|
# Macro definitions returned by LatexFormatter.get_style_defs().  The
# template is filled with %-formatting: ``cp`` is the command prefix and
# ``styles`` the per-token-class definitions.  A doubled ``%%`` comes out
# as a single literal percent sign in the generated LaTeX.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
\let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
\let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
\%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
\%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
|
135 |
|
136 |
|
def _get_ttype_name(ttype):
    """
    Return the short class name used for `ttype` in the LaTeX output.

    A truthy entry in ``STANDARD_TYPES`` is returned as is.  When the type
    has no entry, its ancestors are walked until one does; the result is
    that ancestor's short name followed by the last component
    (``ttype[-1]``) of every type passed on the way, outermost first.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    # Components are gathered innermost-first and reversed when joined.
    trail = []
    current = ttype
    while short is None:
        trail.append(current[-1])
        current = current.parent
        short = STANDARD_TYPES.get(current)
    return short + ''.join(reversed(trail))
|
147 |
|
148 |
|
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and precompute the style commands."""
        Formatter.__init__(self, **options)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # Negative numbers are folded onto their absolute value.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # NOTE(review): stored here but not read anywhere else in this class.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two characters disables the
            # feature; ``left``/``right`` are only read when it is enabled.
            self.escapeinside = ''
        self.envname = options.get('envname', u'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """
        Build the lookup tables derived from the style.

        ``self.ttype2name`` maps each styled token type to its short class
        name and ``self.cmd2def`` maps that class name to the TeX code that
        sets up the style.  Token types without any styling are left out.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # Turn the hex digit pairs at offsets 0, 2 and 4 into three
            # fractions of 255; an empty value falls back to white.
            if col:
                return ','.join(['%.2f' % (int(col[i:i + 2], 16) / 255.0)
                                 for i in (0, 2, 4)])
            return '1,1,1'

        # ``$$`` is a placeholder for the command prefix; it is substituted
        # once per token type after the definition has been assembled.
        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is \texttt; \textsf would select sans serif.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = []
        for name, definition in iteritems(self.cmd2def):
            styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' %
                          (cp, name, definition))
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def _escape_comment(self, value):
        """
        Escape the text of a comment token according to the active options.

        `texcomments` takes precedence over `mathescape`, which takes
        precedence over `escapeinside`; with none of them set the whole
        text is escaped.
        """
        cp = self.commandprefix

        if self.texcomments:
            # Guess the comment-start lexeme as the leading run of the first
            # character and escape only that; the rest is left as LaTeX.
            lead = value[0:1]
            marker_len = len(value) - len(value.lstrip(lead))
            return escape_tex(value[:marker_len], cp) + value[marker_len:]

        if self.mathescape:
            # After splitting on '$', even-indexed parts lie outside math
            # mode and are the only ones that get escaped.
            parts = value.split('$')
            parts[0::2] = [escape_tex(part, cp) for part in parts[0::2]]
            return '$'.join(parts)

        if self.escapeinside:
            pieces = []
            text = value
            while text:
                a, sep1, text = text.partition(self.left)
                if not sep1:
                    pieces.append(escape_tex(a, cp))
                    continue
                b, sep2, text = text.partition(self.right)
                if sep2:
                    # Delimiters are dropped, the enclosed text stays raw.
                    pieces.append(escape_tex(a, cp) + b)
                else:
                    # Unterminated escape: treat everything as plain text.
                    pieces.append(escape_tex(a + sep1 + b, cp))
            return ''.join(pieces)

        return escape_tex(value, cp)

    def format_unencoded(self, tokensource, outfile):
        """
        Write the tokens from `tokensource` to `outfile` as one environment.

        `tokensource` yields ``(tokentype, value)`` pairs.  With the `full`
        option the environment is buffered and then embedded into
        ``DOC_TEMPLATE``.  ``self.style``, ``self.full``, ``self.title`` and
        ``self.encoding`` are presumably provided by the ``Formatter`` base
        class.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Buffer the code so it can be spliced into the full document.
            realoutfile = outfile
            outfile = StringIO()

        outfile.write(u'\\begin{' + self.envname + u'}[commandchars=\\\\\\{\\}')
        if self.linenos:
            first, step = self.linenostart, self.linenostep
            # A value of 0 leaves the corresponding fancyvrb option out.
            outfile.write(u',numbers=left' +
                          (u',firstnumber=%d' % first if first else u'') +
                          (u',stepnumber=%d' % step if step else u''))
        if self.mathescape or self.texcomments or self.escapeinside:
            outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
        if self.verboptions:
            outfile.write(u',' + self.verboptions)
        outfile.write(u']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                value = self._escape_comment(value)
            elif ttype not in Token.Escape:
                # Escape tokens carry raw LaTeX and are passed through.
                value = escape_tex(value, cp)

            # Collect the class names from the token type up to the root so
            # that LaTeX can fall back to a parent class.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))

            if styleval:
                # Each line gets its own command so that no command spans a
                # newline; empty lines are written bare.
                lines = value.split('\n')
                for line in lines[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if lines[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, lines[-1]))
            else:
                outfile.write(value)

        outfile.write(u'\\end{' + self.envname + u'}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existings encodings from LaTeX distribution
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_'), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass=self.docclass,
                                   preamble=self.preamble,
                                   title=self.title,
                                   encoding=encoding,
                                   styledefs=self.get_style_defs(),
                                   code=outfile.getvalue()))
|
430 |
|
431 |
|
class LatexEmbeddedLexer(Lexer):
    """
    Wrap a language lexer so that delimited segments become LaTeX escapes.

    The wrapped lexer `lang` is run first.  Its comment and string tokens
    are passed through untouched; every other run of consecutive tokens is
    merged and searched for text enclosed by the `left` and `right`
    delimiters.  Enclosed text is emitted as ``Token.Escape`` and the text
    around it is lexed again with `lang`.
    """
    def __init__(self, left, right, lang, **options):
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, tokentype, value)`` triples for `text`."""
        pending = ''        # merged text of tokens awaiting the escape scan
        pending_start = 0   # index at which ``pending`` begins
        for pos, ttype, chunk in self.lang.get_tokens_unprocessed(text):
            if not (ttype in Token.Comment or ttype in Token.String):
                if not pending:
                    pending_start = pos
                pending += chunk
                continue
            # A comment or string ends the current run: flush it first.
            if pending:
                for item in self.get_tokens_aux(pending_start, pending):
                    yield item
                pending = ''
            yield pos, ttype, chunk
        if pending:
            for item in self.get_tokens_aux(pending_start, pending):
                yield item

    def get_tokens_aux(self, index, text):
        """
        Split `text`, which starts at `index`, into lexed and escaped parts.
        """
        remaining = text
        while remaining:
            plain, opener, remaining = remaining.partition(self.left)
            if plain:
                for offset, ttype, chunk in \
                        self.lang.get_tokens_unprocessed(plain):
                    yield index + offset, ttype, chunk
                index += len(plain)
            if not opener:
                continue
            escaped, closer, remaining = remaining.partition(self.right)
            if closer:
                yield index + len(opener), Token.Escape, escaped
                index += len(opener) + len(escaped) + len(closer)
            else:
                # No closing delimiter: flag the opener as an error and
                # go on with the text that followed it.
                yield index, Token.Error, opener
                index += len(opener)
                remaining = escaped