eric6/ThirdParty/Pygments/pygments/formatters/latex.py

changeset 6942
2602857055c5
parent 5713
6762afd9f963
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.formatters.latex
4 ~~~~~~~~~~~~~~~~~~~~~~~~~
5
6 Formatter for LaTeX fancyvrb output.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 from __future__ import division
13
14 from pygments.formatter import Formatter
15 from pygments.lexer import Lexer
16 from pygments.token import Token, STANDARD_TYPES
17 from pygments.util import get_bool_opt, get_int_opt, StringIO, xrange, \
18 iteritems
19
20
21 __all__ = ['LatexFormatter']
22
23
def escape_tex(text, commandprefix):
    """
    Return `text` with every character that is special to LaTeX replaced
    by the matching ``\\<commandprefix>Z..{}`` escape macro.
    """
    # Backslash and braces are parked on control characters first: the
    # escape macros themselves contain those three characters and must
    # not be escaped a second time.
    for special, placeholder in (('\\', '\x00'), ('{', '\x01'), ('}', '\x02')):
        text = text.replace(special, placeholder)

    # Applied strictly in this order, one pass per character.
    substitutions = (
        ('\x00', 'Zbs'),
        ('\x01', 'Zob'),
        ('\x02', 'Zcb'),
        ('^', 'Zca'),
        ('_', 'Zus'),
        ('&', 'Zam'),
        ('<', 'Zlt'),
        ('>', 'Zgt'),
        ('#', 'Zsh'),
        ('%', 'Zpc'),
        ('$', 'Zdl'),
        ('-', 'Zhy'),
        ("'", 'Zsq'),
        ('"', 'Zdq'),
        ('~', 'Zti'),
    )
    for char, macro in substitutions:
        text = text.replace(char, r'\%s%s{}' % (commandprefix, macro))
    return text
43
44
# Skeleton of a standalone LaTeX document.  format_unencoded() fills it
# with the %-operator from a mapping that supplies the keys docclass,
# encoding, preamble, styledefs, title and code.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
61
62 ## Small explanation of the mess below :)
63 #
64 # The previous version of the LaTeX formatter just assigned a command to
65 # each token type defined in the current style. That obviously is
66 # problematic if the highlighted code is produced for a different style
67 # than the style commands themselves.
68 #
69 # This version works much like the HTML formatter which assigns multiple
70 # CSS classes to each <span> tag, from the most specific to the least
71 # specific token type, thus falling back to the parent token type if one
72 # is not defined. Here, the classes are there too and use the same short
73 # forms given in token.STANDARD_TYPES.
74 #
75 # Highlighted code now only uses one custom command, which by default is
76 # \PY and selectable by the commandprefix option (and in addition the
77 # escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
78 # backwards compatibility purposes).
79 #
80 # \PY has two arguments: the classes, separated by +, and the text to
81 # render in that style. The classes are resolved into the respective
82 # style commands by magic, which serves to ignore unknown classes.
83 #
84 # The magic macros are:
85 # * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
86 # to render in \PY@do. Their definition determines the style.
87 # * \PY@reset resets \PY@it etc. to do nothing.
88 # * \PY@toks parses the list of classes, using magic inspired by the
89 # keyval package (but modified to use plusses instead of commas
90 # because fancyvrb redefines commas inside its environments).
91 # * \PY@tok processes one class, calling the \PY@tok@classname command
92 # if it exists.
93 # * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
94 # for its class.
95 # * \PY resets the style, parses the classnames and then calls \PY@do.
96 #
97 # Tip: to read this code, print it out in substituted form using e.g.
98 # >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})
99
# Macro definitions emitted by get_style_defs().  The template is filled
# with the %-operator from a mapping with the keys ``cp`` (the command
# prefix) and ``styles`` (the per-class definitions); a literal percent
# sign is therefore written as ``%%``.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
\let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
\let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
\%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
\%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
135
136
def _get_ttype_name(ttype):
    """
    Return the short class name for `ttype`.

    A non-empty entry in ``STANDARD_TYPES`` is returned as is.  Otherwise
    the parent chain is climbed until a registered ancestor is found, and
    the names of the token types passed on the way up are appended to
    that ancestor's short name.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    climbed = []  # own name first, outermost unregistered ancestor last
    while short is None:
        climbed.append(ttype[-1])
        ttype = ttype.parent
        short = STANDARD_TYPES.get(ttype)
    return short + ''.join(reversed(climbed))
147
148
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and build the style command table."""
        Formatter.__init__(self, **options)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # abs() turns negative line-number settings into positive ones.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # Stored only; no method of this class reads it.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything but exactly two delimiter characters disables the
            # feature.
            self.escapeinside = ''
        self.envname = options.get('envname', u'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """
        Fill ``self.ttype2name`` (token type -> short class name) and
        ``self.cmd2def`` (short class name -> LaTeX definition body) from
        the current style.  Token types without any styling are left out.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # 'rrggbb' hex string -> 'r,g,b' with components in 0..1;
            # an empty value yields white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            # '$$' stands for the command prefix and is substituted once
            # the whole definition has been assembled.
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is \texttt; \textsf would select sans-serif.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = []
        for name, definition in iteritems(self.cmd2def):
            styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' %
                          (cp, name, definition))
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def format_unencoded(self, tokensource, outfile):
        """
        Write the ``(tokentype, value)`` pairs of `tokensource` to `outfile`
        as one fancyvrb environment; with the `full` option the environment
        is wrapped in a complete document built from ``DOC_TEMPLATE``.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the environment in memory; it is inserted into the
            # document template at the end.
            realoutfile = outfile
            outfile = StringIO()

        outfile.write(u'\\begin{' + self.envname + u'}[commandchars=\\\\\\{\\}')
        if self.linenos:
            start, step = self.linenostart, self.linenostep
            # A value of 0 leaves the corresponding option out.
            outfile.write(u',numbers=left' +
                          (u',firstnumber=%d' % start if start else u'') +
                          (u',stepnumber=%d' % step if step else u''))
        if self.mathescape or self.texcomments or self.escapeinside:
            outfile.write(u',codes={\\catcode`\\$=3\\catcode`\\^=7\\catcode`\\_=8}')
        if self.verboptions:
            outfile.write(u',' + self.verboptions)
        outfile.write(u']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                if self.texcomments:
                    # Try to guess comment starting lexeme and escape it ...
                    start = value[0:1]
                    for i in xrange(1, len(value)):
                        if start[0] != value[i]:
                            break
                        start += value[i]

                    value = value[len(start):]
                    start = escape_tex(start, cp)

                    # ... but do not escape inside comment.
                    value = start + value
                elif self.mathescape:
                    # Only escape parts not inside a math environment.
                    parts = value.split('$')
                    in_math = False
                    for i, part in enumerate(parts):
                        if not in_math:
                            parts[i] = escape_tex(part, cp)
                        in_math = not in_math
                    value = '$'.join(parts)
                elif self.escapeinside:
                    text = value
                    value = ''
                    while text:
                        a, sep1, text = text.partition(self.left)
                        if sep1:
                            b, sep2, text = text.partition(self.right)
                            if sep2:
                                # Delimited span: passed through raw,
                                # delimiters dropped.
                                value += escape_tex(a, cp) + b
                            else:
                                # Unterminated span: treat as plain text.
                                value += escape_tex(a + sep1 + b, cp)
                        else:
                            value += escape_tex(a, cp)
                else:
                    value = escape_tex(value, cp)
            elif ttype not in Token.Escape:
                # Escape tokens carry LaTeX and are written unescaped.
                value = escape_tex(value, cp)

            # Collect the class names from this token type up to (but not
            # including) the root, then join them root-most first.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))
            if styleval:
                # The style command is emitted per line so that no command
                # argument contains a newline.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        outfile.write(u'\\end{' + self.envname + u'}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existing encodings from LaTeX distribution
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_'), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass=self.docclass,
                                   preamble=self.preamble,
                                   title=self.title,
                                   encoding=encoding,
                                   styledefs=self.get_style_defs(),
                                   code=outfile.getvalue()))
430
431
class LatexEmbeddedLexer(Lexer):
    """
    This lexer takes one lexer as argument, the lexer for the language
    being formatted, and the left and right delimiters for escaped text.

    First everything is scanned using the language lexer to obtain
    strings and comments. All other consecutive tokens are merged and
    the resulting text is scanned for escaped segments, which are given
    the Token.Escape type. Finally text that is not escaped is scanned
    again with the language lexer.
    """
    def __init__(self, left, right, lang, **options):
        """
        Store the escape delimiters `left` and `right` and the wrapped
        language lexer `lang`; `options` are passed on to ``Lexer``.
        """
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples for `text`.

        Comment and string tokens of the wrapped lexer are passed through
        unchanged; every run of other tokens is merged and handed to
        `get_tokens_aux`.
        """
        buf = ''   # merged text of the current run of non-comment/string tokens
        idx = 0    # offset in `text` at which `buf` starts
        for i, t, v in self.lang.get_tokens_unprocessed(text):
            if t in Token.Comment or t in Token.String:
                if buf:
                    for x in self.get_tokens_aux(idx, buf):
                        yield x
                    buf = ''
                yield i, t, v
            else:
                if not buf:
                    idx = i
                buf += v
        if buf:
            for x in self.get_tokens_aux(idx, buf):
                yield x

    def get_tokens_aux(self, index, text):
        """
        Split `text`, which starts at offset `index`, at the escape
        delimiters.  Plain segments are lexed again with the wrapped lexer,
        delimited segments are yielded as ``Token.Escape`` without their
        delimiters, and a left delimiter that is never closed is yielded
        as ``Token.Error``.
        """
        while text:
            a, sep1, text = text.partition(self.left)
            if a:
                for i, t, v in self.lang.get_tokens_unprocessed(a):
                    # `i` is relative to the start of the segment `a`.
                    yield index + i, t, v
                # Advance once per segment, after all of its tokens have
                # been emitted, so that later offsets stay aligned.
                index += len(a)
            if sep1:
                b, sep2, text = text.partition(self.right)
                if sep2:
                    yield index + len(sep1), Token.Escape, b
                    index += len(sep1) + len(b) + len(sep2)
                else:
                    # No closing delimiter: flag the opener and keep
                    # scanning the remainder as ordinary text.
                    yield index, Token.Error, sep1
                    index += len(sep1)
                    text = b

eric ide

mercurial