ThirdParty/Pygments/pygments/formatters/html.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.formatters.html
4 ~~~~~~~~~~~~~~~~~~~~~~~~
5
6 Formatter for HTML output.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11 import sys, os
12 import StringIO
13
14 try:
15 set
16 except NameError:
17 from sets import Set as set
18
19 from pygments.formatter import Formatter
20 from pygments.token import Token, Text, STANDARD_TYPES
21 from pygments.util import get_bool_opt, get_int_opt, get_list_opt, bytes
22
23
24 __all__ = ['HtmlFormatter']
25
26
def escape_html(text):
    """Return `text` with the HTML-special characters escaped.

    ``&``, ``<``, ``>``, ``"`` and ``'`` are replaced by entity references.
    The ampersand is handled first so that the entities produced by the
    later replacements are not escaped a second time.
    """
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
34
35
def get_random_id():
    """Return a random id for javascript fields.

    The id is the hexadecimal SHA-1 digest of a string built from a random
    number and the current time. It is meant to be unique, not
    unguessable.
    """
    from random import random
    from time import time
    try:
        from hashlib import sha1 as sha
    except ImportError:
        # no hashlib available: fall back to the legacy ``sha`` module
        import sha
        sha = sha.new
    seed = '%s|%s' % (random(), time())
    # Hash constructors only accept byte strings on Python 3. The seed is
    # pure ASCII, so encoding it leaves the value unchanged on Python 2.
    return sha(seed.encode('ascii')).hexdigest()
46
47
def _get_ttype_class(ttype):
    """Return the CSS class name for the token type `ttype`.

    Token types listed in ``STANDARD_TYPES`` use the name found there.
    For any other type the parents are climbed until a listed one is
    found; the result is that ancestor's name followed by one
    ``-<name>`` suffix per level climbed, outermost level first.
    """
    css_name = STANDARD_TYPES.get(ttype)
    if css_name:
        return css_name
    suffixes = []
    while css_name is None:
        # remember the innermost component, then try the parent type
        suffixes.insert(0, '-' + ttype[-1])
        ttype = ttype.parent
        css_name = STANDARD_TYPES.get(ttype)
    return css_name + ''.join(suffixes)
58
59
# Stylesheet skeleton: fixed rules for the line number elements and ``pre``,
# followed by the style definitions. It is filled in with the ``%`` operator
# (key ``styledefs``), which is why the literal percent sign is doubled.
CSSFILE_TEMPLATE = '''\
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
pre { line-height: 125%%; }
%(styledefs)s
'''

# Start of a full document with the stylesheet embedded in a ``<style>``
# element. Format keys: ``title``, ``encoding`` and ``styledefs``.
DOC_HEADER = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
<title>%(title)s</title>
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
<style type="text/css">
''' + CSSFILE_TEMPLATE + '''
</style>
</head>
<body>
<h2>%(title)s</h2>

'''

# Start of a full document that links to an external stylesheet instead.
# Format keys: ``title``, ``encoding`` and ``cssfile``.
DOC_HEADER_EXTERNALCSS = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
<title>%(title)s</title>
<meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
<link rel="stylesheet" href="%(cssfile)s" type="text/css">
</head>
<body>
<h2>%(title)s</h2>

'''

# End of a full document; closes the elements opened by either header.
DOC_FOOTER = '''\
</body>
</html>
'''
103
104
105 class HtmlFormatter(Formatter):
106 r"""
107 Format tokens as HTML 4 ``<span>`` tags within a ``<pre>`` tag, wrapped
108 in a ``<div>`` tag. The ``<div>``'s CSS class can be set by the `cssclass`
109 option.
110
111 If the `linenos` option is set to ``"table"``, the ``<pre>`` is
112 additionally wrapped inside a ``<table>`` which has one row and two
113 cells: one containing the line numbers and one containing the code.
114 Example:
115
116 .. sourcecode:: html
117
118 <div class="highlight" >
119 <table><tr>
120 <td class="linenos" title="click to toggle"
121 onclick="with (this.firstChild.style)
122 { display = (display == '') ? 'none' : '' }">
123 <pre>1
124 2</pre>
125 </td>
126 <td class="code">
127 <pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
128 <span class="Ke">pass</span>
129 </pre>
130 </td>
131 </tr></table></div>
132
133 (whitespace added to improve clarity).
134
135 Wrapping can be disabled using the `nowrap` option.
136
137 A list of lines can be specified using the `hl_lines` option to make these
138 lines highlighted (as of Pygments 0.11).
139
140 With the `full` option, a complete HTML 4 document is output, including
141 the style definitions inside a ``<style>`` tag, or in a separate file if
142 the `cssfile` option is given.
143
144 The `get_style_defs(arg='')` method of a `HtmlFormatter` returns a string
145 containing CSS rules for the CSS classes used by the formatter. The
146 argument `arg` can be used to specify additional CSS selectors that
147 are prepended to the classes. A call `fmter.get_style_defs('td .code')`
148 would result in the following CSS classes:
149
150 .. sourcecode:: css
151
152 td .code .kw { font-weight: bold; color: #00FF00 }
153 td .code .cm { color: #999999 }
154 ...
155
156 If you have Pygments 0.6 or higher, you can also pass a list or tuple to the
157 `get_style_defs()` method to request multiple prefixes for the tokens:
158
159 .. sourcecode:: python
160
161 formatter.get_style_defs(['div.syntax pre', 'pre.syntax'])
162
163 The output would then look like this:
164
165 .. sourcecode:: css
166
167 div.syntax pre .kw,
168 pre.syntax .kw { font-weight: bold; color: #00FF00 }
169 div.syntax pre .cm,
170 pre.syntax .cm { color: #999999 }
171 ...
172
173 Additional options accepted:
174
175 `nowrap`
176 If set to ``True``, don't wrap the tokens at all, not even inside a ``<pre>``
177 tag. This disables most other options (default: ``False``).
178
179 `full`
180 Tells the formatter to output a "full" document, i.e. a complete
181 self-contained document (default: ``False``).
182
183 `title`
184 If `full` is true, the title that should be used to caption the
185 document (default: ``''``).
186
187 `style`
188 The style to use, can be a string or a Style subclass (default:
189 ``'default'``). This option has no effect if the `cssfile`
190 and `noclobber_cssfile` option are given and the file specified in
191 `cssfile` exists.
192
193 `noclasses`
194 If set to true, token ``<span>`` tags will not use CSS classes, but
195 inline styles. This is not recommended for larger pieces of code since
196 it increases output size by quite a bit (default: ``False``).
197
198 `classprefix`
199 Since the token types use relatively short class names, they may clash
200 with some of your own class names. In this case you can use the
201 `classprefix` option to give a string to prepend to all Pygments-generated
202 CSS class names for token types.
203 Note that this option also affects the output of `get_style_defs()`.
204
205 `cssclass`
206 CSS class for the wrapping ``<div>`` tag (default: ``'highlight'``).
207 If you set this option, the default selector for `get_style_defs()`
208 will be this class.
209
210 *New in Pygments 0.9:* If you select the ``'table'`` line numbers, the
211 wrapping table will have a CSS class of this string plus ``'table'``,
212 the default is accordingly ``'highlighttable'``.
213
214 `cssstyles`
215 Inline CSS styles for the wrapping ``<div>`` tag (default: ``''``).
216
217 `prestyles`
218 Inline CSS styles for the ``<pre>`` tag (default: ``''``). *New in
219 Pygments 0.11.*
220
221 `cssfile`
222 If the `full` option is true and this option is given, it must be the
223 name of an external file. If the filename does not include an absolute
224 path, the file's path will be assumed to be relative to the main output
225 file's path, if the latter can be found. The stylesheet is then written
226 to this file instead of the HTML file. *New in Pygments 0.6.*
227
228 `noclobber_cssfile`
229 If `cssfile` is given and the specified file exists, the css file will
230 not be overwritten. This allows the use of the `full` option in
231 combination with a user specified css file. Default is ``False``.
232 *New in Pygments 1.1.*
233
234 `linenos`
235 If set to ``'table'``, output line numbers as a table with two cells,
236 one containing the line numbers, the other the whole code. This is
237 copy-and-paste-friendly, but may cause alignment problems with some
238 browsers or fonts. If set to ``'inline'``, the line numbers will be
239 integrated in the ``<pre>`` tag that contains the code (that setting
240 is *new in Pygments 0.8*).
241
242 For compatibility with Pygments 0.7 and earlier, every true value
243 except ``'inline'`` means the same as ``'table'`` (in particular, that
244 means also ``True``).
245
246 The default value is ``False``, which means no line numbers at all.
247
248 **Note:** with the default ("table") line number mechanism, the line
249 numbers and code can have different line heights in Internet Explorer
250 unless you give the enclosing ``<pre>`` tags an explicit ``line-height``
251 CSS property (you get the default line spacing with ``line-height:
252 125%``).
253
254 `hl_lines`
255 Specify a list of lines to be highlighted. *New in Pygments 0.11.*
256
257 `linenostart`
258 The line number for the first line (default: ``1``).
259
260 `linenostep`
261 If set to a number n > 1, only every nth line number is printed.
262
263 `linenospecial`
264 If set to a number n > 0, every nth line number is given the CSS
265 class ``"special"`` (default: ``0``).
266
267 `nobackground`
268 If set to ``True``, the formatter won't output the background color
269 for the wrapping element (this automatically defaults to ``False``
270 when there is no wrapping element [eg: no argument for the
271 `get_style_defs` method given]) (default: ``False``). *New in
272 Pygments 0.6.*
273
274 `lineseparator`
275 This string is output between lines of code. It defaults to ``"\n"``,
276 which is enough to break a line inside ``<pre>`` tags, but you can
277 e.g. set it to ``"<br>"`` to get HTML line breaks. *New in Pygments
278 0.7.*
279
280 `lineanchors`
281 If set to a nonempty string, e.g. ``foo``, the formatter will wrap each
282 output line in an anchor tag with a ``name`` of ``foo-linenumber``.
283 This allows easy linking to certain lines. *New in Pygments 0.9.*
284
285 `anchorlinenos`
286 If set to `True`, will wrap line numbers in <a> tags. Used in
287 combination with `linenos` and `lineanchors`.
288
289
290 **Subclassing the HTML formatter**
291
292 *New in Pygments 0.7.*
293
294 The HTML formatter is now built in a way that allows easy subclassing, thus
295 customizing the output HTML code. The `format()` method calls
296 `self._format_lines()` which returns a generator that yields tuples of ``(1,
297 line)``, where the ``1`` indicates that the ``line`` is a line of the
298 formatted source code.
299
300 If the `nowrap` option is set, the generator is then iterated over and the
301 resulting HTML is output.
302
303 Otherwise, `format()` calls `self.wrap()`, which wraps the generator with
304 other generators. These may add some HTML code to the one generated by
305 `_format_lines()`, either by modifying the lines generated by the latter,
306 then yielding them again with ``(1, line)``, and/or by yielding other HTML
307 code before or after the lines, with ``(0, html)``. The distinction between
308 source lines and other code makes it possible to wrap the generator multiple
309 times.
310
311 The default `wrap()` implementation adds a ``<div>`` and a ``<pre>`` tag.
312
313 A custom `HtmlFormatter` subclass could look like this:
314
315 .. sourcecode:: python
316
317 class CodeHtmlFormatter(HtmlFormatter):
318
319 def wrap(self, source, outfile):
320 return self._wrap_code(source)
321
322 def _wrap_code(self, source):
323 yield 0, '<code>'
324 for i, t in source:
325 if i == 1:
326 # it's a line of formatted code
327 t += '<br>'
328 yield i, t
329 yield 0, '</code>'
330
331 This results in wrapping the formatted lines with a ``<code>`` tag, where the
332 source lines are broken using ``<br>`` tags.
333
334 After calling `wrap()`, the `format()` method also adds the "line numbers"
335 and/or "full document" wrappers if the respective options are set. Then, all
336 HTML yielded by the wrapped generator is output.
337 """
338
339 name = 'HTML'
340 aliases = ['html']
341 filenames = ['*.html', '*.htm']
342
def __init__(self, **options):
    """
    Configure the formatter from the keyword `options`.

    The base `Formatter` is initialised first. Afterwards the HTML
    specific options are read into attributes and the style lookup tables
    are built by `_create_stylesheet()`.
    """
    Formatter.__init__(self, **options)
    decode = self._decodeifneeded
    fetch = options.get

    self.title = decode(self.title)
    self.nowrap = get_bool_opt(options, 'nowrap', False)
    self.noclasses = get_bool_opt(options, 'noclasses', False)
    self.classprefix = fetch('classprefix', '')
    self.cssclass = decode(fetch('cssclass', 'highlight'))
    self.cssstyles = decode(fetch('cssstyles', ''))
    self.prestyles = decode(fetch('prestyles', ''))
    self.cssfile = decode(fetch('cssfile', ''))
    self.noclobber_cssfile = get_bool_opt(options, 'noclobber_cssfile', False)

    # self.linenos is stored as a code: 0 = off, 2 = 'inline', 1 = any
    # other true value
    requested = fetch('linenos', False)
    if not requested:
        self.linenos = 0
    elif requested == 'inline':
        self.linenos = 2
    else:
        # compatibility with <= 0.7
        self.linenos = 1

    # the numeric line number options are stored as absolute values
    self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
    self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
    self.linenospecial = abs(get_int_opt(options, 'linenospecial', 0))
    self.nobackground = get_bool_opt(options, 'nobackground', False)
    self.lineseparator = fetch('lineseparator', '\n')
    self.lineanchors = fetch('lineanchors', '')
    self.anchorlinenos = fetch('anchorlinenos', False)

    self.hl_lines = set()
    for entry in get_list_opt(options, 'hl_lines', []):
        try:
            number = int(entry)
        except ValueError:
            # entries that cannot be read as a number are ignored
            continue
        self.hl_lines.add(number)

    self._class_cache = {}
    self._create_stylesheet()
379
380 def _get_css_class(self, ttype):
381 """Return the css class of this token type prefixed with
382 the classprefix option."""
383 if ttype in self._class_cache:
384 return self._class_cache[ttype]
385 return self.classprefix + _get_ttype_class(ttype)
386
def _create_stylesheet(self):
    """
    Build the style lookup tables from ``self.style``.

    ``self.ttype2class`` maps token types to CSS class names and
    ``self.class2style`` maps each class name to a tuple ``(css rules,
    token type, length of the token type)``. Only token types for which
    the style defines at least one rule are entered.
    """
    ttype2class = self.ttype2class = {Token: ''}
    class2style = self.class2style = {}
    class_prefix = self.classprefix
    for ttype, ndef in self.style:
        name = class_prefix + _get_ttype_class(ttype)
        rules = []
        if ndef['color']:
            rules.append('color: #%s' % ndef['color'])
        if ndef['bold']:
            rules.append('font-weight: bold')
        if ndef['italic']:
            rules.append('font-style: italic')
        if ndef['underline']:
            rules.append('text-decoration: underline')
        if ndef['bgcolor']:
            rules.append('background-color: #%s' % ndef['bgcolor'])
        if ndef['border']:
            rules.append('border: 1px solid #%s' % ndef['border'])
        if not rules:
            continue
        ttype2class[ttype] = name
        # save len(ttype) to enable ordering the styles by
        # hierarchy (necessary for CSS cascading rules!)
        class2style[name] = ('; '.join(rules), ttype, len(ttype))
411
def get_style_defs(self, arg=None):
    """
    Return the CSS rules for the classes of the current highlighting
    style as one string, one rule per line.

    ``arg`` is a selector, or a list of selectors, placed in front of
    every class. When it is omitted, ``.<cssclass>`` is used if the
    `cssclass` option was passed to the formatter and no selector
    otherwise. The token rules are ordered by the length of their token
    type. A rule for the ``hll`` class and, if a selector is in effect and
    `nobackground` is off, a background rule are put in front of them when
    the style defines the respective colour.
    """
    if arg is None:
        if 'cssclass' in self.options:
            arg = '.' + self.cssclass
        else:
            arg = ''
    if isinstance(arg, basestring):
        selectors = [arg]
    else:
        selectors = list(arg)

    def prefix(cls):
        # qualify the class with every selector and join the alternatives
        if cls:
            cls = '.' + cls
        qualified = []
        for selector in selectors:
            if selector:
                qualified.append(selector + ' ' + cls)
            else:
                qualified.append(cls)
        return ', '.join(qualified)

    ordered = []
    for cls, (style, ttype, level) in self.class2style.iteritems():
        if cls and style:
            ordered.append((level, ttype, cls, style))
    ordered.sort()

    token_rules = []
    for level, ttype, cls, style in ordered:
        # repr(ttype)[6:] drops the first six characters of the repr
        token_rules.append('%s { %s } /* %s */'
                           % (prefix(cls), style, repr(ttype)[6:]))

    leading = []
    if arg and not self.nobackground and \
       self.style.background_color is not None:
        text_style = ''
        if Text in self.ttype2class:
            text_style = ' ' + self.class2style[self.ttype2class[Text]][0]
        leading.append('%s { background: %s;%s }' %
                       (prefix(''), self.style.background_color, text_style))
    if self.style.highlight_color is not None:
        # the highlight rule goes in front of the background rule
        leading.insert(0, '%s.hll { background-color: %s }' %
                       (prefix(''), self.style.highlight_color))
    return '\n'.join(leading + token_rules)
450
451 def _decodeifneeded(self, value):
452 if isinstance(value, bytes):
453 if self.encoding:
454 return value.decode(self.encoding)
455 return value.decode()
456 return value
457
def _wrap_full(self, inner, outfile):
    """
    Wrap `inner` in a complete HTML document.

    If the `cssfile` option is set, the stylesheet is written to that file
    and the document links to it; otherwise the style definitions are
    embedded in the document header. A relative `cssfile` is resolved
    against the directory of ``outfile.name`` when that name is usable and
    against the current directory otherwise. An existing file is left
    alone if `noclobber_cssfile` is set.

    Yields the header, every item of `inner` unchanged, then the footer.
    An ``IOError`` raised while writing the stylesheet is re-raised with
    its ``strerror`` prefixed by ``'Error writing CSS file: '``.
    """
    if self.cssfile:
        if os.path.isabs(self.cssfile):
            # it's an absolute filename
            cssfilename = self.cssfile
        else:
            try:
                filename = outfile.name
                if not filename or filename[0] == '<':
                    # pseudo files, e.g. name == '<fdopen>'
                    raise AttributeError
                cssfilename = os.path.join(os.path.dirname(filename),
                                           self.cssfile)
            except AttributeError:
                sys.stderr.write(
                    'Note: Cannot determine output file name, '
                    'using current directory as base for the CSS file name'
                    + '\n')
                cssfilename = self.cssfile
        # write the CSS file unless it already exists and noclobber_cssfile
        # asks for it to be kept
        try:
            if not os.path.exists(cssfilename) or not self.noclobber_cssfile:
                cf = open(cssfilename, "w")
                try:
                    cf.write(CSSFILE_TEMPLATE %
                             {'styledefs': self.get_style_defs('body')})
                finally:
                    # release the handle even if building or writing the
                    # stylesheet fails
                    cf.close()
        except IOError:
            # sys.exc_info() is valid on every Python version, unlike the
            # two spellings of "except ... as err"
            err = sys.exc_info()[1]
            # strerror may be None; don't let that mask the real error
            err.strerror = 'Error writing CSS file: ' + (err.strerror or '')
            raise

        yield 0, (DOC_HEADER_EXTERNALCSS %
                  dict(title=self.title,
                       cssfile=self.cssfile,
                       encoding=self.encoding))
    else:
        yield 0, (DOC_HEADER %
                  dict(title=self.title,
                       styledefs=self.get_style_defs('body'),
                       encoding=self.encoding))

    for t, line in inner:
        yield t, line
    yield 0, DOC_FOOTER
499
500 def _wrap_tablelinenos(self, inner):
501 dummyoutfile = StringIO.StringIO()
502 lncount = 0
503 for t, line in inner:
504 if t:
505 lncount += 1
506 dummyoutfile.write(line)
507
508 fl = self.linenostart
509 mw = len(str(lncount + fl - 1))
510 sp = self.linenospecial
511 st = self.linenostep
512 la = self.lineanchors
513 aln = self.anchorlinenos
514 if sp:
515 lines = []
516
517 for i in range(fl, fl+lncount):
518 if i % st == 0:
519 if i % sp == 0:
520 if aln:
521 lines.append('<a href="#%s-%d" class="special">%*d</a>' %
522 (la, i, mw, i))
523 else:
524 lines.append('<span class="special">%*d</span>' % (mw, i))
525 else:
526 if aln:
527 lines.append('<a href="#%s-%d">%*d</a>' % (la, i, mw, i))
528 else:
529 lines.append('%*d' % (mw, i))
530 else:
531 lines.append('')
532 ls = '\n'.join(lines)
533 else:
534 lines = []
535 for i in range(fl, fl+lncount):
536 if i % st == 0:
537 if aln:
538 lines.append('<a href="#%s-%d">%*d</a>' % (la, i, mw, i))
539 else:
540 lines.append('%*d' % (mw, i))
541 else:
542 lines.append('')
543 ls = '\n'.join(lines)
544
545 # in case you wonder about the seemingly redundant <div> here: since the
546 # content in the other cell also is wrapped in a div, some browsers in
547 # some configurations seem to mess up the formatting...
548 yield 0, ('<table class="%stable">' % self.cssclass +
549 '<tr><td class="linenos"><div class="linenodiv"><pre>' +
550 ls + '</pre></div></td><td class="code">')
551 yield 0, dummyoutfile.getvalue()
552 yield 0, '</td></tr></table>'
553
554 def _wrap_inlinelinenos(self, inner):
555 # need a list of lines since we need the width of a single number :(
556 lines = list(inner)
557 sp = self.linenospecial
558 st = self.linenostep
559 num = self.linenostart
560 mw = len(str(len(lines) + num - 1))
561
562 if sp:
563 for t, line in lines:
564 yield 1, '<span class="lineno%s">%*s</span> ' % (
565 num%sp == 0 and ' special' or '', mw,
566 (num%st and ' ' or num)) + line
567 num += 1
568 else:
569 for t, line in lines:
570 yield 1, '<span class="lineno">%*s</span> ' % (
571 mw, (num%st and ' ' or num)) + line
572 num += 1
573
574 def _wrap_lineanchors(self, inner):
575 s = self.lineanchors
576 i = 0
577 for t, line in inner:
578 if t:
579 i += 1
580 yield 1, '<a name="%s-%d"></a>' % (s, i) + line
581 else:
582 yield 0, line
583
584 def _wrap_div(self, inner):
585 yield 0, ('<div' + (self.cssclass and ' class="%s"' % self.cssclass)
586 + (self.cssstyles and ' style="%s"' % self.cssstyles) + '>')
587 for tup in inner:
588 yield tup
589 yield 0, '</div>\n'
590
591 def _wrap_pre(self, inner):
592 yield 0, ('<pre'
593 + (self.prestyles and ' style="%s"' % self.prestyles) + '>')
594 for tup in inner:
595 yield tup
596 yield 0, '</pre>'
597
def _format_lines(self, tokensource):
    """
    Just format the tokens, without any wrapping tags.
    Yield individual lines.

    `tokensource` yields ``(token type, text)`` pairs. Every item yielded
    is a ``(1, html)`` tuple holding one output line that ends with the
    `lineseparator`. The text is HTML-escaped and wrapped in a ``<span>``
    when a class (or, with `noclasses`, an inline style) applies. Text
    that contains newlines is split, so that no span runs across a line
    boundary.
    """
    nocls = self.noclasses
    lsep = self.lineseparator
    # for <span style=""> lookup only
    getcls = self.ttype2class.get
    c2s = self.class2style

    # lspan: opening tag of the span that is still open at the end of
    # `line` ('' if none); line: the output line built so far
    lspan = ''
    line = ''
    for ttype, value in tokensource:
        if nocls:
            # climb to the nearest parent token type that has an entry
            cclass = getcls(ttype)
            while cclass is None:
                ttype = ttype.parent
                cclass = getcls(ttype)
            cspan = cclass and '<span style="%s">' % c2s[cclass][0] or ''
        else:
            cls = self._get_css_class(ttype)
            cspan = cls and '<span class="%s">' % cls or ''

        parts = escape_html(value).split('\n')

        # for all but the last line
        for part in parts[:-1]:
            if line:
                # finish the pending line with this part
                if lspan != cspan:
                    line += (lspan and '</span>') + cspan + part + \
                            (cspan and '</span>') + lsep
                else: # both are the same
                    line += part + (lspan and '</span>') + lsep
                yield 1, line
                line = ''
            elif part:
                # a complete line that consists of this part only
                yield 1, cspan + part + (cspan and '</span>') + lsep
            else:
                # an empty line
                yield 1, lsep
        # for the last line
        if line and parts[-1]:
            if lspan != cspan:
                # close the previous span and open the one for this token
                line += (lspan and '</span>') + cspan + parts[-1]
                lspan = cspan
            else:
                # same span as before: it is simply left open
                line += parts[-1]
        elif parts[-1]:
            line = cspan + parts[-1]
            lspan = cspan
        # else we neither have to open a new span nor set lspan

    # output what is left when the input does not end with a newline
    if line:
        yield 1, line + (lspan and '</span>') + lsep
652
653 def _highlight_lines(self, tokensource):
654 """
655 Highlighted the lines specified in the `hl_lines` option by
656 post-processing the token stream coming from `_format_lines`.
657 """
658 hls = self.hl_lines
659
660 for i, (t, value) in enumerate(tokensource):
661 if t != 1:
662 yield t, value
663 if i + 1 in hls: # i + 1 because Python indexes start at 0
664 yield 1, '<span class="hll">%s</span>' % value
665 else:
666 yield 1, value
667
def wrap(self, source, outfile):
    """
    Return `source`, a generator of ``(code, text)`` tuples for the
    individual lines, wrapped in further generators.

    This implementation encloses the lines in a ``<pre>`` element inside
    a ``<div>`` element; `outfile` is not used. Can be overridden.
    """
    in_pre = self._wrap_pre(source)
    return self._wrap_div(in_pre)
675
def format_unencoded(self, tokensource, outfile):
    """
    Format `tokensource` as HTML and write the result to `outfile`.

    The output is produced by a chain of nested generators; the options
    decide which ones take part. Each generator takes at least one
    argument, ``inner``, and wraps the pieces of text that it yields.

    All of them yield 2-tuples ``(code, text)``. A code of 1 marks text
    that belongs to the source being highlighted, a code of 0 marks a
    piece of wrapping. This is what allows several wrappers that work on
    the source line by line, such as the line number generators, to be
    combined.
    """
    pieces = self._format_lines(tokensource)
    if self.hl_lines:
        pieces = self._highlight_lines(pieces)

    if not self.nowrap:
        # the per-line wrappers go first, then wrap() adds the enclosing
        # elements, and the table and the document come last
        if self.linenos == 2:
            pieces = self._wrap_inlinelinenos(pieces)
        if self.lineanchors:
            pieces = self._wrap_lineanchors(pieces)
        pieces = self.wrap(pieces, outfile)
        if self.linenos == 1:
            pieces = self._wrap_tablelinenos(pieces)
        if self.full:
            pieces = self._wrap_full(pieces, outfile)

    for _, text in pieces:
        outfile.write(text)

eric ide

mercurial