# -*- coding: utf-8 -*-
"""
    pygments.lexers.markup
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for non-HTML markup languages.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers.html import HtmlLexer, XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer

from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
    using, this, do_insertions, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Other
from pygments.util import get_bool_opt, ClassNotFound

__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer']

class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }
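
# A minimal usage sketch (illustrative only, not part of the module): this is
# how the lexer above is typically driven through the standard pygments API.
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     print(highlight('[b]bold[/b] [color=green]green[/color]',
#                     BBCodeLexer(), HtmlFormatter()))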


class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace),
             'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }
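
# For reference, a small MoinMoin sample exercising the rules above
# (illustrative only):
#
#     = Heading =
#     {{{
#     #!python
#     print("literal block with shebang")
#     }}}
#      * bullet item
#     [[MacroName(arg)]] and [wiki:SomePage a link]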


class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name
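
        # Group layout of the 'Sourcecode directives' rule that dispatches
        # here: groups 1-6 are the ".. code-block:: language" header (group 6
        # being the language name), group 7 the blank line after it, group 8
        # the indentation of the first code line, and groups 9-11 the
        # indented code body itself.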

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # look up the lexer if enabled and available
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indentation = match.group(8)
        indentation_size = len(indentation)
        code = (indentation + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language; treat it as a plain code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
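        # The indentation is stripped off before sub-lexing and re-inserted
        # as plain Text tokens via do_insertions(), so token offsets still
        # line up with the original source.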
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indentation_size:
                ins.append((len(code), [(0, Text, line[:indentation_size])]))
                code += line[indentation_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                 # has two lines
            p1 * 2 + 1 == p2 and        # they are the same length
            text[p1+1] in '-=' and      # the next line both starts and ends with
            text[p1+1] == text[p2-1]):  # ...a sufficiently high header
            return 0.5
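
# A minimal sketch of the `handlecodeblocks` option documented above
# (illustrative only): with the option disabled, directive bodies stay plain
# String tokens instead of being handed to a sub-lexer.
#
#     rst = '.. code-block:: python\n\n   print("hi")\n'
#     plain = RstLexer(handlecodeblocks=False)
#     for pos, token, value in plain.get_tokens_unprocessed(rst):
#         print(pos, token, repr(value))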


class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True
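
# Illustrative input showing which state each construct lands in (not part of
# the module):
#
#     \documentclass[a4paper]{article}   % 'command' consumes the [a4paper] option
#     inline math: $x^2$ or \(x^2\)      % 'inlinemath'
#     display math: $$\sum x_i$$ or \[\sum x_i\]   % 'displaymath'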


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        # guard the index so a bare two-letter request at EOF cannot raise
        if len(text) > 3 and text[1:3].isalnum() and text[3].isspace():
            return 0.9
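
# Illustrative manpage fragment exercising the states above (not part of the
# module):
#
#     .TH FOO 1 "April 2014"
#     .SH NAME
#     foo \- make things \fBbold\fP
#     .\" a roff comment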


class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }
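
# Illustrative preprocessor input for the '#' flavour; the '%' flavour below
# differs only in the marker character:
#
#     #ifdef MOZ_FEATURE
#     #expand var version = "__MOZ_APP_VERSION__";
#     #else
#     #include fallback.js
#     #endif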


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that highlights preprocessor directives with the
    `MozPreprocHashLexer` and all other data with the `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocXulLexer, self).__init__(
            XmlLexer, MozPreprocHashLexer, **options)
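
# How the delegation works, in brief: the input is lexed first with
# MozPreprocHashLexer; every span it emits as `Other` is collected, re-lexed
# with the root lexer (here XmlLexer), and spliced back into the token stream.
# The two delegating lexers below follow the same pattern.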


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that highlights preprocessor directives with the
    `MozPreprocHashLexer` and all other data with the `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocJavascriptLexer, self).__init__(
            JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that highlights preprocessor directives with the
    `MozPreprocPercentLexer` and all other data with the `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocCssLexer, self).__init__(
            CssLexer, MozPreprocPercentLexer, **options)