ThirdParty/Pygments/pygments/lexers/markup.py

changeset 4172:4f20dba37ab6
parent    4170:8bc578136279
child     4697:c2e9bf425554
# -*- coding: utf-8 -*-
"""
    pygments.lexers.markup
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for non-HTML markup languages.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers.html import HtmlLexer, XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer

from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
    using, this, do_insertions, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Other
from pygments.util import get_bool_opt, ClassNotFound

__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer']


class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }

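# Usage sketch for BBCodeLexer above (explanatory comment, not part of
# upstream Pygments): for an input like '[color=green]hi[/color]',
#
#     list(BBCodeLexer().get_tokens('[color=green]hi[/color]'))
#
# '[color' matches r'\[/?\w+' (Keyword) and pushes the 'tag' state,
# '=' and 'green' become Operator and String, ']' (Keyword) pops the
# state, and 'hi' falls through to the Text rule in 'root'.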

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }


class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language. handle it like it was a code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

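    # Note on _handle_sourcecode above (explanatory comment, not part of
    # upstream Pygments): the helper strips the common indentation from
    # every code line before feeding the dedented code to the sub-lexer,
    # recording each stripped prefix in ``ins``; do_insertions() then
    # splices those prefixes back into the sub-lexer's token stream as
    # plain Text tokens, so token offsets still match the original text.
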
    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                      # has two lines
                p1 * 2 + 1 == p2 and         # they are the same length
                text[p1+1] in '-=' and       # the next line both starts and ends with
                text[p1+1] == text[p2-1]):   # ...a sufficiently high header
            return 0.5

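# Usage sketch for the `handlecodeblocks` option documented in the RstLexer
# docstring above (explanatory comment, not part of upstream Pygments):
#
#     lexer = RstLexer(handlecodeblocks=False)
#
# disables sub-highlighting entirely: the body of a ``.. code-block::
# python`` directive is then emitted as one String token instead of being
# re-lexed by the lexer that get_lexer_by_name() would return.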

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True


class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3].isspace():
            return 0.9


class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }


class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocXulLexer, self).__init__(
            XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocJavascriptLexer, self).__init__(
            JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
    `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocCssLexer, self).__init__(
            CssLexer, MozPreprocPercentLexer, **options)

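if __name__ == '__main__':
    # Demo sketch (not part of upstream Pygments): run this module directly
    # to watch the delegating setup at work. MozPreprocHashLexer emits
    # everything that is not a '#' directive as Other tokens, and
    # DelegatingLexer re-lexes those Other stretches with XmlLexer.
    sample = '#ifdef XP_WIN\n<window title="demo"/>\n#endif\n'
    for token, value in MozPreprocXulLexer().get_tokens(sample):
        print('%-30s %r' % (token, value))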
