eric6/ThirdParty/Pygments/pygments/lexers/markup.py

# -*- coding: utf-8 -*-
"""
    pygments.lexers.markup
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for non-HTML markup languages.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexers.html import HtmlLexer, XmlLexer
from pygments.lexers.javascript import JavascriptLexer
from pygments.lexers.css import CssLexer

from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
    using, this, do_insertions, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic, Other
from pygments.util import get_bool_opt, ClassNotFound

__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
           'MozPreprocCssLexer', 'MarkdownLexer']


class BBCodeLexer(RegexLexer):
    """
    A lexer that highlights BBCode(-like) syntax.

    .. versionadded:: 0.6
    """

    name = 'BBCode'
    aliases = ['bbcode']
    mimetypes = ['text/x-bbcode']

    tokens = {
        'root': [
            (r'[^[]+', Text),
            # tag/end tag begin
            (r'\[/?\w+', Keyword, 'tag'),
            # stray bracket
            (r'\[', Text),
        ],
        'tag': [
            (r'\s+', Text),
            # attribute with value
            (r'(\w+)(=)("?[^\s"\]]+"?)',
             bygroups(Name.Attribute, Operator, String)),
            # tag argument (a la [color=green])
            (r'(=)("?[^\s"\]]+"?)',
             bygroups(Operator, String)),
            # tag end
            (r'\]', Keyword, '#pop'),
        ],
    }

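
# Example (hypothetical helper, not part of the original module): a minimal
# sketch of driving BBCodeLexer through the public Pygments API.
def _example_bbcode():  # pragma: no cover - illustration only
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    # '[color' enters the 'tag' state as Keyword; '=' and 'green' are then
    # matched as Operator and String, and ']' pops back to 'root'.
    return highlight('[color=green]ok[/color]', BBCodeLexer(), HtmlFormatter())
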

class MoinWikiLexer(RegexLexer):
    """
    For MoinMoin (and Trac) Wiki markup.

    .. versionadded:: 0.7
    """

    name = 'MoinMoin/Trac Wiki markup'
    aliases = ['trac-wiki', 'moin']
    filenames = []
    mimetypes = ['text/x-trac-wiki']
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'^#.*$', Comment),
            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
            # Titles
            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
            # Literal code blocks, with optional shebang
            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace),
             'codeblock'),
            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
            # Lists
            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
            # Other Formatting
            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
             bygroups(Keyword, String, Keyword)),  # Link
            (r'^----+$', Keyword),  # Horizontal rules
            (r'[^\n\'\[{!_~^,|]+', Text),
            (r'\n', Text),
            (r'.', Text),
        ],
        'codeblock': [
            (r'\}\}\}', Name.Builtin, '#pop'),
            # these blocks are allowed to be nested in Trac, but not MoinMoin
            (r'\{\{\{', Text, '#push'),
            (r'[^{}]+', Comment.Preproc),  # slurp boring text
            (r'.', Comment.Preproc),  # allow loose { or }
        ],
    }

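
# Example (hypothetical helper, not part of the original module): inspecting
# the raw token stream instead of formatted output.
def _example_moin():  # pragma: no cover - illustration only
    lexer = MoinWikiLexer()
    # '= Title =' matches the heading rule; '{{{' pushes the 'codeblock'
    # state, whose contents come back as Comment.Preproc tokens.
    return list(lexer.get_tokens('= Title =\n{{{\ncode\n}}}\n'))
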

class RstLexer(RegexLexer):
    """
    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.

    .. versionadded:: 0.7

    Additional options accepted:

    `handlecodeblocks`
        Highlight the contents of ``.. sourcecode:: language``,
        ``.. code:: language`` and ``.. code-block:: language``
        directives with a lexer for the given language (default:
        ``True``).

        .. versionadded:: 0.8
    """
    name = 'reStructuredText'
    aliases = ['rst', 'rest', 'restructuredtext']
    filenames = ['*.rst', '*.rest']
    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
    flags = re.MULTILINE

    def _handle_sourcecode(self, match):
        from pygments.lexers import get_lexer_by_name

        # section header
        yield match.start(1), Punctuation, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator.Word, match.group(3)
        yield match.start(4), Punctuation, match.group(4)
        yield match.start(5), Text, match.group(5)
        yield match.start(6), Keyword, match.group(6)
        yield match.start(7), Text, match.group(7)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(6).strip())
            except ClassNotFound:
                pass
        indention = match.group(8)
        indention_size = len(indention)
        code = (indention + match.group(9) + match.group(10) + match.group(11))

        # no lexer for this language; handle it as a plain code block
        if lexer is None:
            yield match.start(8), String, code
            return

        # highlight the lines with the lexer.
        ins = []
        codelines = code.splitlines(True)
        code = ''
        for line in codelines:
            if len(line) > indention_size:
                ins.append((len(code), [(0, Text, line[:indention_size])]))
                code += line[indention_size:]
            else:
                code += line
        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
            yield item

    # from docutils.parsers.rst.states
    closers = u'\'")]}>\u2019\u201d\xbb!?'
    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
                         % (re.escape(unicode_delimiters),
                            re.escape(closers)))

    tokens = {
        'root': [
            # Heading with overline
            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
             r'(.+)(\n)(\1)(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading,
                      Text, Generic.Heading, Text)),
            # Plain heading
            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
            # Bulleted lists
            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered lists
            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Numbered, but keep words at BOL from becoming lists
            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
             bygroups(Text, Number, using(this, state='inline'))),
            # Line blocks
            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
             bygroups(Text, Operator, using(this, state='inline'))),
            # Sourcecode directives
            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
             _handle_sourcecode),
            # A directive
            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
                      using(this, state='inline'))),
            # A reference target
            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A footnote/citation target
            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
            # A substitution def
            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
                      Punctuation, Text, using(this, state='inline'))),
            # Comments
            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
            # Field list
            (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)),
            (r'^( *)(:.*?:)([ \t]+)(.*?)$',
             bygroups(Text, Name.Class, Text, Name.Function)),
            # Definition list
            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
             bygroups(using(this, state='inline'), using(this, state='inline'))),
            # Code blocks
            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
             bygroups(String.Escape, Text, String, String, Text, String)),
            include('inline'),
        ],
        'inline': [
            (r'\\.', Text),  # escape
            (r'``', String, 'literal'),  # code
            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
             bygroups(String, String.Interpol, String)),
            (r'`.+?`__?', String),  # reference
            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
             bygroups(Name.Variable, Name.Attribute)),  # role
            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
            (r'\*.+?\*', Generic.Emph),  # Emphasis
            (r'\[.*?\]_', String),  # Footnote or citation
            (r'<.+?>', Name.Tag),  # Hyperlink
            (r'[^\\\n\[*`:]+', Text),
            (r'.', Text),
        ],
        'literal': [
            (r'[^`]+', String),
            (r'``' + end_string_suffix, String, '#pop'),
            (r'`', String),
        ]
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)

    def analyse_text(text):
        if text[:2] == '..' and text[2:3] != '.':
            return 0.3
        p1 = text.find("\n")
        p2 = text.find("\n", p1 + 1)
        if (p2 > -1 and                     # has two lines
                p1 * 2 + 1 == p2 and        # of the same length, and
                text[p1+1] in '-=' and      # the second line is an underline
                text[p1+1] == text[p2-1]):  # of '-' or '=' at both ends
            return 0.5

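
# Example (hypothetical helper, not part of the original module): the
# `handlecodeblocks` option decides whether `_handle_sourcecode` re-lexes the
# body of a code directive or leaves it as plain String tokens.
def _example_rst():  # pragma: no cover - illustration only
    text = '.. code:: python\n\n   print("hi")\n\n'
    nested = list(RstLexer().get_tokens(text))
    flat = list(RstLexer(handlecodeblocks=False).get_tokens(text))
    return nested, flat
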

class TexLexer(RegexLexer):
    """
    Lexer for the TeX and LaTeX typesetting languages.
    """

    name = 'TeX'
    aliases = ['tex', 'latex']
    filenames = ['*.tex', '*.aux', '*.toc']
    mimetypes = ['text/x-tex', 'text/x-latex']

    tokens = {
        'general': [
            (r'%.*?\n', Comment),
            (r'[{}]', Name.Builtin),
            (r'[&_^]', Name.Builtin),
        ],
        'root': [
            (r'\\\[', String.Backtick, 'displaymath'),
            (r'\\\(', String, 'inlinemath'),
            (r'\$\$', String.Backtick, 'displaymath'),
            (r'\$', String, 'inlinemath'),
            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
            (r'\\$', Keyword),
            include('general'),
            (r'[^\\$%&_^{}]+', Text),
        ],
        'math': [
            (r'\\([a-zA-Z]+|.)', Name.Variable),
            include('general'),
            (r'[0-9]+', Number),
            (r'[-=!+*/()\[\]]', Operator),
            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
        ],
        'inlinemath': [
            (r'\\\)', String, '#pop'),
            (r'\$', String, '#pop'),
            include('math'),
        ],
        'displaymath': [
            (r'\\\]', String, '#pop'),
            (r'\$\$', String, '#pop'),
            (r'\$', Name.Builtin),
            include('math'),
        ],
        'command': [
            (r'\[.*?\]', Name.Attribute),
            (r'\*', Keyword),
            default('#pop'),
        ],
    }

    def analyse_text(text):
        for start in ("\\documentclass", "\\input", "\\documentstyle",
                      "\\relax"):
            if text[:len(start)] == start:
                return True

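
# Example (hypothetical helper, not part of the original module):
# `analyse_text` above lets `guess_lexer` pick this lexer for input that
# starts with one of the listed TeX prologue commands.
def _example_tex():  # pragma: no cover - illustration only
    from pygments.lexers import guess_lexer
    return guess_lexer('\\documentclass{article}\n\\begin{document}\n')
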

class GroffLexer(RegexLexer):
    """
    Lexer for the (g)roff typesetting language, supporting groff
    extensions. Mainly useful for highlighting manpage sources.

    .. versionadded:: 0.6
    """

    name = 'Groff'
    aliases = ['groff', 'nroff', 'man']
    filenames = ['*.[1234567]', '*.man']
    mimetypes = ['application/x-troff', 'text/troff']

    tokens = {
        'root': [
            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
            (r'\.', Punctuation, 'request'),
            # Regular characters, slurp till we find a backslash or newline
            (r'[^\\\n]+', Text, 'textline'),
            default('textline'),
        ],
        'textline': [
            include('escapes'),
            (r'[^\\\n]+', Text),
            (r'\n', Text, '#pop'),
        ],
        'escapes': [
            # groff has many ways to write escapes.
            (r'\\"[^\n]*', Comment),
            (r'\\[fn]\w', String.Escape),
            (r'\\\(.{2}', String.Escape),
            (r'\\.\[.*\]', String.Escape),
            (r'\\.', String.Escape),
            (r'\\\n', Text, 'request'),
        ],
        'request': [
            (r'\n', Text, '#pop'),
            include('escapes'),
            (r'"[^\n"]+"', String.Double),
            (r'\d+', Number),
            (r'\S+', String),
            (r'\s+', Text),
        ],
    }

    def analyse_text(text):
        if text[:1] != '.':
            return False
        if text[:3] == '.\\"':
            return True
        if text[:4] == '.TH ':
            return True
        if text[1:3].isalnum() and text[3:4].isspace():
            return 0.9

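
# Example (hypothetical helper, not part of the original module): manpage
# sources are usually picked up through the '*.[1234567]' filename pattern;
# passing the sample source lets `analyse_text` break ties between lexers.
def _example_groff():  # pragma: no cover - illustration only
    from pygments.lexers import get_lexer_for_filename
    return get_lexer_for_filename('ls.1', '.TH LS 1\n')
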

class MozPreprocHashLexer(RegexLexer):
    """
    Lexer for Mozilla Preprocessor files (with '#' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozhashpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
        'exprstart': [
            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
            (words((
                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
                'include', 'includesubst', 'error')),
             Comment.Preproc, '#pop'),
        ],
        'expr': [
            (words(('!', '!=', '==', '&&', '||')), Operator),
            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
            (r'\)', Punctuation),
            (r'[0-9]+', Number.Decimal),
            (r'__\w+?__', Name.Variable),
            (r'@\w+?@', Name.Class),
            (r'\w+', Name),
            (r'\n', Text, '#pop'),
            (r'\s+', Text),
            (r'\S', Punctuation),
        ],
    }

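
# Example (hypothetical helper, not part of the original module): only lines
# starting with '#' are treated as preprocessor input; everything else is
# emitted as Other, ready to be re-lexed by a DelegatingLexer subclass below.
def _example_mozpreproc():  # pragma: no cover - illustration only
    lexer = MozPreprocHashLexer()
    return list(lexer.get_tokens('#ifdef FOO\nplain text\n#endif\n'))
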

class MozPreprocPercentLexer(MozPreprocHashLexer):
    """
    Lexer for Mozilla Preprocessor files (with '%' as the marker).

    Other data is left untouched.

    .. versionadded:: 2.0
    """
    name = 'mozpercentpreproc'
    aliases = [name]
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
            (r'.+', Other),
        ],
    }


class MozPreprocXulLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that runs the `MozPreprocHashLexer` and highlights
    the data it leaves unlexed with the `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocXulLexer, self).__init__(
            XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that runs the `MozPreprocHashLexer` and highlights
    the data it leaves unlexed with the `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocJavascriptLexer, self).__init__(
            JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    A `DelegatingLexer` that runs the `MozPreprocPercentLexer` and highlights
    the data it leaves unlexed with the `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super(MozPreprocCssLexer, self).__init__(
            CssLexer, MozPreprocPercentLexer, **options)

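
# In each combination above, the second lexer passed to `DelegatingLexer`
# runs over the whole input, and everything it emits as `Other` is re-lexed
# by the first one. A hypothetical sketch of a further combination along the
# same lines (not part of the original module):
class _ExampleMozPreprocHtmlLexer(DelegatingLexer):  # pragma: no cover
    """Illustration only: '#' preprocessor directives inside HTML."""

    def __init__(self, **options):
        super(_ExampleMozPreprocHtmlLexer, self).__init__(
            HtmlLexer, MozPreprocHashLexer, **options)
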

class MarkdownLexer(RegexLexer):
    """
    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.

    .. versionadded:: 2.2
    """
    name = 'markdown'
    aliases = ['md']
    filenames = ['*.md']
    mimetypes = ["text/x-markdown"]
    flags = re.MULTILINE

    def _handle_codeblock(self, match):
        """
        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
        """
        from pygments.lexers import get_lexer_by_name

        # the opening fence and the language name
        yield match.start(1), String, match.group(1)
        yield match.start(2), String, match.group(2)
        yield match.start(3), Text, match.group(3)

        # lookup lexer if wanted and existing
        lexer = None
        if self.handlecodeblocks:
            try:
                lexer = get_lexer_by_name(match.group(2).strip())
            except ClassNotFound:
                pass
        code = match.group(4)

        # no lexer for this language; handle it as a plain code block
        if lexer is None:
            yield match.start(4), String, code
        else:
            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
                yield item

        yield match.start(5), String, match.group(5)

    tokens = {
        'root': [
            # heading with pound prefix
            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
            # task list
            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
            # bulleted lists
            (r'^(\s*)([*-])(\s)(.+\n)',
             bygroups(Text, Keyword, Text, using(this, state='inline'))),
            # numbered lists
            (r'^(\s*)([0-9]+\.)( .+\n)',
             bygroups(Text, Keyword, using(this, state='inline'))),
            # quote
            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
            # fenced code block without a language
            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
            # code block with language
            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),

            include('inline'),
        ],
        'inline': [
            # escape
            (r'\\.', Text),
            # italics
            (r'(\s)([*_][^*_]+[*_])(\W|\n)',
             bygroups(Text, Generic.Emph, Text)),
            # bold
            # warning: the following rule eats inner markup; e.g. in
            # **foo _bar_ baz**, 'bar' is not rendered as italics
            (r'(\s)((\*\*|__).*\3)((?=\W|\n))',
             bygroups(Text, Generic.Strong, None, Text)),
            # a stricter alternative that refuses inner markup:
            # (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))',
            #  bygroups(Text, Generic.Strong, Text)),
            # strikethrough
            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
             bygroups(Text, Generic.Deleted, Text)),
            # inline code
            (r'`[^`]+`', String.Backtick),
            # mentions and topics (twitter and github stuff)
            (r'[@#][\w/:]+', Name.Entity),
            # (image?) links, e.g.: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),

            # general text, must come last!
            (r'[^\\\s]+', Text),
            (r'.', Text),
        ],
    }

    def __init__(self, **options):
        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
        RegexLexer.__init__(self, **options)
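

# Example (hypothetical helper, not part of the original module): with
# `handlecodeblocks` enabled (the default), the body of a '```python' fenced
# block is re-lexed by the Python lexer via `_handle_codeblock` above.
def _example_markdown():  # pragma: no cover - illustration only
    text = '```python\nprint("hi")\n```\n'
    return list(MarkdownLexer().get_tokens(text))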
