eric6/ThirdParty/Pygments/pygments/lexers/markup.py

changeset 7701:25f42e208e08
parent    7547:21b0534faebc
child     7983:54c5cfbb1e29
comparing 7700:a3cf077a8db3 with 7701:25f42e208e08
3 pygments.lexers.markup 3 pygments.lexers.markup
4 ~~~~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~~~~
5 5
6 Lexers for non-HTML markup languages. 6 Lexers for non-HTML markup languages.
7 7
8 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re 12 import re
13 13
22 from pygments.util import get_bool_opt, ClassNotFound 22 from pygments.util import get_bool_opt, ClassNotFound
23 23
24 __all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', 24 __all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
25 'MozPreprocHashLexer', 'MozPreprocPercentLexer', 25 'MozPreprocHashLexer', 'MozPreprocPercentLexer',
26 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', 26 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
27 'MozPreprocCssLexer', 'MarkdownLexer'] 27 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer']
28 28
29 29
30 class BBCodeLexer(RegexLexer): 30 class BBCodeLexer(RegexLexer):
31 """ 31 """
32 A lexer that highlights BBCode(-like) syntax. 32 A lexer that highlights BBCode(-like) syntax.
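A minimal usage sketch for the BBCodeLexer shown above, assuming only the standard Pygments highlight() API; the sample input string is made up:

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers.markup import BBCodeLexer

sample = "[b]bold[/b] and [url=https://example.com]a link[/url]"  # made-up BBCode
print(highlight(sample, BBCodeLexer(), TerminalFormatter()))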
163 if len(line) > indention_size: 163 if len(line) > indention_size:
164 ins.append((len(code), [(0, Text, line[:indention_size])])) 164 ins.append((len(code), [(0, Text, line[:indention_size])]))
165 code += line[indention_size:] 165 code += line[indention_size:]
166 else: 166 else:
167 code += line 167 code += line
168 for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)): 168 yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))
169 yield item
170 169
171 # from docutils.parsers.rst.states 170 # from docutils.parsers.rst.states
172 closers = u'\'")]}>\u2019\u201d\xbb!?' 171 closers = '\'")]}>\u2019\u201d\xbb!?'
173 unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0' 172 unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
174 end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' 173 end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
175 % (re.escape(unicode_delimiters), 174 % (re.escape(unicode_delimiters),
176 re.escape(closers))) 175 re.escape(closers)))
177 176
178 tokens = { 177 tokens = {
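The switch to "yield from" above keeps the existing do_insertions() pattern: the indentation stripped from each line is re-inserted as Text tokens around the sub-lexer's output. A minimal sketch of that pattern, assuming PythonLexer as the embedded language:

from pygments.lexer import do_insertions
from pygments.lexers import PythonLexer
from pygments.token import Text

code = "print('hi')\n"
# Re-insert four stripped spaces of indentation at offset 0 of the code buffer.
ins = [(0, [(0, Text, '    ')])]
for index, token, value in do_insertions(ins, PythonLexer().get_tokens_unprocessed(code)):
    print(index, token, repr(value))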
202 # Line blocks 201 # Line blocks
203 (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', 202 (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
204 bygroups(Text, Operator, using(this, state='inline'))), 203 bygroups(Text, Operator, using(this, state='inline'))),
205 # Sourcecode directives 204 # Sourcecode directives
206 (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' 205 (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
207 r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', 206 r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
208 _handle_sourcecode), 207 _handle_sourcecode),
209 # A directive 208 # A directive
210 (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', 209 (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
211 bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, 210 bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
212 using(this, state='inline'))), 211 using(this, state='inline'))),
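The sourcecode-directive rule above is what hands embedded code to a sub-lexer. An illustrative example (the reST snippet is made up):

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import RstLexer

rst = ".. code-block:: python\n\n    print('hello')\n"  # made-up reST snippet
# handlecodeblocks defaults to True, so the directive body is lexed as Python.
print(highlight(rst, RstLexer(), TerminalFormatter()))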
227 bygroups(Text, Name.Class, Text)), 226 bygroups(Text, Name.Class, Text)),
228 # Definition list 227 # Definition list
229 (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', 228 (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
230 bygroups(using(this, state='inline'), using(this, state='inline'))), 229 bygroups(using(this, state='inline'), using(this, state='inline'))),
231 # Code blocks 230 # Code blocks
232 (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)', 231 (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
233 bygroups(String.Escape, Text, String, String, Text, String)), 232 bygroups(String.Escape, Text, String, String, Text, String)),
234 include('inline'), 233 include('inline'),
235 ], 234 ],
236 'inline': [ 235 'inline': [
237 (r'\\.', Text), # escape 236 (r'\\.', Text), # escape
460 aliases = ['xul+mozpreproc'] 459 aliases = ['xul+mozpreproc']
461 filenames = ['*.xul.in'] 460 filenames = ['*.xul.in']
462 mimetypes = [] 461 mimetypes = []
463 462
464 def __init__(self, **options): 463 def __init__(self, **options):
465 super(MozPreprocXulLexer, self).__init__( 464 super().__init__(XmlLexer, MozPreprocHashLexer, **options)
466 XmlLexer, MozPreprocHashLexer, **options)
467 465
468 466
469 class MozPreprocJavascriptLexer(DelegatingLexer): 467 class MozPreprocJavascriptLexer(DelegatingLexer):
470 """ 468 """
471 Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the 469 Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
477 aliases = ['javascript+mozpreproc'] 475 aliases = ['javascript+mozpreproc']
478 filenames = ['*.js.in'] 476 filenames = ['*.js.in']
479 mimetypes = [] 477 mimetypes = []
480 478
481 def __init__(self, **options): 479 def __init__(self, **options):
482 super(MozPreprocJavascriptLexer, self).__init__( 480 super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
483 JavascriptLexer, MozPreprocHashLexer, **options)
484 481
485 482
486 class MozPreprocCssLexer(DelegatingLexer): 483 class MozPreprocCssLexer(DelegatingLexer):
487 """ 484 """
488 Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the 485 Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
494 aliases = ['css+mozpreproc'] 491 aliases = ['css+mozpreproc']
495 filenames = ['*.css.in'] 492 filenames = ['*.css.in']
496 mimetypes = [] 493 mimetypes = []
497 494
498 def __init__(self, **options): 495 def __init__(self, **options):
499 super(MozPreprocCssLexer, self).__init__( 496 super().__init__(CssLexer, MozPreprocPercentLexer, **options)
500 CssLexer, MozPreprocPercentLexer, **options)
501 497
502 498
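The Mozilla preprocessor lexers above are thin DelegatingLexer compositions. A usage sketch by alias (the .css.in content is made up; CSS uses the percent-style directives via MozPreprocPercentLexer):

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

sample = "%ifdef MOZ_FOO\nbody { color: red; }\n%endif\n"  # made-up .css.in content
lexer = get_lexer_by_name('css+mozpreproc')  # resolves to MozPreprocCssLexer
print(highlight(sample, lexer, TerminalFormatter()))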
503 class MarkdownLexer(RegexLexer): 499 class MarkdownLexer(RegexLexer):
504 """ 500 """
505 For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup. 501 For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.
506 502
507 .. versionadded:: 2.2 503 .. versionadded:: 2.2
508 """ 504 """
509 name = 'markdown' 505 name = 'markdown'
510 aliases = ['md'] 506 aliases = ['md']
511 filenames = ['*.md'] 507 filenames = ['*.md', '*.markdown']
512 mimetypes = ["text/x-markdown"] 508 mimetypes = ["text/x-markdown"]
513 flags = re.MULTILINE 509 flags = re.MULTILINE
514 510
515 def _handle_codeblock(self, match): 511 def _handle_codeblock(self, match):
516 """ 512 """
517 match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks 513 match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
518 """ 514 """
519 from pygments.lexers import get_lexer_by_name 515 from pygments.lexers import get_lexer_by_name
520 516
521 # section header 517 # section header
522 yield match.start(1), String , match.group(1) 518 yield match.start(1), String.Backtick, match.group(1)
523 yield match.start(2), String , match.group(2) 519 yield match.start(2), String.Backtick, match.group(2)
524 yield match.start(3), Text , match.group(3) 520 yield match.start(3), Text , match.group(3)
525 521
526 # lookup lexer if wanted and existing 522 # lookup lexer if wanted and existing
527 lexer = None 523 lexer = None
528 if self.handlecodeblocks: 524 if self.handlecodeblocks:
529 try: 525 try:
534 530
535 # no lexer for this language. handle it like it was a code block 531 # no lexer for this language. handle it like it was a code block
536 if lexer is None: 532 if lexer is None:
537 yield match.start(4), String, code 533 yield match.start(4), String, code
538 else: 534 else:
539 for item in do_insertions([], lexer.get_tokens_unprocessed(code)): 535 yield from do_insertions([], lexer.get_tokens_unprocessed(code))
540 yield item 536
541 537 yield match.start(5), String.Backtick, match.group(5)
542 yield match.start(5), String , match.group(5) 538
543 539 tokens = {
544 tokens = { 540 'root': [
545 'root': [ 541 # heading with '#' prefix (atx-style)
546 # heading with pound prefix 542 (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
547 (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)), 543 # subheading with '#' prefix (atx-style)
548 (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), 544 (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
545 # heading with '=' underlines (Setext-style)
546 (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
547 # subheading with '-' underlines (Setext-style)
548 (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
549 # task list 549 # task list
550 (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', 550 (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
551 bygroups(Text, Keyword, Keyword, using(this, state='inline'))), 551 bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
552 # bulleted lists 552 # bulleted list
553 (r'^(\s*)([*-])(\s)(.+\n)', 553 (r'^(\s*)([*-])(\s)(.+\n)',
554 bygroups(Text, Keyword, Text, using(this, state='inline'))), 554 bygroups(Text, Keyword, Text, using(this, state='inline'))),
555 # numbered lists 555 # numbered list
556 (r'^(\s*)([0-9]+\.)( .+\n)', 556 (r'^(\s*)([0-9]+\.)( .+\n)',
557 bygroups(Text, Keyword, using(this, state='inline'))), 557 bygroups(Text, Keyword, using(this, state='inline'))),
558 # quote 558 # quote
559 (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), 559 (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
560 # code block fenced by 3 backticks
561 (r'^(\s*```\n(.+\n)+\s*```$)', String.Backtick),
562 # code block with language
563 (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$)', _handle_codeblock),
564 # code block indented with 4 spaces or 1 tab
565 (r'(\n\n)((\ {4}|\t)(.+\n)+)', bygroups(Text, String.Backtick)),
566
567 include('inline'),
568 ],
569 'inline': [
570 # escape
571 (r'\\.', Text),
572 # inline code
573 (r'([^`])(`[^`\n]+`)', bygroups(Text, String.Backtick)),
574 # warning: the following rules eat outer tags.
575 # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
576 # bold fenced by '**'
577 (r'(\*\*[^* \n][^*\n]*\*\*)', bygroups(Generic.Strong)),
578 # bold fenced by '__'
579 (r'(\_\_[^_ \n][^_\n]*\_\_)', bygroups(Generic.Strong)),
580 # italics fenced by '*'
581 (r'(\*[^* \n][^*\n]*\*)', bygroups(Generic.Emph)),
582 # italics fenced by '_'
583 (r'(\_[^_ \n][^_\n]*\_)', bygroups(Generic.Emph)),
584 # strikethrough
585 (r'([^~]*)(~~[^~]+~~)', bygroups(Text, Generic.Deleted)),
586 # mentions and topics (twitter and github stuff)
587 (r'[@#][\w/:]+', Name.Entity),
588 # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
589 (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
590 bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
591 # reference-style links, e.g.:
592 # [an example][id]
593 # [id]: http://example.com/
594 (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
595 bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
596 (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
597 bygroups(Text, Name.Label, Text, Name.Attribute)),
598
599 # general text, must come last!
600 (r'[^\\\s]+', Text),
601 (r'.', Text),
602 ],
603 }
604
605 def __init__(self, **options):
606 self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
607 RegexLexer.__init__(self, **options)
608
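A usage sketch for the MarkdownLexer defined above (made-up sample; a fenced block with a language tag goes through _handle_codeblock and is delegated to the named lexer):

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import MarkdownLexer

md = "# Title\n\n```python\nprint('hello')\n```\n"  # made-up Markdown
print(highlight(md, MarkdownLexer(), HtmlFormatter()))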
609
610 class TiddlyWiki5Lexer(RegexLexer):
611 """
612 For `TiddlyWiki5 <https://tiddlywiki.com/#TiddlerFiles>`_ markup.
613
614 .. versionadded:: 2.7
615 """
616 name = 'tiddler'
617 aliases = ['tid']
618 filenames = ['*.tid']
619 mimetypes = ["text/vnd.tiddlywiki"]
620 flags = re.MULTILINE
621
622 def _handle_codeblock(self, match):
623 """
624 match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
625 """
626 from pygments.lexers import get_lexer_by_name
627
628 # section header
629 yield match.start(1), String, match.group(1)
630 yield match.start(2), String, match.group(2)
631 yield match.start(3), Text, match.group(3)
632
633 # lookup lexer if wanted and existing
634 lexer = None
635 if self.handlecodeblocks:
636 try:
637 lexer = get_lexer_by_name(match.group(2).strip())
638 except ClassNotFound:
639 pass
640 code = match.group(4)
641
642 # no lexer for this language. handle it like it was a code block
643 if lexer is None:
644 yield match.start(4), String, code
645 return
646
647 yield from do_insertions([], lexer.get_tokens_unprocessed(code))
648
649 yield match.start(5), String, match.group(5)
650
651 def _handle_cssblock(self, match):
652 """
653 match args: 1:style tag 2:newline, 3:code, 4:closing style tag
654 """
655 from pygments.lexers import get_lexer_by_name
656
657 # section header
658 yield match.start(1), String, match.group(1)
659 yield match.start(2), String, match.group(2)
660
661 lexer = None
662 if self.handlecodeblocks:
663 try:
664 lexer = get_lexer_by_name('css')
665 except ClassNotFound:
666 pass
667 code = match.group(3)
668
669 # no lexer for this language. handle it like it was a code block
670 if lexer is None:
671 yield match.start(3), String, code
672 return
673
674 yield from do_insertions([], lexer.get_tokens_unprocessed(code))
675
676 yield match.start(4), String, match.group(4)
677
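An illustrative check of the _handle_cssblock path (made-up sample; assumes a Pygments where TiddlyWiki5Lexer is registered, i.e. 2.7 or the copy bundled here):

from pygments.lexers import TiddlyWiki5Lexer

tid = "<style>\nbody { color: red; }\n</style>\n"  # made-up tiddler fragment
# The style body should come back as CSS tokens via _handle_cssblock.
for token, value in TiddlyWiki5Lexer().get_tokens(tid):
    print(token, repr(value))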
678 tokens = {
679 'root': [
680 # title in metadata section
681 (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
682 # headings
683 (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
684 (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
685 # bulleted or numbered lists or single-line block quotes
686 # (can be mixed)
687 (r'^(\s*)([*#>]+)(\s*)(.+\n)',
688 bygroups(Text, Keyword, Text, using(this, state='inline'))),
689 # multi-line block quotes
690 (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
691 # table header
692 (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
693 # table footer or caption
694 (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
695 # table class
696 (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
697 # definitions
698 (r'^(;.*)$', bygroups(Generic.Strong)),
560 # text block 699 # text block
561 (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), 700 (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
562 # code block with language 701 # code block with language
563 (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), 702 (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
564 703 # CSS style block
704 (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),
705
706 include('keywords'),
565 include('inline'), 707 include('inline'),
708 ],
709 'keywords': [
710 (words((
711 '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
712 'title', 'type'), prefix=r'^', suffix=r'\b'),
713 Keyword),
566 ], 714 ],
567 'inline': [ 715 'inline': [
568 # escape 716 # escape
569 (r'\\.', Text), 717 (r'\\.', Text),
718 # created or modified date
719 (r'\d{17}', Number.Integer),
570 # italics 720 # italics
571 (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)), 721 (r'(\s)(//[^/]+//)((?=\W|\n))',
722 bygroups(Text, Generic.Emph, Text)),
723 # superscript
724 (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
725 # subscript
726 (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
727 # underscore
728 (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
572 # bold 729 # bold
573 # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics 730 (r"(\s)(''[^']+'')((?=\W|\n))",
574 (r'(\s)((\*\*|__).*\3)((?=\W|\n))', bygroups(Text, Generic.Strong, None, Text)), 731 bygroups(Text, Generic.Strong, Text)),
575 # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
576 # strikethrough 732 # strikethrough
577 (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)), 733 (r'(\s)(~~[^~]+~~)((?=\W|\n))',
734 bygroups(Text, Generic.Deleted, Text)),
735 # TiddlyWiki variables
736 (r'<<[^>]+>>', Name.Tag),
737 (r'\$\$[^$]+\$\$', Name.Tag),
738 (r'\$\([^)]+\)\$', Name.Tag),
739 # TiddlyWiki style or class
740 (r'^@@.*$', Name.Tag),
741 # HTML tags
742 (r'</?[^>]+>', Name.Tag),
578 # inline code 743 # inline code
579 (r'`[^`]+`', String.Backtick), 744 (r'`[^`]+`', String.Backtick),
580 # mentions and topics (twitter and github stuff) 745 # HTML escaped symbols
581 (r'[@#][\w/:]+', Name.Entity), 746 (r'&\S*?;', String.Regex),
582 # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png) 747 # Wiki links
583 (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), 748 (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
584 # reference-style links, e.g.: 749 # External links
585 # [an example][id] 750 (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
586 # [id]: http://example.com/ 751 bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
587 (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), 752 # Transclusion
588 (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', bygroups(Text, Name.Label, Text, Name.Attribute)), 753 (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
754 # URLs
755 (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),
589 756
590 # general text, must come last! 757 # general text, must come last!
591 (r'[^\\\s]+', Text), 758 (r'[\w]+', Text),
592 (r'.', Text), 759 (r'.', Text)
593 ], 760 ],
594 } 761 }
595 762
596 def __init__(self, **options): 763 def __init__(self, **options):
597 self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) 764 self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
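Finally, a usage sketch for the new TiddlyWiki5Lexer as a whole (the tiddler text is made up):

from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import TiddlyWiki5Lexer

tid = "title: Example\n\n! Heading\n\n* item one\n* ''bold'' and //italic// text\n"  # made-up tiddler
print(highlight(tid, TiddlyWiki5Lexer(), HtmlFormatter()))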
