534 |
530 |
535 # no lexer for this language. handle it like it was a code block |
531 # no lexer for this language. handle it like it was a code block |
536 if lexer is None: |
532 if lexer is None: |
537 yield match.start(4), String, code |
533 yield match.start(4), String, code |
538 else: |
534 else: |
539 for item in do_insertions([], lexer.get_tokens_unprocessed(code)): |
535 yield from do_insertions([], lexer.get_tokens_unprocessed(code)) |
540 yield item |
536 |
541 |
537 yield match.start(5), String.Backtick, match.group(5) |
542 yield match.start(5), String , match.group(5) |
538 |
543 |
539 tokens = { |
544 tokens = { |
540 'root': [ |
545 'root': [ |
541 # heading with '#' prefix (atx-style) |
546 # heading with pound prefix |
542 (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)), |
547 (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)), |
543 # subheading with '#' prefix (atx-style) |
548 (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), |
544 (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)), |
|
545 # heading with '=' underlines (Setext-style) |
|
546 (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)), |
|
547 # subheading with '-' underlines (Setext-style) |
|
548 (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)), |
549 # task list |
549 # task list |
550 (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', |
550 (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', |
551 bygroups(Text, Keyword, Keyword, using(this, state='inline'))), |
551 bygroups(Text, Keyword, Keyword, using(this, state='inline'))), |
552 # bulleted lists |
552 # bulleted list |
553 (r'^(\s*)([*-])(\s)(.+\n)', |
553 (r'^(\s*)([*-])(\s)(.+\n)', |
554 bygroups(Text, Keyword, Text, using(this, state='inline'))), |
554 bygroups(Text, Keyword, Text, using(this, state='inline'))), |
555 # numbered lists |
555 # numbered list |
556 (r'^(\s*)([0-9]+\.)( .+\n)', |
556 (r'^(\s*)([0-9]+\.)( .+\n)', |
557 bygroups(Text, Keyword, using(this, state='inline'))), |
557 bygroups(Text, Keyword, using(this, state='inline'))), |
558 # quote |
558 # quote |
559 (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), |
559 (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), |
|
560 # code block fenced by 3 backticks |
|
561 (r'^(\s*```\n(.+\n)+\s*```$)', String.Backtick), |
|
562 # code block with language |
|
563 (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$)', _handle_codeblock), |
|
564 # code block indented with 4 spaces or 1 tab |
|
565 (r'(\n\n)((\ {4}|\t)(.+\n)+)', bygroups(Text, String.Backtick)), |
|
566 |
|
567 include('inline'), |
|
568 ], |
|
569 'inline': [ |
|
570 # escape |
|
571 (r'\\.', Text), |
|
572 # inline code |
|
573 (r'([^`])(`[^`\n]+`)', bygroups(Text, String.Backtick)), |
|
574 # warning: the following rules eat outer tags. |
|
575 # eg. **foo _bar_ baz** => foo and baz are not recognized as bold |
|
576 # bold fenced by '**' |
|
577 (r'(\*\*[^* \n][^*\n]*\*\*)', bygroups(Generic.Strong)), |
|
578 # bold fenced by '__' |
|
579 (r'(\_\_[^_ \n][^_\n]*\_\_)', bygroups(Generic.Strong)), |
|
580 # italics fenced by '*' |
|
581 (r'(\*[^* \n][^*\n]*\*)', bygroups(Generic.Emph)), |
|
582 # italics fenced by '_' |
|
583 (r'(\_[^_ \n][^_\n]*\_)', bygroups(Generic.Emph)), |
|
584 # strikethrough |
|
585 (r'([^~]*)(~~[^~]+~~)', bygroups(Text, Generic.Deleted)), |
|
586 # mentions and topics (twitter and github stuff) |
|
587 (r'[@#][\w/:]+', Name.Entity), |
|
588 # (image?) links eg: ![alt text](https://example.com/image.png) |
|
589 (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', |
|
590 bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), |
|
591 # reference-style links, e.g.: |
|
592 # [an example][id] |
|
593 # [id]: http://example.com/ |
|
594 (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', |
|
595 bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), |
|
596 (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', |
|
597 bygroups(Text, Name.Label, Text, Name.Attribute)), |
|
598 |
|
599 # general text, must come last! |
|
600 (r'[^\\\s]+', Text), |
|
601 (r'.', Text), |
|
602 ], |
|
603 } |
|
604 |
|
def __init__(self, **options):
    """
    Set up the lexer from keyword options.

    The boolean ``handlecodeblocks`` option (default ``True``) is read from
    *options* and stored on the instance before all options are handed on
    to ``RegexLexer.__init__``.
    """
    handle_blocks = get_bool_opt(options, 'handlecodeblocks', True)
    self.handlecodeblocks = handle_blocks
    RegexLexer.__init__(self, **options)
|
608 |
|
609 |
|
610 class TiddlyWiki5Lexer(RegexLexer): |
|
611 """ |
|
612 For `TiddlyWiki5 <https://tiddlywiki.com/#TiddlerFiles>`_ markup. |
|
613 |
|
614 .. versionadded:: 2.7 |
|
615 """ |
|
616 name = 'tiddler' |
|
617 aliases = ['tid'] |
|
618 filenames = ['*.tid'] |
|
619 mimetypes = ["text/vnd.tiddlywiki"] |
|
620 flags = re.MULTILINE |
|
621 |
|
def _handle_codeblock(self, match):
    """
    Tokenize a fenced code block that names its language.

    match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

    The opening fence and the language name are emitted as ``String`` and
    the newline as ``Text``. If ``self.handlecodeblocks`` is true and a
    lexer is registered under the language name, the code is tokenized by
    that lexer; otherwise the code is emitted as one ``String`` token.
    The closing fence is emitted in both cases, so the yielded tokens
    cover the whole match.
    """
    from pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)
    yield match.start(3), Text, match.group(3)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(2).strip())
        except ClassNotFound:
            # unknown language name: fall back to a plain String token
            pass
    code = match.group(4)
    code_start = match.start(4)

    if lexer is None:
        # no lexer for this language. handle it like it was a code block
        yield code_start, String, code
    else:
        # The sub-lexer reports positions relative to ``code``; shift them
        # so they are positions in the text this lexer is processing.
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield code_start + index, token, value

    # closing fence: must be emitted on the fallback path as well
    yield match.start(5), String, match.group(5)
|
650 |
|
def _handle_cssblock(self, match):
    """
    Tokenize a ``<style>`` block.

    match args: 1:style tag 2:newline, 3:code, 4:closing style tag

    The opening tag and the newline are emitted as ``String``. If
    ``self.handlecodeblocks`` is true and a lexer is registered under the
    name ``'css'``, the block content is tokenized by that lexer; otherwise
    it is emitted as one ``String`` token. The closing tag is emitted in
    both cases, so the yielded tokens cover the whole match.
    """
    from pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)

    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name('css')
        except ClassNotFound:
            # no CSS lexer available: fall back to a plain String token
            pass
    code = match.group(3)
    code_start = match.start(3)

    if lexer is None:
        # no lexer for this language. handle it like it was a code block
        yield code_start, String, code
    else:
        # The sub-lexer reports positions relative to ``code``; shift them
        # so they are positions in the text this lexer is processing.
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield code_start + index, token, value

    # closing tag: must be emitted on the fallback path as well
    yield match.start(4), String, match.group(4)
|
677 |
|
678 tokens = { |
|
679 'root': [ |
|
680 # title in metadata section |
|
681 (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)), |
|
682 # headings |
|
683 (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)), |
|
684 (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), |
|
685 # bulleted or numbered lists or single-line block quotes |
|
686 # (can be mixed) |
|
687 (r'^(\s*)([*#>]+)(\s*)(.+\n)', |
|
688 bygroups(Text, Keyword, Text, using(this, state='inline'))), |
|
689 # multi-line block quotes |
|
690 (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)), |
|
691 # table header |
|
692 (r'^(\|.*?\|h)$', bygroups(Generic.Strong)), |
|
693 # table footer or caption |
|
694 (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)), |
|
695 # table class |
|
696 (r'^(\|.*?\|k)$', bygroups(Name.Tag)), |
|
697 # definitions |
|
698 (r'^(;.*)$', bygroups(Generic.Strong)), |
560 # text block |
699 # text block |
561 (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), |
700 (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), |
562 # code block with language |
701 # code block with language |
563 (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), |
702 (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), |
564 |
703 # CSS style block |
|
704 (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock), |
|
705 |
|
706 include('keywords'), |
565 include('inline'), |
707 include('inline'), |
|
708 ], |
|
709 'keywords': [ |
|
710 (words(( |
|
711 '\\define', '\\end', 'caption', 'created', 'modified', 'tags', |
|
712 'title', 'type'), prefix=r'^', suffix=r'\b'), |
|
713 Keyword), |
566 ], |
714 ], |
567 'inline': [ |
715 'inline': [ |
568 # escape |
716 # escape |
569 (r'\\.', Text), |
717 (r'\\.', Text), |
|
718 # created or modified date |
|
719 (r'\d{17}', Number.Integer), |
570 # italics |
720 # italics |
571 (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)), |
721 (r'(\s)(//[^/]+//)((?=\W|\n))', |
|
722 bygroups(Text, Generic.Emph, Text)), |
|
723 # superscript |
|
724 (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)), |
|
725 # subscript |
|
726 (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)), |
|
727 # underscore |
|
728 (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)), |
572 # bold |
729 # bold |
573 # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics |
730 (r"(\s)(''[^']+'')((?=\W|\n))", |
574 (r'(\s)((\*\*|__).*\3)((?=\W|\n))', bygroups(Text, Generic.Strong, None, Text)), |
731 bygroups(Text, Generic.Strong, Text)), |
575 # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)), |
|
576 # strikethrough |
732 # strikethrough |
577 (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)), |
733 (r'(\s)(~~[^~]+~~)((?=\W|\n))', |
|
734 bygroups(Text, Generic.Deleted, Text)), |
|
735 # TiddlyWiki variables |
|
736 (r'<<[^>]+>>', Name.Tag), |
|
737 (r'\$\$[^$]+\$\$', Name.Tag), |
|
738 (r'\$\([^)]+\)\$', Name.Tag), |
|
739 # TiddlyWiki style or class |
|
740 (r'^@@.*$', Name.Tag), |
|
741 # HTML tags |
|
742 (r'</?[^>]+>', Name.Tag), |
578 # inline code |
743 # inline code |
579 (r'`[^`]+`', String.Backtick), |
744 (r'`[^`]+`', String.Backtick), |
580 # mentions and topics (twitter and github stuff) |
745 # HTML escaped symbols |
581 (r'[@#][\w/:]+', Name.Entity), |
746 (r'&\S*?;', String.Regex), |
582 # (image?) links eg: ![alt text](https://example.com/image.png) |
747 # Wiki links |
583 (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), |
748 (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)), |
584 # reference-style links, e.g.: |
749 # External links |
585 # [an example][id] |
750 (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})', |
586 # [id]: http://example.com/ |
751 bygroups(Text, Name.Tag, Text, Name.Attribute, Text)), |
587 (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), |
752 # Transclusion |
588 (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', bygroups(Text, Name.Label, Text, Name.Attribute)), |
753 (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)), |
|
754 # URLs |
|
755 (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)), |
589 |
756 |
590 # general text, must come last! |
757 # general text, must come last! |
591 (r'[^\\\s]+', Text), |
758 (r'[\w]+', Text), |
592 (r'.', Text), |
759 (r'.', Text) |
593 ], |
760 ], |
594 } |
761 } |
595 |
762 |
596 def __init__(self, **options): |
763 def __init__(self, **options): |
597 self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) |
764 self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) |