diff -r 48d210e41c65 -r 54c5cfbb1e29 eric6/ThirdParty/Pygments/pygments/lexers/markup.py
--- a/eric6/ThirdParty/Pygments/pygments/lexers/markup.py	Thu Jan 14 18:06:56 2021 +0100
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/markup.py	Thu Jan 14 18:14:15 2021 +0100
@@ -1,765 +1,763 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.lexers.markup
-    ~~~~~~~~~~~~~~~~~~~~~~
-
-    Lexers for non-HTML markup languages.
-
-    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexers.html import HtmlLexer, XmlLexer
-from pygments.lexers.javascript import JavascriptLexer
-from pygments.lexers.css import CssLexer
-
-from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
-    using, this, do_insertions, default, words
-from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Number, Punctuation, Generic, Other
-from pygments.util import get_bool_opt, ClassNotFound
-
-__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
-           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
-           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
-           'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer']
-
-
-class BBCodeLexer(RegexLexer):
-    """
-    A lexer that highlights BBCode(-like) syntax.
-
-    .. versionadded:: 0.6
-    """
-
-    name = 'BBCode'
-    aliases = ['bbcode']
-    mimetypes = ['text/x-bbcode']
-
-    tokens = {
-        'root': [
-            (r'[^[]+', Text),
-            # tag/end tag begin
-            (r'\[/?\w+', Keyword, 'tag'),
-            # stray bracket
-            (r'\[', Text),
-        ],
-        'tag': [
-            (r'\s+', Text),
-            # attribute with value
-            (r'(\w+)(=)("?[^\s"\]]+"?)',
-             bygroups(Name.Attribute, Operator, String)),
-            # tag argument (a la [color=green])
-            (r'(=)("?[^\s"\]]+"?)',
-             bygroups(Operator, String)),
-            # tag end
-            (r'\]', Keyword, '#pop'),
-        ],
-    }
-
-
-class MoinWikiLexer(RegexLexer):
-    """
-    For MoinMoin (and Trac) Wiki markup.
-
-    .. versionadded:: 0.7
-    """
-
-    name = 'MoinMoin/Trac Wiki markup'
-    aliases = ['trac-wiki', 'moin']
-    filenames = []
-    mimetypes = ['text/x-trac-wiki']
-    flags = re.MULTILINE | re.IGNORECASE
-
-    tokens = {
-        'root': [
-            (r'^#.*$', Comment),
-            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
-            # Titles
-            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
-             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
-            # Literal code blocks, with optional shebang
-            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
-            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
-            # Lists
-            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
-            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
-            # Other Formatting
-            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
-            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
-             bygroups(Keyword, String, Keyword)),  # Link
-            (r'^----+$', Keyword),  # Horizontal rules
-            (r'[^\n\'\[{!_~^,|]+', Text),
-            (r'\n', Text),
-            (r'.', Text),
-        ],
-        'codeblock': [
-            (r'\}\}\}', Name.Builtin, '#pop'),
-            # these blocks are allowed to be nested in Trac, but not MoinMoin
-            (r'\{\{\{', Text, '#push'),
-            (r'[^{}]+', Comment.Preproc),  # slurp boring text
-            (r'.', Comment.Preproc),  # allow loose { or }
-        ],
-    }
-
-
-class RstLexer(RegexLexer):
-    """
-    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
-
-    .. versionadded:: 0.7
-
-    Additional options accepted:
-
-    `handlecodeblocks`
-        Highlight the contents of ``.. sourcecode:: language``,
-        ``.. code:: language`` and ``.. code-block:: language``
-        directives with a lexer for the given language (default:
-        ``True``).
-
-    .. versionadded:: 0.8
-    """
-    name = 'reStructuredText'
-    aliases = ['rst', 'rest', 'restructuredtext']
-    filenames = ['*.rst', '*.rest']
-    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
-    flags = re.MULTILINE
-
-    def _handle_sourcecode(self, match):
-        from pygments.lexers import get_lexer_by_name
-
-        # section header
-        yield match.start(1), Punctuation, match.group(1)
-        yield match.start(2), Text, match.group(2)
-        yield match.start(3), Operator.Word, match.group(3)
-        yield match.start(4), Punctuation, match.group(4)
-        yield match.start(5), Text, match.group(5)
-        yield match.start(6), Keyword, match.group(6)
-        yield match.start(7), Text, match.group(7)
-
-        # lookup lexer if wanted and existing
-        lexer = None
-        if self.handlecodeblocks:
-            try:
-                lexer = get_lexer_by_name(match.group(6).strip())
-            except ClassNotFound:
-                pass
-        indention = match.group(8)
-        indention_size = len(indention)
-        code = (indention + match.group(9) + match.group(10) + match.group(11))
-
-        # no lexer for this language. handle it like it was a code block
-        if lexer is None:
-            yield match.start(8), String, code
-            return
-
-        # highlight the lines with the lexer.
-        ins = []
-        codelines = code.splitlines(True)
-        code = ''
-        for line in codelines:
-            if len(line) > indention_size:
-                ins.append((len(code), [(0, Text, line[:indention_size])]))
-                code += line[indention_size:]
-            else:
-                code += line
-        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))
-
-    # from docutils.parsers.rst.states
-    closers = '\'")]}>\u2019\u201d\xbb!?'
-    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
-    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
-                         % (re.escape(unicode_delimiters),
-                            re.escape(closers)))
-
-    tokens = {
-        'root': [
-            # Heading with overline
-            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
-             r'(.+)(\n)(\1)(\n)',
-             bygroups(Generic.Heading, Text, Generic.Heading,
-                      Text, Generic.Heading, Text)),
-            # Plain heading
-            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
-             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
-             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
-            # Bulleted lists
-            (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            # Numbered lists
-            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            # Numbered, but keep words at BOL from becoming lists
-            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            # Line blocks
-            (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)',
-             bygroups(Text, Operator, using(this, state='inline'))),
-            # Sourcecode directives
-            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
-             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
-             _handle_sourcecode),
-            # A directive
-            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
-             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
-                      using(this, state='inline'))),
-            # A reference target
-            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
-             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
-            # A footnote/citation target
-            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
-             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
-            # A substitution def
-            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
-             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
-                      Punctuation, Text, using(this, state='inline'))),
-            # Comments
-            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
-            # Field list marker
-            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
-             bygroups(Text, Name.Class, Text)),
-            # Definition list
-            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
-             bygroups(using(this, state='inline'), using(this, state='inline'))),
-            # Code blocks
-            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
-             bygroups(String.Escape, Text, String, String, Text, String)),
-            include('inline'),
-        ],
-        'inline': [
-            (r'\\.', Text),  # escape
-            (r'``', String, 'literal'),  # code
-            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
-             bygroups(String, String.Interpol, String)),
-            (r'`.+?`__?', String),  # reference
-            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
-             bygroups(Name.Variable, Name.Attribute)),  # role
-            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
-             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
-            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
-            (r'\*.+?\*', Generic.Emph),  # Emphasis
-            (r'\[.*?\]_', String),  # Footnote or citation
-            (r'<.+?>', Name.Tag),  # Hyperlink
-            (r'[^\\\n\[*`:]+', Text),
-            (r'.', Text),
-        ],
-        'literal': [
-            (r'[^`]+', String),
-            (r'``' + end_string_suffix, String, '#pop'),
-            (r'`', String),
-        ]
-    }
-
-    def __init__(self, **options):
-        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
-        RegexLexer.__init__(self, **options)
-
-    def analyse_text(text):
-        if text[:2] == '..' and text[2:3] != '.':
-            return 0.3
-        p1 = text.find("\n")
-        p2 = text.find("\n", p1 + 1)
-        if (p2 > -1 and              # has two lines
-                p1 * 2 + 1 == p2 and     # they are the same length
-                text[p1+1] in '-=' and   # the next line both starts and ends with
-                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
-            return 0.5
-
-
-class TexLexer(RegexLexer):
-    """
-    Lexer for the TeX and LaTeX typesetting languages.
-    """
-
-    name = 'TeX'
-    aliases = ['tex', 'latex']
-    filenames = ['*.tex', '*.aux', '*.toc']
-    mimetypes = ['text/x-tex', 'text/x-latex']
-
-    tokens = {
-        'general': [
-            (r'%.*?\n', Comment),
-            (r'[{}]', Name.Builtin),
-            (r'[&_^]', Name.Builtin),
-        ],
-        'root': [
-            (r'\\\[', String.Backtick, 'displaymath'),
-            (r'\\\(', String, 'inlinemath'),
-            (r'\$\$', String.Backtick, 'displaymath'),
-            (r'\$', String, 'inlinemath'),
-            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
-            (r'\\$', Keyword),
-            include('general'),
-            (r'[^\\$%&_^{}]+', Text),
-        ],
-        'math': [
-            (r'\\([a-zA-Z]+|.)', Name.Variable),
-            include('general'),
-            (r'[0-9]+', Number),
-            (r'[-=!+*/()\[\]]', Operator),
-            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
-        ],
-        'inlinemath': [
-            (r'\\\)', String, '#pop'),
-            (r'\$', String, '#pop'),
-            include('math'),
-        ],
-        'displaymath': [
-            (r'\\\]', String, '#pop'),
-            (r'\$\$', String, '#pop'),
-            (r'\$', Name.Builtin),
-            include('math'),
-        ],
-        'command': [
-            (r'\[.*?\]', Name.Attribute),
-            (r'\*', Keyword),
-            default('#pop'),
-        ],
-    }
-
-    def analyse_text(text):
-        for start in ("\\documentclass", "\\input", "\\documentstyle",
-                      "\\relax"):
-            if text[:len(start)] == start:
-                return True
-
-
-class GroffLexer(RegexLexer):
-    """
-    Lexer for the (g)roff typesetting language, supporting groff
-    extensions. Mainly useful for highlighting manpage sources.
-
-    .. versionadded:: 0.6
-    """
-
-    name = 'Groff'
-    aliases = ['groff', 'nroff', 'man']
-    filenames = ['*.[1234567]', '*.man']
-    mimetypes = ['application/x-troff', 'text/troff']
-
-    tokens = {
-        'root': [
-            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
-            (r'\.', Punctuation, 'request'),
-            # Regular characters, slurp till we find a backslash or newline
-            (r'[^\\\n]+', Text, 'textline'),
-            default('textline'),
-        ],
-        'textline': [
-            include('escapes'),
-            (r'[^\\\n]+', Text),
-            (r'\n', Text, '#pop'),
-        ],
-        'escapes': [
-            # groff has many ways to write escapes.
-            (r'\\"[^\n]*', Comment),
-            (r'\\[fn]\w', String.Escape),
-            (r'\\\(.{2}', String.Escape),
-            (r'\\.\[.*\]', String.Escape),
-            (r'\\.', String.Escape),
-            (r'\\\n', Text, 'request'),
-        ],
-        'request': [
-            (r'\n', Text, '#pop'),
-            include('escapes'),
-            (r'"[^\n"]+"', String.Double),
-            (r'\d+', Number),
-            (r'\S+', String),
-            (r'\s+', Text),
-        ],
-    }
-
-    def analyse_text(text):
-        if text[:1] != '.':
-            return False
-        if text[:3] == '.\\"':
-            return True
-        if text[:4] == '.TH ':
-            return True
-        if text[1:3].isalnum() and text[3].isspace():
-            return 0.9
-
-
-class MozPreprocHashLexer(RegexLexer):
-    """
-    Lexer for Mozilla Preprocessor files (with '#' as the marker).
-
-    Other data is left untouched.
-
-    .. versionadded:: 2.0
-    """
-    name = 'mozhashpreproc'
-    aliases = [name]
-    filenames = []
-    mimetypes = []
-
-    tokens = {
-        'root': [
-            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
-            (r'.+', Other),
-        ],
-        'exprstart': [
-            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
-            (words((
-                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
-                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
-                'include', 'includesubst', 'error')),
-             Comment.Preproc, '#pop'),
-        ],
-        'expr': [
-            (words(('!', '!=', '==', '&&', '||')), Operator),
-            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
-            (r'\)', Punctuation),
-            (r'[0-9]+', Number.Decimal),
-            (r'__\w+?__', Name.Variable),
-            (r'@\w+?@', Name.Class),
-            (r'\w+', Name),
-            (r'\n', Text, '#pop'),
-            (r'\s+', Text),
-            (r'\S', Punctuation),
-        ],
-    }
-
-
-class MozPreprocPercentLexer(MozPreprocHashLexer):
-    """
-    Lexer for Mozilla Preprocessor files (with '%' as the marker).
-
-    Other data is left untouched.
-
-    .. versionadded:: 2.0
-    """
-    name = 'mozpercentpreproc'
-    aliases = [name]
-    filenames = []
-    mimetypes = []
-
-    tokens = {
-        'root': [
-            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
-            (r'.+', Other),
-        ],
-    }
-
-
-class MozPreprocXulLexer(DelegatingLexer):
-    """
-    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
-    `XmlLexer`.
-
-    .. versionadded:: 2.0
-    """
-    name = "XUL+mozpreproc"
-    aliases = ['xul+mozpreproc']
-    filenames = ['*.xul.in']
-    mimetypes = []
-
-    def __init__(self, **options):
-        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
-
-
-class MozPreprocJavascriptLexer(DelegatingLexer):
-    """
-    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
-    `JavascriptLexer`.
-
-    .. versionadded:: 2.0
-    """
-    name = "Javascript+mozpreproc"
-    aliases = ['javascript+mozpreproc']
-    filenames = ['*.js.in']
-    mimetypes = []
-
-    def __init__(self, **options):
-        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
-
-
-class MozPreprocCssLexer(DelegatingLexer):
-    """
-    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
-    `CssLexer`.
-
-    .. versionadded:: 2.0
-    """
-    name = "CSS+mozpreproc"
-    aliases = ['css+mozpreproc']
-    filenames = ['*.css.in']
-    mimetypes = []
-
-    def __init__(self, **options):
-        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
-
-
-class MarkdownLexer(RegexLexer):
-    """
-    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.
-
-    .. versionadded:: 2.2
-    """
-    name = 'markdown'
-    aliases = ['md']
-    filenames = ['*.md', '*.markdown']
-    mimetypes = ["text/x-markdown"]
-    flags = re.MULTILINE
-
-    def _handle_codeblock(self, match):
-        """
-        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
-        """
-        from pygments.lexers import get_lexer_by_name
-
-        # section header
-        yield match.start(1), String.Backtick, match.group(1)
-        yield match.start(2), String.Backtick, match.group(2)
-        yield match.start(3), Text , match.group(3)
-
-        # lookup lexer if wanted and existing
-        lexer = None
-        if self.handlecodeblocks:
-            try:
-                lexer = get_lexer_by_name( match.group(2).strip() )
-            except ClassNotFound:
-                pass
-        code = match.group(4)
-
-        # no lexer for this language. handle it like it was a code block
-        if lexer is None:
-            yield match.start(4), String, code
-        else:
-            yield from do_insertions([], lexer.get_tokens_unprocessed(code))
-
-        yield match.start(5), String.Backtick, match.group(5)
-
-    tokens = {
-        'root': [
-            # heading with '#' prefix (atx-style)
-            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
-            # subheading with '#' prefix (atx-style)
-            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
-            # heading with '=' underlines (Setext-style)
-            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
-            # subheading with '-' underlines (Setext-style)
-            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
-            # task list
-            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
-             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
-            # bulleted list
-            (r'^(\s*)([*-])(\s)(.+\n)',
-             bygroups(Text, Keyword, Text, using(this, state='inline'))),
-            # numbered list
-            (r'^(\s*)([0-9]+\.)( .+\n)',
-             bygroups(Text, Keyword, using(this, state='inline'))),
-            # quote
-            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
-            # code block fenced by 3 backticks
-            (r'^(\s*```\n(.+\n)+\s*```$)', String.Backtick),
-            # code block with language
-            (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$)', _handle_codeblock),
-            # code block indented with 4 spaces or 1 tab
-            (r'(\n\n)((\ {4}|\t)(.+\n)+)', bygroups(Text, String.Backtick)),
-
-            include('inline'),
-        ],
-        'inline': [
-            # escape
-            (r'\\.', Text),
-            # inline code
-            (r'([^`])(`[^`\n]+`)', bygroups(Text, String.Backtick)),
-            # warning: the following rules eat outer tags.
-            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
-            # bold fenced by '**'
-            (r'(\*\*[^* \n][^*\n]*\*\*)', bygroups(Generic.Strong)),
-            # # bold fenced by '__'
-            (r'(\_\_[^_ \n][^_\n]*\_\_)', bygroups(Generic.Strong)),
-            # italics fenced by '*'
-            (r'(\*[^* \n][^*\n]*\*)', bygroups(Generic.Emph)),
-            # italics fenced by '_'
-            (r'(\_[^_ \n][^_\n]*\_)', bygroups(Generic.Emph)),
-            # strikethrough
-            (r'([^~]*)(~~[^~]+~~)', bygroups(Text, Generic.Deleted)),
-            # mentions and topics (twitter and github stuff)
-            (r'[@#][\w/:]+', Name.Entity),
-            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
-            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
-             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
-            # reference-style links, e.g.:
-            #   [an example][id]
-            #   [id]: http://example.com/
-            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
-             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
-            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
-             bygroups(Text, Name.Label, Text, Name.Attribute)),
-
-            # general text, must come last!
-            (r'[^\\\s]+', Text),
-            (r'.', Text),
-        ],
-    }
-
-    def __init__(self, **options):
-        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
-        RegexLexer.__init__(self, **options)
-
-
-class TiddlyWiki5Lexer(RegexLexer):
-    """
-    For `TiddlyWiki5 <https://tiddlywiki.com/#TiddlerFiles>`_ markup.
-
-    .. versionadded:: 2.7
-    """
-    name = 'tiddler'
-    aliases = ['tid']
-    filenames = ['*.tid']
-    mimetypes = ["text/vnd.tiddlywiki"]
-    flags = re.MULTILINE
-
-    def _handle_codeblock(self, match):
-        """
-        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
-        """
-        from pygments.lexers import get_lexer_by_name
-
-        # section header
-        yield match.start(1), String, match.group(1)
-        yield match.start(2), String, match.group(2)
-        yield match.start(3), Text, match.group(3)
-
-        # lookup lexer if wanted and existing
-        lexer = None
-        if self.handlecodeblocks:
-            try:
-                lexer = get_lexer_by_name(match.group(2).strip())
-            except ClassNotFound:
-                pass
-        code = match.group(4)
-
-        # no lexer for this language. handle it like it was a code block
-        if lexer is None:
-            yield match.start(4), String, code
-            return
-
-        yield from do_insertions([], lexer.get_tokens_unprocessed(code))
-
-        yield match.start(5), String, match.group(5)
-
-    def _handle_cssblock(self, match):
-        """
-        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
-        """
-        from pygments.lexers import get_lexer_by_name
-
-        # section header
-        yield match.start(1), String, match.group(1)
-        yield match.start(2), String, match.group(2)
-
-        lexer = None
-        if self.handlecodeblocks:
-            try:
-                lexer = get_lexer_by_name('css')
-            except ClassNotFound:
-                pass
-        code = match.group(3)
-
-        # no lexer for this language. handle it like it was a code block
-        if lexer is None:
-            yield match.start(3), String, code
-            return
-
-        yield from do_insertions([], lexer.get_tokens_unprocessed(code))
-
-        yield match.start(4), String, match.group(4)
-
-    tokens = {
-        'root': [
-            # title in metadata section
-            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
-            # headings
-            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
-            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
-            # bulleted or numbered lists or single-line block quotes
-            # (can be mixed)
-            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
-             bygroups(Text, Keyword, Text, using(this, state='inline'))),
-            # multi-line block quotes
-            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
-            # table header
-            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
-            # table footer or caption
-            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
-            # table class
-            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
-            # definitions
-            (r'^(;.*)$', bygroups(Generic.Strong)),
-            # text block
-            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
-            # code block with language
-            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
-            # CSS style block
-            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),
-
-            include('keywords'),
-            include('inline'),
-        ],
-        'keywords': [
-            (words((
-                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
-                'title', 'type'), prefix=r'^', suffix=r'\b'),
-             Keyword),
-        ],
-        'inline': [
-            # escape
-            (r'\\.', Text),
-            # created or modified date
-            (r'\d{17}', Number.Integer),
-            # italics
-            (r'(\s)(//[^/]+//)((?=\W|\n))',
-             bygroups(Text, Generic.Emph, Text)),
-            # superscript
-            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
-            # subscript
-            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
-            # underscore
-            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
-            # bold
-            (r"(\s)(''[^']+'')((?=\W|\n))",
-             bygroups(Text, Generic.Strong, Text)),
-            # strikethrough
-            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
-             bygroups(Text, Generic.Deleted, Text)),
-            # TiddlyWiki variables
-            (r'<<[^>]+>>', Name.Tag),
-            (r'\$\$[^$]+\$\$', Name.Tag),
-            (r'\$\([^)]+\)\$', Name.Tag),
-            # TiddlyWiki style or class
-            (r'^@@.*$', Name.Tag),
-            # HTML tags
-            (r'</?[^>]+>', Name.Tag),
-            # inline code
-            (r'`[^`]+`', String.Backtick),
-            # HTML escaped symbols
-            (r'&\S*?;', String.Regex),
-            # Wiki links
-            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
-            # External links
-            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
-             bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
-            # Transclusion
-            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
-            # URLs
-            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),
-
-            # general text, must come last!
-            (r'[\w]+', Text),
-            (r'.', Text)
-        ],
-    }
-
-    def __init__(self, **options):
-        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
-        RegexLexer.__init__(self, **options)
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.markup
+    ~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for non-HTML markup languages.
+
+    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+""" + +import re + +from pygments.lexers.html import HtmlLexer, XmlLexer +from pygments.lexers.javascript import JavascriptLexer +from pygments.lexers.css import CssLexer + +from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ + using, this, do_insertions, default, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Generic, Other +from pygments.util import get_bool_opt, ClassNotFound + +__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', + 'MozPreprocHashLexer', 'MozPreprocPercentLexer', + 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', + 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer'] + + +class BBCodeLexer(RegexLexer): + """ + A lexer that highlights BBCode(-like) syntax. + + .. versionadded:: 0.6 + """ + + name = 'BBCode' + aliases = ['bbcode'] + mimetypes = ['text/x-bbcode'] + + tokens = { + 'root': [ + (r'[^[]+', Text), + # tag/end tag begin + (r'\[/?\w+', Keyword, 'tag'), + # stray bracket + (r'\[', Text), + ], + 'tag': [ + (r'\s+', Text), + # attribute with value + (r'(\w+)(=)("?[^\s"\]]+"?)', + bygroups(Name.Attribute, Operator, String)), + # tag argument (a la [color=green]) + (r'(=)("?[^\s"\]]+"?)', + bygroups(Operator, String)), + # tag end + (r'\]', Keyword, '#pop'), + ], + } + + +class MoinWikiLexer(RegexLexer): + """ + For MoinMoin (and Trac) Wiki markup. + + .. versionadded:: 0.7 + """ + + name = 'MoinMoin/Trac Wiki markup' + aliases = ['trac-wiki', 'moin'] + filenames = [] + mimetypes = ['text/x-trac-wiki'] + flags = re.MULTILINE | re.IGNORECASE + + tokens = { + 'root': [ + (r'^#.*$', Comment), + (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next + # Titles + (r'^(=+)([^=]+)(=+)(\s*#.+)?$', + bygroups(Generic.Heading, using(this), Generic.Heading, String)), + # Literal code blocks, with optional shebang + (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), + (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting + # Lists + (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), + (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), + # Other Formatting + (r'\[\[\w+.*?\]\]', Keyword), # Macro + (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', + bygroups(Keyword, String, Keyword)), # Link + (r'^----+$', Keyword), # Horizontal rules + (r'[^\n\'\[{!_~^,|]+', Text), + (r'\n', Text), + (r'.', Text), + ], + 'codeblock': [ + (r'\}\}\}', Name.Builtin, '#pop'), + # these blocks are allowed to be nested in Trac, but not MoinMoin + (r'\{\{\{', Text, '#push'), + (r'[^{}]+', Comment.Preproc), # slurp boring text + (r'.', Comment.Preproc), # allow loose { or } + ], + } + + +class RstLexer(RegexLexer): + """ + For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup. + + .. versionadded:: 0.7 + + Additional options accepted: + + `handlecodeblocks` + Highlight the contents of ``.. sourcecode:: language``, + ``.. code:: language`` and ``.. code-block:: language`` + directives with a lexer for the given language (default: + ``True``). + + .. 
versionadded:: 0.8 + """ + name = 'reStructuredText' + aliases = ['rst', 'rest', 'restructuredtext'] + filenames = ['*.rst', '*.rest'] + mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] + flags = re.MULTILINE + + def _handle_sourcecode(self, match): + from pygments.lexers import get_lexer_by_name + + # section header + yield match.start(1), Punctuation, match.group(1) + yield match.start(2), Text, match.group(2) + yield match.start(3), Operator.Word, match.group(3) + yield match.start(4), Punctuation, match.group(4) + yield match.start(5), Text, match.group(5) + yield match.start(6), Keyword, match.group(6) + yield match.start(7), Text, match.group(7) + + # lookup lexer if wanted and existing + lexer = None + if self.handlecodeblocks: + try: + lexer = get_lexer_by_name(match.group(6).strip()) + except ClassNotFound: + pass + indention = match.group(8) + indention_size = len(indention) + code = (indention + match.group(9) + match.group(10) + match.group(11)) + + # no lexer for this language. handle it like it was a code block + if lexer is None: + yield match.start(8), String, code + return + + # highlight the lines with the lexer. + ins = [] + codelines = code.splitlines(True) + code = '' + for line in codelines: + if len(line) > indention_size: + ins.append((len(code), [(0, Text, line[:indention_size])])) + code += line[indention_size:] + else: + code += line + yield from do_insertions(ins, lexer.get_tokens_unprocessed(code)) + + # from docutils.parsers.rst.states + closers = '\'")]}>\u2019\u201d\xbb!?' + unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0' + end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' + % (re.escape(unicode_delimiters), + re.escape(closers))) + + tokens = { + 'root': [ + # Heading with overline + (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' + r'(.+)(\n)(\1)(\n)', + bygroups(Generic.Heading, Text, Generic.Heading, + Text, Generic.Heading, Text)), + # Plain heading + (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' + r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', + bygroups(Generic.Heading, Text, Generic.Heading, Text)), + # Bulleted lists + (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + # Numbered lists + (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + # Numbered, but keep words at BOL from becoming lists + (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', + bygroups(Text, Number, using(this, state='inline'))), + (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', + bygroups(Text, Number, using(this, state='inline'))), + # Line blocks + (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', + bygroups(Text, Operator, using(this, state='inline'))), + # Sourcecode directives + (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' + r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', + _handle_sourcecode), + # A directive + (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, + using(this, state='inline'))), + # A reference target + (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', + bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A footnote/citation target + (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', + bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A substitution def + (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ 
\t]*)(.*))', + bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, + Punctuation, Text, using(this, state='inline'))), + # Comments + (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), + # Field list marker + (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)', + bygroups(Text, Name.Class, Text)), + # Definition list + (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', + bygroups(using(this, state='inline'), using(this, state='inline'))), + # Code blocks + (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)', + bygroups(String.Escape, Text, String, String, Text, String)), + include('inline'), + ], + 'inline': [ + (r'\\.', Text), # escape + (r'``', String, 'literal'), # code + (r'(`.+?)(<.+?>)(`__?)', # reference with inline target + bygroups(String, String.Interpol, String)), + (r'`.+?`__?', String), # reference + (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', + bygroups(Name.Variable, Name.Attribute)), # role + (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', + bygroups(Name.Attribute, Name.Variable)), # role (content first) + (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis + (r'\*.+?\*', Generic.Emph), # Emphasis + (r'\[.*?\]_', String), # Footnote or citation + (r'<.+?>', Name.Tag), # Hyperlink + (r'[^\\\n\[*`:]+', Text), + (r'.', Text), + ], + 'literal': [ + (r'[^`]+', String), + (r'``' + end_string_suffix, String, '#pop'), + (r'`', String), + ] + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, **options) + + def analyse_text(text): + if text[:2] == '..' and text[2:3] != '.': + return 0.3 + p1 = text.find("\n") + p2 = text.find("\n", p1 + 1) + if (p2 > -1 and # has two lines + p1 * 2 + 1 == p2 and # they are the same length + text[p1+1] in '-=' and # the next line both starts and ends with + text[p1+1] == text[p2-1]): # ...a sufficiently high header + return 0.5 + + +class TexLexer(RegexLexer): + """ + Lexer for the TeX and LaTeX typesetting languages. + """ + + name = 'TeX' + aliases = ['tex', 'latex'] + filenames = ['*.tex', '*.aux', '*.toc'] + mimetypes = ['text/x-tex', 'text/x-latex'] + + tokens = { + 'general': [ + (r'%.*?\n', Comment), + (r'[{}]', Name.Builtin), + (r'[&_^]', Name.Builtin), + ], + 'root': [ + (r'\\\[', String.Backtick, 'displaymath'), + (r'\\\(', String, 'inlinemath'), + (r'\$\$', String.Backtick, 'displaymath'), + (r'\$', String, 'inlinemath'), + (r'\\([a-zA-Z]+|.)', Keyword, 'command'), + (r'\\$', Keyword), + include('general'), + (r'[^\\$%&_^{}]+', Text), + ], + 'math': [ + (r'\\([a-zA-Z]+|.)', Name.Variable), + include('general'), + (r'[0-9]+', Number), + (r'[-=!+*/()\[\]]', Operator), + (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), + ], + 'inlinemath': [ + (r'\\\)', String, '#pop'), + (r'\$', String, '#pop'), + include('math'), + ], + 'displaymath': [ + (r'\\\]', String, '#pop'), + (r'\$\$', String, '#pop'), + (r'\$', Name.Builtin), + include('math'), + ], + 'command': [ + (r'\[.*?\]', Name.Attribute), + (r'\*', Keyword), + default('#pop'), + ], + } + + def analyse_text(text): + for start in ("\\documentclass", "\\input", "\\documentstyle", + "\\relax"): + if text[:len(start)] == start: + return True + + +class GroffLexer(RegexLexer): + """ + Lexer for the (g)roff typesetting language, supporting groff + extensions. Mainly useful for highlighting manpage sources. + + .. 
versionadded:: 0.6 + """ + + name = 'Groff' + aliases = ['groff', 'nroff', 'man'] + filenames = ['*.[1234567]', '*.man'] + mimetypes = ['application/x-troff', 'text/troff'] + + tokens = { + 'root': [ + (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), + (r'\.', Punctuation, 'request'), + # Regular characters, slurp till we find a backslash or newline + (r'[^\\\n]+', Text, 'textline'), + default('textline'), + ], + 'textline': [ + include('escapes'), + (r'[^\\\n]+', Text), + (r'\n', Text, '#pop'), + ], + 'escapes': [ + # groff has many ways to write escapes. + (r'\\"[^\n]*', Comment), + (r'\\[fn]\w', String.Escape), + (r'\\\(.{2}', String.Escape), + (r'\\.\[.*\]', String.Escape), + (r'\\.', String.Escape), + (r'\\\n', Text, 'request'), + ], + 'request': [ + (r'\n', Text, '#pop'), + include('escapes'), + (r'"[^\n"]+"', String.Double), + (r'\d+', Number), + (r'\S+', String), + (r'\s+', Text), + ], + } + + def analyse_text(text): + if text[:1] != '.': + return False + if text[:3] == '.\\"': + return True + if text[:4] == '.TH ': + return True + if text[1:3].isalnum() and text[3].isspace(): + return 0.9 + + +class MozPreprocHashLexer(RegexLexer): + """ + Lexer for Mozilla Preprocessor files (with '#' as the marker). + + Other data is left untouched. + + .. versionadded:: 2.0 + """ + name = 'mozhashpreproc' + aliases = [name] + filenames = [] + mimetypes = [] + + tokens = { + 'root': [ + (r'^#', Comment.Preproc, ('expr', 'exprstart')), + (r'.+', Other), + ], + 'exprstart': [ + (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'), + (words(( + 'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif', + 'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter', + 'include', 'includesubst', 'error')), + Comment.Preproc, '#pop'), + ], + 'expr': [ + (words(('!', '!=', '==', '&&', '||')), Operator), + (r'(defined)(\()', bygroups(Keyword, Punctuation)), + (r'\)', Punctuation), + (r'[0-9]+', Number.Decimal), + (r'__\w+?__', Name.Variable), + (r'@\w+?@', Name.Class), + (r'\w+', Name), + (r'\n', Text, '#pop'), + (r'\s+', Text), + (r'\S', Punctuation), + ], + } + + +class MozPreprocPercentLexer(MozPreprocHashLexer): + """ + Lexer for Mozilla Preprocessor files (with '%' as the marker). + + Other data is left untouched. + + .. versionadded:: 2.0 + """ + name = 'mozpercentpreproc' + aliases = [name] + filenames = [] + mimetypes = [] + + tokens = { + 'root': [ + (r'^%', Comment.Preproc, ('expr', 'exprstart')), + (r'.+', Other), + ], + } + + +class MozPreprocXulLexer(DelegatingLexer): + """ + Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the + `XmlLexer`. + + .. versionadded:: 2.0 + """ + name = "XUL+mozpreproc" + aliases = ['xul+mozpreproc'] + filenames = ['*.xul.in'] + mimetypes = [] + + def __init__(self, **options): + super().__init__(XmlLexer, MozPreprocHashLexer, **options) + + +class MozPreprocJavascriptLexer(DelegatingLexer): + """ + Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the + `JavascriptLexer`. + + .. versionadded:: 2.0 + """ + name = "Javascript+mozpreproc" + aliases = ['javascript+mozpreproc'] + filenames = ['*.js.in'] + mimetypes = [] + + def __init__(self, **options): + super().__init__(JavascriptLexer, MozPreprocHashLexer, **options) + + +class MozPreprocCssLexer(DelegatingLexer): + """ + Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the + `CssLexer`. + + .. 
versionadded:: 2.0 + """ + name = "CSS+mozpreproc" + aliases = ['css+mozpreproc'] + filenames = ['*.css.in'] + mimetypes = [] + + def __init__(self, **options): + super().__init__(CssLexer, MozPreprocPercentLexer, **options) + + +class MarkdownLexer(RegexLexer): + """ + For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup. + + .. versionadded:: 2.2 + """ + name = 'markdown' + aliases = ['md'] + filenames = ['*.md', '*.markdown'] + mimetypes = ["text/x-markdown"] + flags = re.MULTILINE + + def _handle_codeblock(self, match): + """ + match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks + """ + from pygments.lexers import get_lexer_by_name + + # section header + yield match.start(1), String.Backtick, match.group(1) + yield match.start(2), String.Backtick, match.group(2) + yield match.start(3), Text , match.group(3) + + # lookup lexer if wanted and existing + lexer = None + if self.handlecodeblocks: + try: + lexer = get_lexer_by_name( match.group(2).strip() ) + except ClassNotFound: + pass + code = match.group(4) + + # no lexer for this language. handle it like it was a code block + if lexer is None: + yield match.start(4), String, code + else: + yield from do_insertions([], lexer.get_tokens_unprocessed(code)) + + yield match.start(5), String.Backtick, match.group(5) + + tokens = { + 'root': [ + # heading with '#' prefix (atx-style) + (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)), + # subheading with '#' prefix (atx-style) + (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)), + # heading with '=' underlines (Setext-style) + (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)), + # subheading with '-' underlines (Setext-style) + (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)), + # task list + (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)', + bygroups(Text, Keyword, Keyword, using(this, state='inline'))), + # bulleted list + (r'^(\s*)([*-])(\s)(.+\n)', + bygroups(Text, Keyword, Text, using(this, state='inline'))), + # numbered list + (r'^(\s*)([0-9]+\.)( .+\n)', + bygroups(Text, Keyword, using(this, state='inline'))), + # quote + (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)), + # code block fenced by 3 backticks + (r'^(\s*```\n[\w\W]*?^\s*```$\n)', String.Backtick), + # code block with language + (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$\n)', _handle_codeblock), + + include('inline'), + ], + 'inline': [ + # escape + (r'\\.', Text), + # inline code + (r'([^`]?)(`[^`\n]+`)', bygroups(Text, String.Backtick)), + # warning: the following rules eat outer tags. + # eg. **foo _bar_ baz** => foo and baz are not recognized as bold + # bold fenced by '**' + (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)), + # bold fenced by '__' + (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)), + # italics fenced by '*' + (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)), + # italics fenced by '_' + (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)), + # strikethrough + (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)), + # mentions and topics (twitter and github stuff) + (r'[@#][\w/:]+', Name.Entity), + # (image?) 
links eg:  + (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', + bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), + # reference-style links, e.g.: + # [an example][id] + # [id]: http://example.com/ + (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', + bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), + (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', + bygroups(Text, Name.Label, Text, Name.Attribute)), + + # general text, must come last! + (r'[^\\\s]+', Text), + (r'.', Text), + ], + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, **options) + + +class TiddlyWiki5Lexer(RegexLexer): + """ + For `TiddlyWiki5 <https://tiddlywiki.com/#TiddlerFiles>`_ markup. + + .. versionadded:: 2.7 + """ + name = 'tiddler' + aliases = ['tid'] + filenames = ['*.tid'] + mimetypes = ["text/vnd.tiddlywiki"] + flags = re.MULTILINE + + def _handle_codeblock(self, match): + """ + match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks + """ + from pygments.lexers import get_lexer_by_name + + # section header + yield match.start(1), String, match.group(1) + yield match.start(2), String, match.group(2) + yield match.start(3), Text, match.group(3) + + # lookup lexer if wanted and existing + lexer = None + if self.handlecodeblocks: + try: + lexer = get_lexer_by_name(match.group(2).strip()) + except ClassNotFound: + pass + code = match.group(4) + + # no lexer for this language. handle it like it was a code block + if lexer is None: + yield match.start(4), String, code + return + + yield from do_insertions([], lexer.get_tokens_unprocessed(code)) + + yield match.start(5), String, match.group(5) + + def _handle_cssblock(self, match): + """ + match args: 1:style tag 2:newline, 3:code, 4:closing style tag + """ + from pygments.lexers import get_lexer_by_name + + # section header + yield match.start(1), String, match.group(1) + yield match.start(2), String, match.group(2) + + lexer = None + if self.handlecodeblocks: + try: + lexer = get_lexer_by_name('css') + except ClassNotFound: + pass + code = match.group(3) + + # no lexer for this language. 
handle it like it was a code block + if lexer is None: + yield match.start(3), String, code + return + + yield from do_insertions([], lexer.get_tokens_unprocessed(code)) + + yield match.start(4), String, match.group(4) + + tokens = { + 'root': [ + # title in metadata section + (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)), + # headings + (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)), + (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), + # bulleted or numbered lists or single-line block quotes + # (can be mixed) + (r'^(\s*)([*#>]+)(\s*)(.+\n)', + bygroups(Text, Keyword, Text, using(this, state='inline'))), + # multi-line block quotes + (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)), + # table header + (r'^(\|.*?\|h)$', bygroups(Generic.Strong)), + # table footer or caption + (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)), + # table class + (r'^(\|.*?\|k)$', bygroups(Name.Tag)), + # definitions + (r'^(;.*)$', bygroups(Generic.Strong)), + # text block + (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), + # code block with language + (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), + # CSS style block + (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock), + + include('keywords'), + include('inline'), + ], + 'keywords': [ + (words(( + '\\define', '\\end', 'caption', 'created', 'modified', 'tags', + 'title', 'type'), prefix=r'^', suffix=r'\b'), + Keyword), + ], + 'inline': [ + # escape + (r'\\.', Text), + # created or modified date + (r'\d{17}', Number.Integer), + # italics + (r'(\s)(//[^/]+//)((?=\W|\n))', + bygroups(Text, Generic.Emph, Text)), + # superscript + (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)), + # subscript + (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)), + # underscore + (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)), + # bold + (r"(\s)(''[^']+'')((?=\W|\n))", + bygroups(Text, Generic.Strong, Text)), + # strikethrough + (r'(\s)(~~[^~]+~~)((?=\W|\n))', + bygroups(Text, Generic.Deleted, Text)), + # TiddlyWiki variables + (r'<<[^>]+>>', Name.Tag), + (r'\$\$[^$]+\$\$', Name.Tag), + (r'\$\([^)]+\)\$', Name.Tag), + # TiddlyWiki style or class + (r'^@@.*$', Name.Tag), + # HTML tags + (r'</?[^>]+>', Name.Tag), + # inline code + (r'`[^`]+`', String.Backtick), + # HTML escaped symbols + (r'&\S*?;', String.Regex), + # Wiki links + (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)), + # External links + (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})', + bygroups(Text, Name.Tag, Text, Name.Attribute, Text)), + # Transclusion + (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)), + # URLs + (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)), + + # general text, must come last! + (r'[\w]+', Text), + (r'.', Text) + ], + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, **options)
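
As a quick sanity check after applying this patch, the reworked MarkdownLexer fence rules can be exercised through the standard Pygments API. This is a minimal sketch, not part of the patch; the sample string is made up:

    from pygments import highlight
    from pygments.lexers.markup import MarkdownLexer
    from pygments.formatters import TerminalFormatter

    # A fenced block with a language tag. Per the regexes changed above
    # (the added $\n), the closing ``` must now be followed by a newline.
    sample = "# Title\n\n```python\nprint('hello')\n```\n"
    print(highlight(sample, MarkdownLexer(), TerminalFormatter()))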