eric6/ThirdParty/Pygments/pygments/lexers/markup.py

changeset 7701:25f42e208e08
parent    7547:21b0534faebc
child     7983:54c5cfbb1e29
diff -r a3cf077a8db3 -r 25f42e208e08 eric6/ThirdParty/Pygments/pygments/lexers/markup.py
--- a/eric6/ThirdParty/Pygments/pygments/lexers/markup.py	Tue Sep 15 18:46:58 2020 +0200
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/markup.py	Tue Sep 15 19:09:05 2020 +0200
@@ -1,598 +1,765 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.lexers.markup
-    ~~~~~~~~~~~~~~~~~~~~~~
-
-    Lexers for non-HTML markup languages.
-
-    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexers.html import HtmlLexer, XmlLexer
-from pygments.lexers.javascript import JavascriptLexer
-from pygments.lexers.css import CssLexer
-
-from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
-    using, this, do_insertions, default, words
-from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Number, Punctuation, Generic, Other
-from pygments.util import get_bool_opt, ClassNotFound
-
-__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
-           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
-           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
-           'MozPreprocCssLexer', 'MarkdownLexer']
-
-
-class BBCodeLexer(RegexLexer):
-    """
-    A lexer that highlights BBCode(-like) syntax.
-
-    .. versionadded:: 0.6
-    """
-
-    name = 'BBCode'
-    aliases = ['bbcode']
-    mimetypes = ['text/x-bbcode']
-
-    tokens = {
-        'root': [
-            (r'[^[]+', Text),
-            # tag/end tag begin
-            (r'\[/?\w+', Keyword, 'tag'),
-            # stray bracket
-            (r'\[', Text),
-        ],
-        'tag': [
-            (r'\s+', Text),
-            # attribute with value
-            (r'(\w+)(=)("?[^\s"\]]+"?)',
-             bygroups(Name.Attribute, Operator, String)),
-            # tag argument (a la [color=green])
-            (r'(=)("?[^\s"\]]+"?)',
-             bygroups(Operator, String)),
-            # tag end
-            (r'\]', Keyword, '#pop'),
-        ],
-    }
-
-
-class MoinWikiLexer(RegexLexer):
-    """
-    For MoinMoin (and Trac) Wiki markup.
-
-    .. versionadded:: 0.7
-    """
-
-    name = 'MoinMoin/Trac Wiki markup'
-    aliases = ['trac-wiki', 'moin']
-    filenames = []
-    mimetypes = ['text/x-trac-wiki']
-    flags = re.MULTILINE | re.IGNORECASE
-
-    tokens = {
-        'root': [
-            (r'^#.*$', Comment),
-            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
-            # Titles
-            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
-             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
-            # Literal code blocks, with optional shebang
-            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
-            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
-            # Lists
-            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
-            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
-            # Other Formatting
-            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
-            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
-             bygroups(Keyword, String, Keyword)),  # Link
-            (r'^----+$', Keyword),  # Horizontal rules
-            (r'[^\n\'\[{!_~^,|]+', Text),
-            (r'\n', Text),
-            (r'.', Text),
-        ],
-        'codeblock': [
-            (r'\}\}\}', Name.Builtin, '#pop'),
-            # these blocks are allowed to be nested in Trac, but not MoinMoin
-            (r'\{\{\{', Text, '#push'),
-            (r'[^{}]+', Comment.Preproc),  # slurp boring text
-            (r'.', Comment.Preproc),  # allow loose { or }
-        ],
-    }
-
-
-class RstLexer(RegexLexer):
-    """
-    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
-
-    .. versionadded:: 0.7
-
-    Additional options accepted:
-
-    `handlecodeblocks`
-        Highlight the contents of ``.. sourcecode:: language``,
-        ``.. code:: language`` and ``.. code-block:: language``
-        directives with a lexer for the given language (default:
-        ``True``).
-
-        .. versionadded:: 0.8
-    """
-    name = 'reStructuredText'
-    aliases = ['rst', 'rest', 'restructuredtext']
-    filenames = ['*.rst', '*.rest']
-    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
-    flags = re.MULTILINE
-
-    def _handle_sourcecode(self, match):
-        from pygments.lexers import get_lexer_by_name
-
-        # section header
-        yield match.start(1), Punctuation, match.group(1)
-        yield match.start(2), Text, match.group(2)
-        yield match.start(3), Operator.Word, match.group(3)
-        yield match.start(4), Punctuation, match.group(4)
-        yield match.start(5), Text, match.group(5)
-        yield match.start(6), Keyword, match.group(6)
-        yield match.start(7), Text, match.group(7)
-
-        # lookup lexer if wanted and existing
-        lexer = None
-        if self.handlecodeblocks:
-            try:
-                lexer = get_lexer_by_name(match.group(6).strip())
-            except ClassNotFound:
-                pass
-        indention = match.group(8)
-        indention_size = len(indention)
-        code = (indention + match.group(9) + match.group(10) + match.group(11))
-
-        # no lexer for this language. handle it like it was a code block
-        if lexer is None:
-            yield match.start(8), String, code
-            return
-
-        # highlight the lines with the lexer.
-        ins = []
-        codelines = code.splitlines(True)
-        code = ''
-        for line in codelines:
-            if len(line) > indention_size:
-                ins.append((len(code), [(0, Text, line[:indention_size])]))
-                code += line[indention_size:]
-            else:
-                code += line
-        for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)):
-            yield item
-
-    # from docutils.parsers.rst.states
-    closers = u'\'")]}>\u2019\u201d\xbb!?'
-    unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
-    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
-                         % (re.escape(unicode_delimiters),
-                            re.escape(closers)))
-
-    tokens = {
-        'root': [
-            # Heading with overline
-            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
-             r'(.+)(\n)(\1)(\n)',
-             bygroups(Generic.Heading, Text, Generic.Heading,
-                      Text, Generic.Heading, Text)),
-            # Plain heading
-            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
-             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
-             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
-            # Bulleted lists
-            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            # Numbered lists
-            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            # Numbered, but keep words at BOL from becoming lists
-            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
-             bygroups(Text, Number, using(this, state='inline'))),
-            # Line blocks
-            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
-             bygroups(Text, Operator, using(this, state='inline'))),
-            # Sourcecode directives
-            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
-             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)',
-             _handle_sourcecode),
-            # A directive
-            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
-             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
-                      using(this, state='inline'))),
-            # A reference target
-            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
-             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
-            # A footnote/citation target
-            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
-             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
-            # A substitution def
-            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
-             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
-                      Punctuation, Text, using(this, state='inline'))),
-            # Comments
-            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
-            # Field list marker
-            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
-             bygroups(Text, Name.Class, Text)),
-            # Definition list
-            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
-             bygroups(using(this, state='inline'), using(this, state='inline'))),
-            # Code blocks
-            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*|)\n)+)',
-             bygroups(String.Escape, Text, String, String, Text, String)),
-            include('inline'),
-        ],
-        'inline': [
-            (r'\\.', Text),  # escape
-            (r'``', String, 'literal'),  # code
-            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
-             bygroups(String, String.Interpol, String)),
-            (r'`.+?`__?', String),  # reference
-            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
-             bygroups(Name.Variable, Name.Attribute)),  # role
-            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
-             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
-            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
-            (r'\*.+?\*', Generic.Emph),  # Emphasis
-            (r'\[.*?\]_', String),  # Footnote or citation
-            (r'<.+?>', Name.Tag),   # Hyperlink
-            (r'[^\\\n\[*`:]+', Text),
-            (r'.', Text),
-        ],
-        'literal': [
-            (r'[^`]+', String),
-            (r'``' + end_string_suffix, String, '#pop'),
-            (r'`', String),
-        ]
-    }
-
-    def __init__(self, **options):
-        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
-        RegexLexer.__init__(self, **options)
-
-    def analyse_text(text):
-        if text[:2] == '..' and text[2:3] != '.':
-            return 0.3
-        p1 = text.find("\n")
-        p2 = text.find("\n", p1 + 1)
-        if (p2 > -1 and              # has two lines
-                p1 * 2 + 1 == p2 and     # they are the same length
-                text[p1+1] in '-=' and   # the next line both starts and ends with
-                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
-            return 0.5
-
-
-class TexLexer(RegexLexer):
-    """
-    Lexer for the TeX and LaTeX typesetting languages.
-    """
-
-    name = 'TeX'
-    aliases = ['tex', 'latex']
-    filenames = ['*.tex', '*.aux', '*.toc']
-    mimetypes = ['text/x-tex', 'text/x-latex']
-
-    tokens = {
-        'general': [
-            (r'%.*?\n', Comment),
-            (r'[{}]', Name.Builtin),
-            (r'[&_^]', Name.Builtin),
-        ],
-        'root': [
-            (r'\\\[', String.Backtick, 'displaymath'),
-            (r'\\\(', String, 'inlinemath'),
-            (r'\$\$', String.Backtick, 'displaymath'),
-            (r'\$', String, 'inlinemath'),
-            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
-            (r'\\$', Keyword),
-            include('general'),
-            (r'[^\\$%&_^{}]+', Text),
-        ],
-        'math': [
-            (r'\\([a-zA-Z]+|.)', Name.Variable),
-            include('general'),
-            (r'[0-9]+', Number),
-            (r'[-=!+*/()\[\]]', Operator),
-            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
-        ],
-        'inlinemath': [
-            (r'\\\)', String, '#pop'),
-            (r'\$', String, '#pop'),
-            include('math'),
-        ],
-        'displaymath': [
-            (r'\\\]', String, '#pop'),
-            (r'\$\$', String, '#pop'),
-            (r'\$', Name.Builtin),
-            include('math'),
-        ],
-        'command': [
-            (r'\[.*?\]', Name.Attribute),
-            (r'\*', Keyword),
-            default('#pop'),
-        ],
-    }
-
-    def analyse_text(text):
-        for start in ("\\documentclass", "\\input", "\\documentstyle",
-                      "\\relax"):
-            if text[:len(start)] == start:
-                return True
-
-
-class GroffLexer(RegexLexer):
-    """
-    Lexer for the (g)roff typesetting language, supporting groff
-    extensions. Mainly useful for highlighting manpage sources.
-
-    .. versionadded:: 0.6
-    """
-
-    name = 'Groff'
-    aliases = ['groff', 'nroff', 'man']
-    filenames = ['*.[1234567]', '*.man']
-    mimetypes = ['application/x-troff', 'text/troff']
-
-    tokens = {
-        'root': [
-            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
-            (r'\.', Punctuation, 'request'),
-            # Regular characters, slurp till we find a backslash or newline
-            (r'[^\\\n]+', Text, 'textline'),
-            default('textline'),
-        ],
-        'textline': [
-            include('escapes'),
-            (r'[^\\\n]+', Text),
-            (r'\n', Text, '#pop'),
-        ],
-        'escapes': [
-            # groff has many ways to write escapes.
-            (r'\\"[^\n]*', Comment),
-            (r'\\[fn]\w', String.Escape),
-            (r'\\\(.{2}', String.Escape),
-            (r'\\.\[.*\]', String.Escape),
-            (r'\\.', String.Escape),
-            (r'\\\n', Text, 'request'),
-        ],
-        'request': [
-            (r'\n', Text, '#pop'),
-            include('escapes'),
-            (r'"[^\n"]+"', String.Double),
-            (r'\d+', Number),
-            (r'\S+', String),
-            (r'\s+', Text),
-        ],
-    }
-
-    def analyse_text(text):
-        if text[:1] != '.':
-            return False
-        if text[:3] == '.\\"':
-            return True
-        if text[:4] == '.TH ':
-            return True
-        if text[1:3].isalnum() and text[3].isspace():
-            return 0.9
-
-
-class MozPreprocHashLexer(RegexLexer):
-    """
-    Lexer for Mozilla Preprocessor files (with '#' as the marker).
-
-    Other data is left untouched.
-
-    .. versionadded:: 2.0
-    """
-    name = 'mozhashpreproc'
-    aliases = [name]
-    filenames = []
-    mimetypes = []
-
-    tokens = {
-        'root': [
-            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
-            (r'.+', Other),
-        ],
-        'exprstart': [
-            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
-            (words((
-                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
-                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
-                'include', 'includesubst', 'error')),
-             Comment.Preproc, '#pop'),
-        ],
-        'expr': [
-            (words(('!', '!=', '==', '&&', '||')), Operator),
-            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
-            (r'\)', Punctuation),
-            (r'[0-9]+', Number.Decimal),
-            (r'__\w+?__', Name.Variable),
-            (r'@\w+?@', Name.Class),
-            (r'\w+', Name),
-            (r'\n', Text, '#pop'),
-            (r'\s+', Text),
-            (r'\S', Punctuation),
-        ],
-    }
-
-
-class MozPreprocPercentLexer(MozPreprocHashLexer):
-    """
-    Lexer for Mozilla Preprocessor files (with '%' as the marker).
-
-    Other data is left untouched.
-
-    .. versionadded:: 2.0
-    """
-    name = 'mozpercentpreproc'
-    aliases = [name]
-    filenames = []
-    mimetypes = []
-
-    tokens = {
-        'root': [
-            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
-            (r'.+', Other),
-        ],
-    }
-
-
-class MozPreprocXulLexer(DelegatingLexer):
-    """
-    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
-    `XmlLexer`.
-
-    .. versionadded:: 2.0
-    """
-    name = "XUL+mozpreproc"
-    aliases = ['xul+mozpreproc']
-    filenames = ['*.xul.in']
-    mimetypes = []
-
-    def __init__(self, **options):
-        super(MozPreprocXulLexer, self).__init__(
-            XmlLexer, MozPreprocHashLexer, **options)
-
-
-class MozPreprocJavascriptLexer(DelegatingLexer):
-    """
-    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
-    `JavascriptLexer`.
-
-    .. versionadded:: 2.0
-    """
-    name = "Javascript+mozpreproc"
-    aliases = ['javascript+mozpreproc']
-    filenames = ['*.js.in']
-    mimetypes = []
-
-    def __init__(self, **options):
-        super(MozPreprocJavascriptLexer, self).__init__(
-            JavascriptLexer, MozPreprocHashLexer, **options)
-
-
-class MozPreprocCssLexer(DelegatingLexer):
-    """
-    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
-    `CssLexer`.
-
-    .. versionadded:: 2.0
-    """
-    name = "CSS+mozpreproc"
-    aliases = ['css+mozpreproc']
-    filenames = ['*.css.in']
-    mimetypes = []
-
-    def __init__(self, **options):
-        super(MozPreprocCssLexer, self).__init__(
-            CssLexer, MozPreprocPercentLexer, **options)
-
-
-class MarkdownLexer(RegexLexer):
-    """
-    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.
-
-    .. versionadded:: 2.2
-    """
-    name = 'markdown'
-    aliases = ['md']
-    filenames = ['*.md']
-    mimetypes = ["text/x-markdown"]
-    flags = re.MULTILINE
-
-    def _handle_codeblock(self, match):
-        """
-        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
-        """
-        from pygments.lexers import get_lexer_by_name
-
-        # section header
-        yield match.start(1), String        , match.group(1)
-        yield match.start(2), String        , match.group(2)
-        yield match.start(3), Text          , match.group(3)
-
-        # lookup lexer if wanted and existing
-        lexer = None
-        if self.handlecodeblocks:
-            try:
-                lexer = get_lexer_by_name( match.group(2).strip() )
-            except ClassNotFound:
-                pass
-        code = match.group(4)
-
-        # no lexer for this language. handle it like it was a code block
-        if lexer is None:
-            yield match.start(4), String, code
-        else:
-            for item in do_insertions([], lexer.get_tokens_unprocessed(code)):
-                yield item
-
-        yield match.start(5), String        , match.group(5)
-
-    tokens = {
-        'root': [
-            # heading with pound prefix
-            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
-            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
-            # task list
-            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
-            bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
-            # bulleted lists
-            (r'^(\s*)([*-])(\s)(.+\n)',
-            bygroups(Text, Keyword, Text, using(this, state='inline'))),
-            # numbered lists
-            (r'^(\s*)([0-9]+\.)( .+\n)',
-            bygroups(Text, Keyword, using(this, state='inline'))),
-            # quote
-            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
-            # text block
-            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
-            # code block with language
-            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
-
-            include('inline'),
-        ],
-        'inline': [
-            # escape
-            (r'\\.', Text),
-            # italics
-            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)),
-            # bold
-            # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics
-            (r'(\s)((\*\*|__).*\3)((?=\W|\n))', bygroups(Text, Generic.Strong, None, Text)),
-            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
-            # strikethrough
-            (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)),
-            # inline code
-            (r'`[^`]+`', String.Backtick),
-            # mentions and topics (twitter and github stuff)
-            (r'[@#][\w/:]+', Name.Entity),
-            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
-            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
-            # reference-style links, e.g.:
-            #   [an example][id]
-            #   [id]: http://example.com/
-            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
-            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', bygroups(Text, Name.Label, Text, Name.Attribute)),
-
-            # general text, must come last!
-            (r'[^\\\s]+', Text),
-            (r'.', Text),
-        ],
-    }
-
-    def __init__(self, **options):
-        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
-        RegexLexer.__init__(self, **options)
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.markup
+    ~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for non-HTML markup languages.
+
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexers.html import HtmlLexer, XmlLexer
+from pygments.lexers.javascript import JavascriptLexer
+from pygments.lexers.css import CssLexer
+
+from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \
+    using, this, do_insertions, default, words
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Number, Punctuation, Generic, Other
+from pygments.util import get_bool_opt, ClassNotFound
+
+__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
+           'MozPreprocHashLexer', 'MozPreprocPercentLexer',
+           'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
+           'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer']
+
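All of the lexers exported above register aliases, filename patterns, and MIME types, so they are normally reached through Pygments' generic lookup helpers rather than instantiated directly. A minimal usage sketch (the BBCode sample input is invented for illustration):

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

source = "[b]bold[/b] and [color=green]green text[/color]"
lexer = get_lexer_by_name("bbcode")  # resolved via the aliases list above
print(highlight(source, lexer, TerminalFormatter()))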
+
+class BBCodeLexer(RegexLexer):
+    """
+    A lexer that highlights BBCode(-like) syntax.
+
+    .. versionadded:: 0.6
+    """
+
+    name = 'BBCode'
+    aliases = ['bbcode']
+    mimetypes = ['text/x-bbcode']
+
+    tokens = {
+        'root': [
+            (r'[^[]+', Text),
+            # tag/end tag begin
+            (r'\[/?\w+', Keyword, 'tag'),
+            # stray bracket
+            (r'\[', Text),
+        ],
+        'tag': [
+            (r'\s+', Text),
+            # attribute with value
+            (r'(\w+)(=)("?[^\s"\]]+"?)',
+             bygroups(Name.Attribute, Operator, String)),
+            # tag argument (a la [color=green])
+            (r'(=)("?[^\s"\]]+"?)',
+             bygroups(Operator, String)),
+            # tag end
+            (r'\]', Keyword, '#pop'),
+        ],
+    }
+
+
+class MoinWikiLexer(RegexLexer):
+    """
+    For MoinMoin (and Trac) Wiki markup.
+
+    .. versionadded:: 0.7
+    """
+
+    name = 'MoinMoin/Trac Wiki markup'
+    aliases = ['trac-wiki', 'moin']
+    filenames = []
+    mimetypes = ['text/x-trac-wiki']
+    flags = re.MULTILINE | re.IGNORECASE
+
+    tokens = {
+        'root': [
+            (r'^#.*$', Comment),
+            (r'(!)(\S+)', bygroups(Keyword, Text)),  # Ignore-next
+            # Titles
+            (r'^(=+)([^=]+)(=+)(\s*#.+)?$',
+             bygroups(Generic.Heading, using(this), Generic.Heading, String)),
+            # Literal code blocks, with optional shebang
+            (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'),
+            (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment),  # Formatting
+            # Lists
+            (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)),
+            (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)),
+            # Other Formatting
+            (r'\[\[\w+.*?\]\]', Keyword),  # Macro
+            (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])',
+             bygroups(Keyword, String, Keyword)),  # Link
+            (r'^----+$', Keyword),  # Horizontal rules
+            (r'[^\n\'\[{!_~^,|]+', Text),
+            (r'\n', Text),
+            (r'.', Text),
+        ],
+        'codeblock': [
+            (r'\}\}\}', Name.Builtin, '#pop'),
+            # these blocks are allowed to be nested in Trac, but not MoinMoin
+            (r'\{\{\{', Text, '#push'),
+            (r'[^{}]+', Comment.Preproc),  # slurp boring text
+            (r'.', Comment.Preproc),  # allow loose { or }
+        ],
+    }
+
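The 'codeblock' state above pairs '#push' with '#pop' so that Trac-style nested {{{ ... }}} blocks stay balanced: each inner '{{{' re-enters the state and each '}}}' leaves one level. A small sketch of the resulting token stream (input invented for illustration):

from pygments.lexers.markup import MoinWikiLexer

text = "{{{\nouter {{{ inner }}} still literal\n}}}\n"
for token_type, value in MoinWikiLexer().get_tokens(text):
    print(token_type, repr(value))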
+
+class RstLexer(RegexLexer):
+    """
+    For `reStructuredText <http://docutils.sf.net/rst.html>`_ markup.
+
+    .. versionadded:: 0.7
+
+    Additional options accepted:
+
+    `handlecodeblocks`
+        Highlight the contents of ``.. sourcecode:: language``,
+        ``.. code:: language`` and ``.. code-block:: language``
+        directives with a lexer for the given language (default:
+        ``True``).
+
+        .. versionadded:: 0.8
+    """
+    name = 'reStructuredText'
+    aliases = ['rst', 'rest', 'restructuredtext']
+    filenames = ['*.rst', '*.rest']
+    mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"]
+    flags = re.MULTILINE
+
+    def _handle_sourcecode(self, match):
+        from pygments.lexers import get_lexer_by_name
+
+        # section header
+        yield match.start(1), Punctuation, match.group(1)
+        yield match.start(2), Text, match.group(2)
+        yield match.start(3), Operator.Word, match.group(3)
+        yield match.start(4), Punctuation, match.group(4)
+        yield match.start(5), Text, match.group(5)
+        yield match.start(6), Keyword, match.group(6)
+        yield match.start(7), Text, match.group(7)
+
+        # lookup lexer if wanted and existing
+        lexer = None
+        if self.handlecodeblocks:
+            try:
+                lexer = get_lexer_by_name(match.group(6).strip())
+            except ClassNotFound:
+                pass
+        indention = match.group(8)
+        indention_size = len(indention)
+        code = (indention + match.group(9) + match.group(10) + match.group(11))
+
+        # no lexer for this language. handle it like it was a code block
+        if lexer is None:
+            yield match.start(8), String, code
+            return
+
+        # highlight the lines with the lexer.
+        ins = []
+        codelines = code.splitlines(True)
+        code = ''
+        for line in codelines:
+            if len(line) > indention_size:
+                ins.append((len(code), [(0, Text, line[:indention_size])]))
+                code += line[indention_size:]
+            else:
+                code += line
+        yield from do_insertions(ins, lexer.get_tokens_unprocessed(code))
+
+    # from docutils.parsers.rst.states
+    closers = '\'")]}>\u2019\u201d\xbb!?'
+    unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
+    end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
+                         % (re.escape(unicode_delimiters),
+                            re.escape(closers)))
+
+    tokens = {
+        'root': [
+            # Heading with overline
+            (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)'
+             r'(.+)(\n)(\1)(\n)',
+             bygroups(Generic.Heading, Text, Generic.Heading,
+                      Text, Generic.Heading, Text)),
+            # Plain heading
+            (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|'
+             r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)',
+             bygroups(Generic.Heading, Text, Generic.Heading, Text)),
+            # Bulleted lists
+            (r'^(\s*)([-*+])( .+\n(?:\1  .+\n)*)',
+             bygroups(Text, Number, using(this, state='inline'))),
+            # Numbered lists
+            (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1  .+\n)*)',
+             bygroups(Text, Number, using(this, state='inline'))),
+            (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1  .+\n)*)',
+             bygroups(Text, Number, using(this, state='inline'))),
+            # Numbered, but keep words at BOL from becoming lists
+            (r'^(\s*)([A-Z]+\.)( .+\n(?:\1  .+\n)+)',
+             bygroups(Text, Number, using(this, state='inline'))),
+            (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1  .+\n)+)',
+             bygroups(Text, Number, using(this, state='inline'))),
+            # Line blocks
+            (r'^(\s*)(\|)( .+\n(?:\|  .+\n)*)',
+             bygroups(Text, Operator, using(this, state='inline'))),
+            # Sourcecode directives
+            (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)'
+             r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)',
+             _handle_sourcecode),
+            # A directive
+            (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
+             bygroups(Punctuation, Text, Operator.Word, Punctuation, Text,
+                      using(this, state='inline'))),
+            # A reference target
+            (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$',
+             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
+            # A footnote/citation target
+            (r'^( *\.\.)(\s*)(\[.+\])(.*?)$',
+             bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))),
+            # A substitution def
+            (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))',
+             bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word,
+                      Punctuation, Text, using(this, state='inline'))),
+            # Comments
+            (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc),
+            # Field list marker
+            (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)',
+             bygroups(Text, Name.Class, Text)),
+            # Definition list
+            (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)',
+             bygroups(using(this, state='inline'), using(this, state='inline'))),
+            # Code blocks
+            (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)',
+             bygroups(String.Escape, Text, String, String, Text, String)),
+            include('inline'),
+        ],
+        'inline': [
+            (r'\\.', Text),  # escape
+            (r'``', String, 'literal'),  # code
+            (r'(`.+?)(<.+?>)(`__?)',  # reference with inline target
+             bygroups(String, String.Interpol, String)),
+            (r'`.+?`__?', String),  # reference
+            (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?',
+             bygroups(Name.Variable, Name.Attribute)),  # role
+            (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)',
+             bygroups(Name.Attribute, Name.Variable)),  # role (content first)
+            (r'\*\*.+?\*\*', Generic.Strong),  # Strong emphasis
+            (r'\*.+?\*', Generic.Emph),  # Emphasis
+            (r'\[.*?\]_', String),  # Footnote or citation
+            (r'<.+?>', Name.Tag),   # Hyperlink
+            (r'[^\\\n\[*`:]+', Text),
+            (r'.', Text),
+        ],
+        'literal': [
+            (r'[^`]+', String),
+            (r'``' + end_string_suffix, String, '#pop'),
+            (r'`', String),
+        ]
+    }
+
+    def __init__(self, **options):
+        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
+        RegexLexer.__init__(self, **options)
+
+    def analyse_text(text):
+        if text[:2] == '..' and text[2:3] != '.':
+            return 0.3
+        p1 = text.find("\n")
+        p2 = text.find("\n", p1 + 1)
+        if (p2 > -1 and              # has two lines
+                p1 * 2 + 1 == p2 and     # they are the same length
+                text[p1+1] in '-=' and   # the next line both starts and ends with
+                text[p1+1] == text[p2-1]):  # ...a sufficiently high header
+            return 0.5
+
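_handle_sourcecode() above relies on do_insertions() to merge two token streams: the directive's indentation is recorded as insertions, the de-indented code is run through the sub-lexer, and the helper re-interleaves both at the recorded offsets. A stripped-down sketch of that mechanism (values invented for illustration):

from pygments.lexer import do_insertions
from pygments.lexers import PythonLexer
from pygments.token import Text

code = "print('hi')\n"
# Re-insert four spaces of indentation before offset 0 of the code.
ins = [(0, [(0, Text, "    ")])]
for index, token, value in do_insertions(
        ins, PythonLexer().get_tokens_unprocessed(code)):
    print(index, token, repr(value))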
+
+class TexLexer(RegexLexer):
+    """
+    Lexer for the TeX and LaTeX typesetting languages.
+    """
+
+    name = 'TeX'
+    aliases = ['tex', 'latex']
+    filenames = ['*.tex', '*.aux', '*.toc']
+    mimetypes = ['text/x-tex', 'text/x-latex']
+
+    tokens = {
+        'general': [
+            (r'%.*?\n', Comment),
+            (r'[{}]', Name.Builtin),
+            (r'[&_^]', Name.Builtin),
+        ],
+        'root': [
+            (r'\\\[', String.Backtick, 'displaymath'),
+            (r'\\\(', String, 'inlinemath'),
+            (r'\$\$', String.Backtick, 'displaymath'),
+            (r'\$', String, 'inlinemath'),
+            (r'\\([a-zA-Z]+|.)', Keyword, 'command'),
+            (r'\\$', Keyword),
+            include('general'),
+            (r'[^\\$%&_^{}]+', Text),
+        ],
+        'math': [
+            (r'\\([a-zA-Z]+|.)', Name.Variable),
+            include('general'),
+            (r'[0-9]+', Number),
+            (r'[-=!+*/()\[\]]', Operator),
+            (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin),
+        ],
+        'inlinemath': [
+            (r'\\\)', String, '#pop'),
+            (r'\$', String, '#pop'),
+            include('math'),
+        ],
+        'displaymath': [
+            (r'\\\]', String, '#pop'),
+            (r'\$\$', String, '#pop'),
+            (r'\$', Name.Builtin),
+            include('math'),
+        ],
+        'command': [
+            (r'\[.*?\]', Name.Attribute),
+            (r'\*', Keyword),
+            default('#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        for start in ("\\documentclass", "\\input", "\\documentstyle",
+                      "\\relax"):
+            if text[:len(start)] == start:
+                return True
+
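analyse_text() above feeds Pygments' content-based guessing, so input that opens with one of the listed commands should be detected as TeX even without a filename. A hedged sketch:

from pygments.lexers import guess_lexer

# \documentclass matches one of the analyse_text() prefixes above.
print(guess_lexer(r"\documentclass{article}").name)  # expected: 'TeX'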
+
+class GroffLexer(RegexLexer):
+    """
+    Lexer for the (g)roff typesetting language, supporting groff
+    extensions. Mainly useful for highlighting manpage sources.
+
+    .. versionadded:: 0.6
+    """
+
+    name = 'Groff'
+    aliases = ['groff', 'nroff', 'man']
+    filenames = ['*.[1234567]', '*.man']
+    mimetypes = ['application/x-troff', 'text/troff']
+
+    tokens = {
+        'root': [
+            (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'),
+            (r'\.', Punctuation, 'request'),
+            # Regular characters, slurp till we find a backslash or newline
+            (r'[^\\\n]+', Text, 'textline'),
+            default('textline'),
+        ],
+        'textline': [
+            include('escapes'),
+            (r'[^\\\n]+', Text),
+            (r'\n', Text, '#pop'),
+        ],
+        'escapes': [
+            # groff has many ways to write escapes.
+            (r'\\"[^\n]*', Comment),
+            (r'\\[fn]\w', String.Escape),
+            (r'\\\(.{2}', String.Escape),
+            (r'\\.\[.*\]', String.Escape),
+            (r'\\.', String.Escape),
+            (r'\\\n', Text, 'request'),
+        ],
+        'request': [
+            (r'\n', Text, '#pop'),
+            include('escapes'),
+            (r'"[^\n"]+"', String.Double),
+            (r'\d+', Number),
+            (r'\S+', String),
+            (r'\s+', Text),
+        ],
+    }
+
+    def analyse_text(text):
+        if text[:1] != '.':
+            return False
+        if text[:3] == '.\\"':
+            return True
+        if text[:4] == '.TH ':
+            return True
+        if text[1:3].isalnum() and text[3].isspace():
+            return 0.9
+
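A quick sketch of the request handling above: a leading '.' plus a word enters the 'request' state, where arguments are tokenized as String, Number, or String.Double until the newline pops back to 'root' (sample input invented):

from pygments.lexers.markup import GroffLexer

for token_type, value in GroffLexer().get_tokens('.TH GREP 1 "2020-09"\n'):
    print(token_type, repr(value))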
+
+class MozPreprocHashLexer(RegexLexer):
+    """
+    Lexer for Mozilla Preprocessor files (with '#' as the marker).
+
+    Other data is left untouched.
+
+    .. versionadded:: 2.0
+    """
+    name = 'mozhashpreproc'
+    aliases = [name]
+    filenames = []
+    mimetypes = []
+
+    tokens = {
+        'root': [
+            (r'^#', Comment.Preproc, ('expr', 'exprstart')),
+            (r'.+', Other),
+        ],
+        'exprstart': [
+            (r'(literal)(.*)', bygroups(Comment.Preproc, Text), '#pop:2'),
+            (words((
+                'define', 'undef', 'if', 'ifdef', 'ifndef', 'else', 'elif',
+                'elifdef', 'elifndef', 'endif', 'expand', 'filter', 'unfilter',
+                'include', 'includesubst', 'error')),
+             Comment.Preproc, '#pop'),
+        ],
+        'expr': [
+            (words(('!', '!=', '==', '&&', '||')), Operator),
+            (r'(defined)(\()', bygroups(Keyword, Punctuation)),
+            (r'\)', Punctuation),
+            (r'[0-9]+', Number.Decimal),
+            (r'__\w+?__', Name.Variable),
+            (r'@\w+?@', Name.Class),
+            (r'\w+', Name),
+            (r'\n', Text, '#pop'),
+            (r'\s+', Text),
+            (r'\S', Punctuation),
+        ],
+    }
+
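The two-state entry above ('exprstart', then 'expr') first classifies the directive keyword, then tokenizes the rest of the line as an expression; everything outside '#' lines is passed through as Other. A short sketch (input invented):

from pygments.lexers.markup import MozPreprocHashLexer

src = "#ifdef MOZ_DEBUG\nplain text is passed through as Other\n#endif\n"
for token_type, value in MozPreprocHashLexer().get_tokens(src):
    print(token_type, repr(value))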
+
+class MozPreprocPercentLexer(MozPreprocHashLexer):
+    """
+    Lexer for Mozilla Preprocessor files (with '%' as the marker).
+
+    Other data is left untouched.
+
+    .. versionadded:: 2.0
+    """
+    name = 'mozpercentpreproc'
+    aliases = [name]
+    filenames = []
+    mimetypes = []
+
+    tokens = {
+        'root': [
+            (r'^%', Comment.Preproc, ('expr', 'exprstart')),
+            (r'.+', Other),
+        ],
+    }
+
+
+class MozPreprocXulLexer(DelegatingLexer):
+    """
+    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
+    `XmlLexer`.
+
+    .. versionadded:: 2.0
+    """
+    name = "XUL+mozpreproc"
+    aliases = ['xul+mozpreproc']
+    filenames = ['*.xul.in']
+    mimetypes = []
+
+    def __init__(self, **options):
+        super().__init__(XmlLexer, MozPreprocHashLexer, **options)
+
+
+class MozPreprocJavascriptLexer(DelegatingLexer):
+    """
+    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
+    `JavascriptLexer`.
+
+    .. versionadded:: 2.0
+    """
+    name = "Javascript+mozpreproc"
+    aliases = ['javascript+mozpreproc']
+    filenames = ['*.js.in']
+    mimetypes = []
+
+    def __init__(self, **options):
+        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)
+
+
+class MozPreprocCssLexer(DelegatingLexer):
+    """
+    Subclass of the `MozPreprocHashLexer` that highlights unlexed data with the
+    `CssLexer`.
+
+    .. versionadded:: 2.0
+    """
+    name = "CSS+mozpreproc"
+    aliases = ['css+mozpreproc']
+    filenames = ['*.css.in']
+    mimetypes = []
+
+    def __init__(self, **options):
+        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
+
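The three classes above follow the DelegatingLexer pattern: the preprocessor lexer (the second argument) runs over the whole input first, and every span it emits as Other is then re-highlighted by the root lexer given as the first argument (XmlLexer, JavascriptLexer, or CssLexer). A sketch (input invented):

from pygments.lexers.markup import MozPreprocXulLexer

src = '#ifdef MOZ_DEBUG\n<box flex="1"/>\n#endif\n'
for token_type, value in MozPreprocXulLexer().get_tokens(src):
    print(token_type, repr(value))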
+
+class MarkdownLexer(RegexLexer):
+    """
+    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.
+
+    .. versionadded:: 2.2
+    """
+    name = 'markdown'
+    aliases = ['md']
+    filenames = ['*.md', '*.markdown']
+    mimetypes = ["text/x-markdown"]
+    flags = re.MULTILINE
+
+    def _handle_codeblock(self, match):
+        """
+        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
+        """
+        from pygments.lexers import get_lexer_by_name
+
+        # section header
+        yield match.start(1), String.Backtick, match.group(1)
+        yield match.start(2), String.Backtick, match.group(2)
+        yield match.start(3), Text           , match.group(3)
+
+        # lookup lexer if wanted and existing
+        lexer = None
+        if self.handlecodeblocks:
+            try:
+                lexer = get_lexer_by_name( match.group(2).strip() )
+            except ClassNotFound:
+                pass
+        code = match.group(4)
+
+        # no lexer for this language. handle it like it was a code block
+        if lexer is None:
+            yield match.start(4), String, code
+        else:
+            yield from do_insertions([], lexer.get_tokens_unprocessed(code))
+
+        yield match.start(5), String.Backtick, match.group(5)
+
+    tokens = {
+        'root': [
+            # heading with '#' prefix (atx-style)
+            (r'(^#[^#].+)(\n)', bygroups(Generic.Heading, Text)),
+            # subheading with '#' prefix (atx-style)
+            (r'(^#{2,6}[^#].+)(\n)', bygroups(Generic.Subheading, Text)),
+            # heading with '=' underlines (Setext-style)
+            (r'^(.+)(\n)(=+)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)),
+            # subheading with '-' underlines (Setext-style)
+            (r'^(.+)(\n)(-+)(\n)', bygroups(Generic.Subheading, Text, Generic.Subheading, Text)),
+            # task list
+            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
+            bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
+            # bulleted list
+            (r'^(\s*)([*-])(\s)(.+\n)',
+            bygroups(Text, Keyword, Text, using(this, state='inline'))),
+            # numbered list
+            (r'^(\s*)([0-9]+\.)( .+\n)',
+            bygroups(Text, Keyword, using(this, state='inline'))),
+            # quote
+            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
+            # code block fenced by 3 backticks
+            (r'^(\s*```\n(.+\n)+\s*```$)', String.Backtick),
+            # code block with language
+            (r'^(\s*```)(\w+)(\n)([\w\W]*?)(^\s*```$)', _handle_codeblock),
+            # code block indented with 4 spaces or 1 tab
+            (r'(\n\n)((\ {4}|\t)(.+\n)+)', bygroups(Text, String.Backtick)),
+
+            include('inline'),
+        ],
+        'inline': [
+            # escape
+            (r'\\.', Text),
+            # inline code
+            (r'([^`])(`[^`\n]+`)', bygroups(Text, String.Backtick)),
+            # warning: the following rules eat outer tags.
+            # eg. **foo _bar_ baz** => foo and baz are not recognized as bold
+            # bold fenced by '**'
+            (r'(\*\*[^* \n][^*\n]*\*\*)', bygroups(Generic.Strong)),
+            # bold fenced by '__'
+            (r'(\_\_[^_ \n][^_\n]*\_\_)', bygroups(Generic.Strong)),
+            # italics fenced by '*'
+            (r'(\*[^* \n][^*\n]*\*)', bygroups(Generic.Emph)),
+            # italics fenced by '_'
+            (r'(\_[^_ \n][^_\n]*\_)', bygroups(Generic.Emph)),
+            # strikethrough
+            (r'([^~]*)(~~[^~]+~~)', bygroups(Text, Generic.Deleted)),
+            # mentions and topics (twitter and github stuff)
+            (r'[@#][\w/:]+', Name.Entity),
+            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
+            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))',
+             bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
+            # reference-style links, e.g.:
+            #   [an example][id]
+            #   [id]: http://example.com/
+            (r'(\[)([^]]+)(\])(\[)([^]]*)(\])',
+             bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)),
+            (r'^(\s*\[)([^]]*)(\]:\s*)(.+)',
+             bygroups(Text, Name.Label, Text, Name.Attribute)),
+
+            # general text, must come last!
+            (r'[^\\\s]+', Text),
+            (r'.', Text),
+        ],
+    }
+
+    def __init__(self, **options):
+        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
+        RegexLexer.__init__(self, **options)
+
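With handlecodeblocks left at its default of True, the language word after the opening fence selects a sub-lexer via get_lexer_by_name(), as _handle_codeblock() above shows. A sketch (input invented):

from pygments.lexers.markup import MarkdownLexer

md = "```python\nprint('hi')\n```\n"
for token_type, value in MarkdownLexer().get_tokens(md):
    print(token_type, repr(value))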
+
+class TiddlyWiki5Lexer(RegexLexer):
+    """
+    For `TiddlyWiki5 <https://tiddlywiki.com/#TiddlerFiles>`_ markup.
+
+    .. versionadded:: 2.7
+    """
+    name = 'tiddler'
+    aliases = ['tid']
+    filenames = ['*.tid']
+    mimetypes = ["text/vnd.tiddlywiki"]
+    flags = re.MULTILINE
+
+    def _handle_codeblock(self, match):
+        """
+        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
+        """
+        from pygments.lexers import get_lexer_by_name
+
+        # section header
+        yield match.start(1), String, match.group(1)
+        yield match.start(2), String, match.group(2)
+        yield match.start(3), Text,   match.group(3)
+
+        # lookup lexer if wanted and existing
+        lexer = None
+        if self.handlecodeblocks:
+            try:
+                lexer = get_lexer_by_name(match.group(2).strip())
+            except ClassNotFound:
+                pass
+        code = match.group(4)
+
+        # no lexer for this language. handle it like it was a code block
+        if lexer is None:
+            yield match.start(4), String, code
+            return
+
+        yield from do_insertions([], lexer.get_tokens_unprocessed(code))
+
+        yield match.start(5), String, match.group(5)
+
+    def _handle_cssblock(self, match):
+        """
+        match args: 1:style tag 2:newline, 3:code, 4:closing style tag
+        """
+        from pygments.lexers import get_lexer_by_name
+
+        # section header
+        yield match.start(1), String, match.group(1)
+        yield match.start(2), String, match.group(2)
+
+        lexer = None
+        if self.handlecodeblocks:
+            try:
+                lexer = get_lexer_by_name('css')
+            except ClassNotFound:
+                pass
+        code = match.group(3)
+
+        # no lexer for this language. handle it like it was a code block
+        if lexer is None:
+            yield match.start(3), String, code
+            return
+
+        yield from do_insertions([], lexer.get_tokens_unprocessed(code))
+
+        yield match.start(4), String, match.group(4)
+
+    tokens = {
+        'root': [
+            # title in metadata section
+            (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)),
+            # headings
+            (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)),
+            (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
+            # bulleted or numbered lists or single-line block quotes
+            # (can be mixed)
+            (r'^(\s*)([*#>]+)(\s*)(.+\n)',
+             bygroups(Text, Keyword, Text, using(this, state='inline'))),
+            # multi-line block quotes
+            (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)),
+            # table header
+            (r'^(\|.*?\|h)$', bygroups(Generic.Strong)),
+            # table footer or caption
+            (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)),
+            # table class
+            (r'^(\|.*?\|k)$', bygroups(Name.Tag)),
+            # definitions
+            (r'^(;.*)$', bygroups(Generic.Strong)),
+            # text block
+            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
+            # code block with language
+            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
+            # CSS style block
+            (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock),
+
+            include('keywords'),
+            include('inline'),
+        ],
+        'keywords': [
+            (words((
+                '\\define', '\\end', 'caption', 'created', 'modified', 'tags',
+                'title', 'type'), prefix=r'^', suffix=r'\b'),
+             Keyword),
+        ],
+        'inline': [
+            # escape
+            (r'\\.', Text),
+            # created or modified date
+            (r'\d{17}', Number.Integer),
+            # italics
+            (r'(\s)(//[^/]+//)((?=\W|\n))',
+             bygroups(Text, Generic.Emph, Text)),
+            # superscript
+            (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)),
+            # subscript
+            (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)),
+            # underscore
+            (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)),
+            # bold
+            (r"(\s)(''[^']+'')((?=\W|\n))",
+             bygroups(Text, Generic.Strong, Text)),
+            # strikethrough
+            (r'(\s)(~~[^~]+~~)((?=\W|\n))',
+             bygroups(Text, Generic.Deleted, Text)),
+            # TiddlyWiki variables
+            (r'<<[^>]+>>', Name.Tag),
+            (r'\$\$[^$]+\$\$', Name.Tag),
+            (r'\$\([^)]+\)\$', Name.Tag),
+            # TiddlyWiki style or class
+            (r'^@@.*$', Name.Tag),
+            # HTML tags
+            (r'</?[^>]+>', Name.Tag),
+            # inline code
+            (r'`[^`]+`', String.Backtick),
+            # HTML escaped symbols
+            (r'&\S*?;', String.Regex),
+            # Wiki links
+            (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)),
+            # External links
+            (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})',
+            bygroups(Text, Name.Tag, Text, Name.Attribute, Text)),
+            # Transclusion
+            (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)),
+            # URLs
+            (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)),
+
+            # general text, must come last!
+            (r'[\w]+', Text),
+            (r'.', Text)
+        ],
+    }
+
+    def __init__(self, **options):
+        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
+        RegexLexer.__init__(self, **options)
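The new TiddlyWiki5Lexer declares the '*.tid' filename pattern, so filename-based lookup should resolve to it — assuming the lexer mapping (pygments.lexers._mapping) was regenerated for this release, as a vendored update like this normally includes. A sketch:

from pygments.lexers import get_lexer_for_filename

print(get_lexer_for_filename("notes.tid").name)  # expected: 'tiddler'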
