ThirdParty/Pygments/pygments/lexers/markup.py

changeset 5713
6762afd9f963
parent 4697
c2e9bf425554
child 6651
e8f3b5568b21
--- a/ThirdParty/Pygments/pygments/lexers/markup.py	Sun Apr 23 16:40:31 2017 +0200
+++ b/ThirdParty/Pygments/pygments/lexers/markup.py	Tue Apr 25 18:36:38 2017 +0200
@@ -5,7 +5,7 @@
 
     Lexers for non-HTML markup languages.
 
-    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
@@ -24,7 +24,7 @@
 __all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer',
            'MozPreprocHashLexer', 'MozPreprocPercentLexer',
            'MozPreprocXulLexer', 'MozPreprocJavascriptLexer',
-           'MozPreprocCssLexer']
+           'MozPreprocCssLexer', 'MarkdownLexer']
 
 
 class BBCodeLexer(RegexLexer):
@@ -500,3 +500,113 @@
         super(MozPreprocCssLexer, self).__init__(
             CssLexer, MozPreprocPercentLexer, **options)
 
+
+class MarkdownLexer(RegexLexer):
+    """
+    For `Markdown <https://help.github.com/categories/writing-on-github/>`_ markup.
+
+    Additional options accepted:
+
+    `handlecodeblocks`
+        If true (the default), the body of a fenced code block that names a
+        language is highlighted with the lexer registered under that name.
+        Otherwise, or when no such lexer exists, the body is emitted as a
+        single ``String`` token.
+
+    .. versionadded:: 2.2
+    """
+    name = 'markdown'
+    aliases = ['md']
+    filenames = ['*.md']
+    mimetypes = ["text/x-markdown"]
+    flags = re.MULTILINE
+
+    def _handle_codeblock(self, match):
+        """
+        Tokenize a fenced code block that names its language.
+
+        match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks
+
+        Yields ``(index, tokentype, value)`` tuples for all five groups.
+        """
+        from pygments.lexers import get_lexer_by_name
+
+        # section header
+        yield match.start(1), String, match.group(1)
+        yield match.start(2), String, match.group(2)
+        yield match.start(3), Text, match.group(3)
+
+        # lookup lexer if wanted and existing
+        lexer = None
+        if self.handlecodeblocks:
+            try:
+                lexer = get_lexer_by_name(match.group(2).strip())
+            except ClassNotFound:
+                # unknown language name: fall back to plain String output
+                pass
+        code = match.group(4)
+        code_start = match.start(4)
+
+        if lexer is None:
+            # no lexer for this language: emit the body as one String token
+            yield code_start, String, code
+        else:
+            # the nested lexer reports positions relative to `code`, so
+            # shift them to positions within the whole document
+            for index, token, value in lexer.get_tokens_unprocessed(code):
+                yield code_start + index, token, value
+
+        # closing fence: emitted on both paths so no input text is dropped
+        yield match.start(5), String, match.group(5)
+
+    tokens = {
+        'root': [
+            # heading with pound prefix
+            (r'^(#)([^#].+\n)', bygroups(Generic.Heading, Text)),
+            (r'^(#{2,6})(.+\n)', bygroups(Generic.Subheading, Text)),
+            # task list
+            (r'^(\s*)([*-] )(\[[ xX]\])( .+\n)',
+             bygroups(Text, Keyword, Keyword, using(this, state='inline'))),
+            # bulleted lists
+            (r'^(\s*)([*-])(\s)(.+\n)',
+             bygroups(Text, Keyword, Text, using(this, state='inline'))),
+            # numbered lists
+            (r'^(\s*)([0-9]+\.)( .+\n)',
+             bygroups(Text, Keyword, using(this, state='inline'))),
+            # quote
+            (r'^(\s*>\s)(.+\n)', bygroups(Keyword, Generic.Emph)),
+            # text block
+            (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)),
+            # code block with language
+            (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock),
+
+            include('inline'),
+        ],
+        'inline': [
+            # escape
+            (r'\\.', Text),
+            # italics
+            (r'(\s)([*_][^*_]+[*_])(\W|\n)', bygroups(Text, Generic.Emph, Text)),
+            # bold
+            # warning: the following rule eats internal tags. eg. **foo _bar_ baz** bar is not italics
+            (r'(\s)((\*\*|__).*\3)((?=\W|\n))', bygroups(Text, Generic.Strong, None, Text)),
+            # "proper way" (r'(\s)([*_]{2}[^*_]+[*_]{2})((?=\W|\n))', bygroups(Text, Generic.Strong, Text)),
+            # strikethrough
+            (r'(\s)(~~[^~]+~~)((?=\W|\n))', bygroups(Text, Generic.Deleted, Text)),
+            # inline code
+            (r'`[^`]+`', String.Backtick),
+            # mentions and topics (twitter and github stuff)
+            (r'[@#][\w/:]+', Name.Entity),
+            # (image?) links eg: ![Image of Yaktocat](https://octodex.github.com/images/yaktocat.png)
+            (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)),
+
+            # general text, must come last!
+            (r'[^\\\s]+', Text),
+            (r'.', Text),
+        ],
+    }
+
+    def __init__(self, **options):
+        """Read the `handlecodeblocks` option, then initialise the base lexer."""
+        self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True)
+        RegexLexer.__init__(self, **options)

eric ide

mercurial