eric6/ThirdParty/Pygments/pygments/lexers/html.py

changeset 7983
54c5cfbb1e29
parent 7701
25f42e208e08
--- a/eric6/ThirdParty/Pygments/pygments/lexers/html.py	Thu Jan 14 18:06:56 2021 +0100
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/html.py	Thu Jan 14 18:14:15 2021 +0100
@@ -1,602 +1,614 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.lexers.html
-    ~~~~~~~~~~~~~~~~~~~~
-
-    Lexers for HTML, XML and related markup.
-
-    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
-    default, using
-from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Punctuation
-from pygments.util import looks_like_xml, html_doctype_matches
-
-from pygments.lexers.javascript import JavascriptLexer
-from pygments.lexers.jvm import ScalaLexer
-from pygments.lexers.css import CssLexer, _indentation, _starts_block
-from pygments.lexers.ruby import RubyLexer
-
-__all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
-           'ScamlLexer', 'PugLexer']
-
-
-class HtmlLexer(RegexLexer):
-    """
-    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
-    by the appropriate lexer.
-    """
-
-    name = 'HTML'
-    aliases = ['html']
-    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
-    mimetypes = ['text/html', 'application/xhtml+xml']
-
-    flags = re.IGNORECASE | re.DOTALL
-    tokens = {
-        'root': [
-            ('[^<&]+', Text),
-            (r'&\S*?;', Name.Entity),
-            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
-            ('<!--', Comment, 'comment'),
-            (r'<\?.*?\?>', Comment.Preproc),
-            ('<![^>]*>', Comment.Preproc),
-            (r'(<)(\s*)(script)(\s*)',
-             bygroups(Punctuation, Text, Name.Tag, Text),
-             ('script-content', 'tag')),
-            (r'(<)(\s*)(style)(\s*)',
-             bygroups(Punctuation, Text, Name.Tag, Text),
-             ('style-content', 'tag')),
-            # note: this allows tag names not used in HTML like <x:with-dash>,
-            # this is to support yet-unknown template engines and the like
-            (r'(<)(\s*)([\w:.-]+)',
-             bygroups(Punctuation, Text, Name.Tag), 'tag'),
-            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
-             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
-                      Punctuation)),
-        ],
-        'comment': [
-            ('[^-]+', Comment),
-            ('-->', Comment, '#pop'),
-            ('-', Comment),
-        ],
-        'tag': [
-            (r'\s+', Text),
-            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
-             'attr'),
-            (r'[\w:-]+', Name.Attribute),
-            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
-        ],
-        'script-content': [
-            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
-             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
-                      Punctuation), '#pop'),
-            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
-        ],
-        'style-content': [
-            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
-             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
-                      Punctuation),'#pop'),
-            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
-        ],
-        'attr': [
-            ('".*?"', String, '#pop'),
-            ("'.*?'", String, '#pop'),
-            (r'[^\s>]+', String, '#pop'),
-        ],
-    }
-
-    def analyse_text(text):
-        if html_doctype_matches(text):
-            return 0.5
-
-
-class DtdLexer(RegexLexer):
-    """
-    A lexer for DTDs (Document Type Definitions).
-
-    .. versionadded:: 1.5
-    """
-
-    flags = re.MULTILINE | re.DOTALL
-
-    name = 'DTD'
-    aliases = ['dtd']
-    filenames = ['*.dtd']
-    mimetypes = ['application/xml-dtd']
-
-    tokens = {
-        'root': [
-            include('common'),
-
-            (r'(<!ELEMENT)(\s+)(\S+)',
-                bygroups(Keyword, Text, Name.Tag), 'element'),
-            (r'(<!ATTLIST)(\s+)(\S+)',
-                bygroups(Keyword, Text, Name.Tag), 'attlist'),
-            (r'(<!ENTITY)(\s+)(\S+)',
-                bygroups(Keyword, Text, Name.Entity), 'entity'),
-            (r'(<!NOTATION)(\s+)(\S+)',
-                bygroups(Keyword, Text, Name.Tag), 'notation'),
-            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
-                bygroups(Keyword, Name.Entity, Text, Keyword)),
-
-            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
-                bygroups(Keyword, Text, Name.Tag)),
-            (r'PUBLIC|SYSTEM', Keyword.Constant),
-            (r'[\[\]>]', Keyword),
-        ],
-
-        'common': [
-            (r'\s+', Text),
-            (r'(%|&)[^;]*;', Name.Entity),
-            ('<!--', Comment, 'comment'),
-            (r'[(|)*,?+]', Operator),
-            (r'"[^"]*"', String.Double),
-            (r'\'[^\']*\'', String.Single),
-        ],
-
-        'comment': [
-            ('[^-]+', Comment),
-            ('-->', Comment, '#pop'),
-            ('-', Comment),
-        ],
-
-        'element': [
-            include('common'),
-            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
-            (r'[^>\s|()?+*,]+', Name.Tag),
-            (r'>', Keyword, '#pop'),
-        ],
-
-        'attlist': [
-            include('common'),
-            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
-             Keyword.Constant),
-            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
-            (r'xml:space|xml:lang', Keyword.Reserved),
-            (r'[^>\s|()?+*,]+', Name.Attribute),
-            (r'>', Keyword, '#pop'),
-        ],
-
-        'entity': [
-            include('common'),
-            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
-            (r'[^>\s|()?+*,]+', Name.Entity),
-            (r'>', Keyword, '#pop'),
-        ],
-
-        'notation': [
-            include('common'),
-            (r'SYSTEM|PUBLIC', Keyword.Constant),
-            (r'[^>\s|()?+*,]+', Name.Attribute),
-            (r'>', Keyword, '#pop'),
-        ],
-    }
-
-    def analyse_text(text):
-        if not looks_like_xml(text) and \
-           ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
-            return 0.8
-
-
-class XmlLexer(RegexLexer):
-    """
-    Generic lexer for XML (eXtensible Markup Language).
-    """
-
-    flags = re.MULTILINE | re.DOTALL | re.UNICODE
-
-    name = 'XML'
-    aliases = ['xml']
-    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
-                 '*.wsdl', '*.wsf']
-    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
-                 'application/rss+xml', 'application/atom+xml']
-
-    tokens = {
-        'root': [
-            ('[^<&]+', Text),
-            (r'&\S*?;', Name.Entity),
-            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
-            ('<!--', Comment, 'comment'),
-            (r'<\?.*?\?>', Comment.Preproc),
-            ('<![^>]*>', Comment.Preproc),
-            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
-            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
-        ],
-        'comment': [
-            ('[^-]+', Comment),
-            ('-->', Comment, '#pop'),
-            ('-', Comment),
-        ],
-        'tag': [
-            (r'\s+', Text),
-            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
-            (r'/?\s*>', Name.Tag, '#pop'),
-        ],
-        'attr': [
-            (r'\s+', Text),
-            ('".*?"', String, '#pop'),
-            ("'.*?'", String, '#pop'),
-            (r'[^\s>]+', String, '#pop'),
-        ],
-    }
-
-    def analyse_text(text):
-        if looks_like_xml(text):
-            return 0.45  # less than HTML
-
-
-class XsltLexer(XmlLexer):
-    """
-    A lexer for XSLT.
-
-    .. versionadded:: 0.10
-    """
-
-    name = 'XSLT'
-    aliases = ['xslt']
-    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
-    mimetypes = ['application/xsl+xml', 'application/xslt+xml']
-
-    EXTRA_KEYWORDS = {
-        'apply-imports', 'apply-templates', 'attribute',
-        'attribute-set', 'call-template', 'choose', 'comment',
-        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
-        'for-each', 'if', 'import', 'include', 'key', 'message',
-        'namespace-alias', 'number', 'otherwise', 'output', 'param',
-        'preserve-space', 'processing-instruction', 'sort',
-        'strip-space', 'stylesheet', 'template', 'text', 'transform',
-        'value-of', 'variable', 'when', 'with-param'
-    }
-
-    def get_tokens_unprocessed(self, text):
-        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
-            m = re.match('</?xsl:([^>]*)/?>?', value)
-
-            if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS:
-                yield index, Keyword, value
-            else:
-                yield index, token, value
-
-    def analyse_text(text):
-        if looks_like_xml(text) and '<xsl' in text:
-            return 0.8
-
-
-class HamlLexer(ExtendedRegexLexer):
-    """
-    For Haml markup.
-
-    .. versionadded:: 1.3
-    """
-
-    name = 'Haml'
-    aliases = ['haml']
-    filenames = ['*.haml']
-    mimetypes = ['text/x-haml']
-
-    flags = re.IGNORECASE
-    # Haml can include " |\n" anywhere,
-    # which is ignored and used to wrap long lines.
-    # To accomodate this, use this custom faux dot instead.
-    _dot = r'(?: \|\n(?=.* \|)|.)'
-
-    # In certain places, a comma at the end of the line
-    # allows line wrapping as well.
-    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
-    tokens = {
-        'root': [
-            (r'[ \t]*\n', Text),
-            (r'[ \t]*', _indentation),
-        ],
-
-        'css': [
-            (r'\.[\w:-]+', Name.Class, 'tag'),
-            (r'\#[\w:-]+', Name.Function, 'tag'),
-        ],
-
-        'eval-or-plain': [
-            (r'[&!]?==', Punctuation, 'plain'),
-            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
-             bygroups(Punctuation, using(RubyLexer)),
-             'root'),
-            default('plain'),
-        ],
-
-        'content': [
-            include('css'),
-            (r'%[\w:-]+', Name.Tag, 'tag'),
-            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
-            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
-             bygroups(Comment, Comment.Special, Comment),
-             '#pop'),
-            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
-             '#pop'),
-            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
-                                                  'haml-comment-block'), '#pop'),
-            (r'(-)(' + _comma_dot + r'*\n)',
-             bygroups(Punctuation, using(RubyLexer)),
-             '#pop'),
-            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
-             '#pop'),
-            include('eval-or-plain'),
-        ],
-
-        'tag': [
-            include('css'),
-            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
-            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
-            (r'\(', Text, 'html-attributes'),
-            (r'/[ \t]*\n', Punctuation, '#pop:2'),
-            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
-            include('eval-or-plain'),
-        ],
-
-        'plain': [
-            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
-            (r'(#\{)(' + _dot + r'*?)(\})',
-             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
-            (r'\n', Text, 'root'),
-        ],
-
-        'html-attributes': [
-            (r'\s+', Text),
-            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
-            (r'[\w:-]+', Name.Attribute),
-            (r'\)', Text, '#pop'),
-        ],
-
-        'html-attribute-value': [
-            (r'[ \t]+', Text),
-            (r'\w+', Name.Variable, '#pop'),
-            (r'@\w+', Name.Variable.Instance, '#pop'),
-            (r'\$\w+', Name.Variable.Global, '#pop'),
-            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
-            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
-        ],
-
-        'html-comment-block': [
-            (_dot + '+', Comment),
-            (r'\n', Text, 'root'),
-        ],
-
-        'haml-comment-block': [
-            (_dot + '+', Comment.Preproc),
-            (r'\n', Text, 'root'),
-        ],
-
-        'filter-block': [
-            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
-            (r'(#\{)(' + _dot + r'*?)(\})',
-             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
-            (r'\n', Text, 'root'),
-        ],
-    }
-
-
-class ScamlLexer(ExtendedRegexLexer):
-    """
-    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.
-
-    .. versionadded:: 1.4
-    """
-
-    name = 'Scaml'
-    aliases = ['scaml']
-    filenames = ['*.scaml']
-    mimetypes = ['text/x-scaml']
-
-    flags = re.IGNORECASE
-    # Scaml does not yet support the " |\n" notation to
-    # wrap long lines.  Once it does, use the custom faux
-    # dot instead.
-    # _dot = r'(?: \|\n(?=.* \|)|.)'
-    _dot = r'.'
-
-    tokens = {
-        'root': [
-            (r'[ \t]*\n', Text),
-            (r'[ \t]*', _indentation),
-        ],
-
-        'css': [
-            (r'\.[\w:-]+', Name.Class, 'tag'),
-            (r'\#[\w:-]+', Name.Function, 'tag'),
-        ],
-
-        'eval-or-plain': [
-            (r'[&!]?==', Punctuation, 'plain'),
-            (r'([&!]?[=~])(' + _dot + r'*\n)',
-             bygroups(Punctuation, using(ScalaLexer)),
-             'root'),
-            default('plain'),
-        ],
-
-        'content': [
-            include('css'),
-            (r'%[\w:-]+', Name.Tag, 'tag'),
-            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
-            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
-             bygroups(Comment, Comment.Special, Comment),
-             '#pop'),
-            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
-             '#pop'),
-            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
-                                                  'scaml-comment-block'), '#pop'),
-            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
-             bygroups(Punctuation, Keyword, using(ScalaLexer)),
-             '#pop'),
-            (r'(-)(' + _dot + r'*\n)',
-             bygroups(Punctuation, using(ScalaLexer)),
-             '#pop'),
-            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
-             '#pop'),
-            include('eval-or-plain'),
-        ],
-
-        'tag': [
-            include('css'),
-            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
-            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
-            (r'\(', Text, 'html-attributes'),
-            (r'/[ \t]*\n', Punctuation, '#pop:2'),
-            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
-            include('eval-or-plain'),
-        ],
-
-        'plain': [
-            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
-            (r'(#\{)(' + _dot + r'*?)(\})',
-             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
-            (r'\n', Text, 'root'),
-        ],
-
-        'html-attributes': [
-            (r'\s+', Text),
-            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
-            (r'[\w:-]+', Name.Attribute),
-            (r'\)', Text, '#pop'),
-        ],
-
-        'html-attribute-value': [
-            (r'[ \t]+', Text),
-            (r'\w+', Name.Variable, '#pop'),
-            (r'@\w+', Name.Variable.Instance, '#pop'),
-            (r'\$\w+', Name.Variable.Global, '#pop'),
-            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
-            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
-        ],
-
-        'html-comment-block': [
-            (_dot + '+', Comment),
-            (r'\n', Text, 'root'),
-        ],
-
-        'scaml-comment-block': [
-            (_dot + '+', Comment.Preproc),
-            (r'\n', Text, 'root'),
-        ],
-
-        'filter-block': [
-            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
-            (r'(#\{)(' + _dot + r'*?)(\})',
-             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
-            (r'\n', Text, 'root'),
-        ],
-    }
-
-
-class PugLexer(ExtendedRegexLexer):
-    """
-    For Pug markup.
-    Pug is a variant of Scaml, see:
-    http://scalate.fusesource.org/documentation/scaml-reference.html
-
-    .. versionadded:: 1.4
-    """
-
-    name = 'Pug'
-    aliases = ['pug', 'jade']
-    filenames = ['*.pug', '*.jade']
-    mimetypes = ['text/x-pug', 'text/x-jade']
-
-    flags = re.IGNORECASE
-    _dot = r'.'
-
-    tokens = {
-        'root': [
-            (r'[ \t]*\n', Text),
-            (r'[ \t]*', _indentation),
-        ],
-
-        'css': [
-            (r'\.[\w:-]+', Name.Class, 'tag'),
-            (r'\#[\w:-]+', Name.Function, 'tag'),
-        ],
-
-        'eval-or-plain': [
-            (r'[&!]?==', Punctuation, 'plain'),
-            (r'([&!]?[=~])(' + _dot + r'*\n)',
-             bygroups(Punctuation, using(ScalaLexer)),  'root'),
-            default('plain'),
-        ],
-
-        'content': [
-            include('css'),
-            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
-            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
-             bygroups(Comment, Comment.Special, Comment),
-             '#pop'),
-            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
-             '#pop'),
-            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
-                                                  'scaml-comment-block'), '#pop'),
-            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
-             bygroups(Punctuation, Keyword, using(ScalaLexer)),
-             '#pop'),
-            (r'(-)(' + _dot + r'*\n)',
-             bygroups(Punctuation, using(ScalaLexer)),
-             '#pop'),
-            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
-             '#pop'),
-            (r'[\w:-]+', Name.Tag, 'tag'),
-            (r'\|', Text, 'eval-or-plain'),
-        ],
-
-        'tag': [
-            include('css'),
-            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
-            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
-            (r'\(', Text, 'html-attributes'),
-            (r'/[ \t]*\n', Punctuation, '#pop:2'),
-            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
-            include('eval-or-plain'),
-        ],
-
-        'plain': [
-            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
-            (r'(#\{)(' + _dot + r'*?)(\})',
-             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
-            (r'\n', Text, 'root'),
-        ],
-
-        'html-attributes': [
-            (r'\s+', Text),
-            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
-            (r'[\w:-]+', Name.Attribute),
-            (r'\)', Text, '#pop'),
-        ],
-
-        'html-attribute-value': [
-            (r'[ \t]+', Text),
-            (r'\w+', Name.Variable, '#pop'),
-            (r'@\w+', Name.Variable.Instance, '#pop'),
-            (r'\$\w+', Name.Variable.Global, '#pop'),
-            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
-            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
-        ],
-
-        'html-comment-block': [
-            (_dot + '+', Comment),
-            (r'\n', Text, 'root'),
-        ],
-
-        'scaml-comment-block': [
-            (_dot + '+', Comment.Preproc),
-            (r'\n', Text, 'root'),
-        ],
-
-        'filter-block': [
-            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
-            (r'(#\{)(' + _dot + r'*?)(\})',
-             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
-            (r'\n', Text, 'root'),
-        ],
-    }
-JadeLexer = PugLexer  # compat
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.html
+    ~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for HTML, XML and related markup.
+
+    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
+    default, using
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Punctuation
+from pygments.util import looks_like_xml, html_doctype_matches
+
+from pygments.lexers.javascript import JavascriptLexer
+from pygments.lexers.jvm import ScalaLexer
+from pygments.lexers.css import CssLexer, _indentation, _starts_block
+from pygments.lexers.ruby import RubyLexer
+
+__all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
+           'ScamlLexer', 'PugLexer']
+
+
+class HtmlLexer(RegexLexer):
+    """
+    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
+    by the appropriate lexer.
+    """
+
+    name = 'HTML'
+    aliases = ['html']
+    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
+    mimetypes = ['text/html', 'application/xhtml+xml']
+
+    flags = re.IGNORECASE | re.DOTALL
+    tokens = {
+        'root': [
+            ('[^<&]+', Text),
+            (r'&\S*?;', Name.Entity),
+            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
+            ('<!--', Comment, 'comment'),
+            (r'<\?.*?\?>', Comment.Preproc),
+            ('<![^>]*>', Comment.Preproc),
+            (r'(<)(\s*)(script)(\s*)',
+             bygroups(Punctuation, Text, Name.Tag, Text),
+             ('script-content', 'tag')),
+            (r'(<)(\s*)(style)(\s*)',
+             bygroups(Punctuation, Text, Name.Tag, Text),
+             ('style-content', 'tag')),
+            # note: this allows tag names not used in HTML like <x:with-dash>,
+            # this is to support yet-unknown template engines and the like
+            (r'(<)(\s*)([\w:.-]+)',
+             bygroups(Punctuation, Text, Name.Tag), 'tag'),
+            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
+             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
+                      Punctuation)),
+        ],
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+        'tag': [
+            (r'\s+', Text),
+            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
+             'attr'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
+        ],
+        'script-content': [
+            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
+             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
+                      Punctuation), '#pop'),
+            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
+            # fallback cases for when there is no closing script tag
+            # first look for newline and then go back into root state
+            # if that fails just read the rest of the file
+            # this is similar to the error handling logic in lexer.py
+            (r'.+?\n', using(JavascriptLexer), '#pop'),
+            (r'.+', using(JavascriptLexer), '#pop'),
+        ],
+        'style-content': [
+            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
+             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
+                      Punctuation),'#pop'),
+            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
+            # fallback cases for when there is no closing style tag
+            # first look for newline and then go back into root state
+            # if that fails just read the rest of the file
+            # this is similar to the error handling logic in lexer.py
+            (r'.+?\n', using(CssLexer), '#pop'),
+            (r'.+', using(CssLexer), '#pop'),
+        ],
+        'attr': [
+            ('".*?"', String, '#pop'),
+            ("'.*?'", String, '#pop'),
+            (r'[^\s>]+', String, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if html_doctype_matches(text):
+            return 0.5
+
+
+class DtdLexer(RegexLexer):
+    """
+    A lexer for DTDs (Document Type Definitions).
+
+    .. versionadded:: 1.5
+    """
+
+    flags = re.MULTILINE | re.DOTALL
+
+    name = 'DTD'
+    aliases = ['dtd']
+    filenames = ['*.dtd']
+    mimetypes = ['application/xml-dtd']
+
+    tokens = {
+        'root': [
+            include('common'),
+
+            (r'(<!ELEMENT)(\s+)(\S+)',
+                bygroups(Keyword, Text, Name.Tag), 'element'),
+            (r'(<!ATTLIST)(\s+)(\S+)',
+                bygroups(Keyword, Text, Name.Tag), 'attlist'),
+            (r'(<!ENTITY)(\s+)(\S+)',
+                bygroups(Keyword, Text, Name.Entity), 'entity'),
+            (r'(<!NOTATION)(\s+)(\S+)',
+                bygroups(Keyword, Text, Name.Tag), 'notation'),
+            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
+                bygroups(Keyword, Name.Entity, Text, Keyword)),
+
+            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
+                bygroups(Keyword, Text, Name.Tag)),
+            (r'PUBLIC|SYSTEM', Keyword.Constant),
+            (r'[\[\]>]', Keyword),
+        ],
+
+        'common': [
+            (r'\s+', Text),
+            (r'(%|&)[^;]*;', Name.Entity),
+            ('<!--', Comment, 'comment'),
+            (r'[(|)*,?+]', Operator),
+            (r'"[^"]*"', String.Double),
+            (r'\'[^\']*\'', String.Single),
+        ],
+
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+
+        'element': [
+            include('common'),
+            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
+            (r'[^>\s|()?+*,]+', Name.Tag),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'attlist': [
+            include('common'),
+            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
+             Keyword.Constant),
+            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
+            (r'xml:space|xml:lang', Keyword.Reserved),
+            (r'[^>\s|()?+*,]+', Name.Attribute),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'entity': [
+            include('common'),
+            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
+            (r'[^>\s|()?+*,]+', Name.Entity),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'notation': [
+            include('common'),
+            (r'SYSTEM|PUBLIC', Keyword.Constant),
+            (r'[^>\s|()?+*,]+', Name.Attribute),
+            (r'>', Keyword, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if not looks_like_xml(text) and \
+           ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
+            return 0.8
+
+
+class XmlLexer(RegexLexer):
+    """
+    Generic lexer for XML (eXtensible Markup Language).
+    """
+
+    flags = re.MULTILINE | re.DOTALL | re.UNICODE
+
+    name = 'XML'
+    aliases = ['xml']
+    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
+                 '*.wsdl', '*.wsf']
+    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
+                 'application/rss+xml', 'application/atom+xml']
+
+    tokens = {
+        'root': [
+            ('[^<&]+', Text),
+            (r'&\S*?;', Name.Entity),
+            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
+            ('<!--', Comment, 'comment'),
+            (r'<\?.*?\?>', Comment.Preproc),
+            ('<![^>]*>', Comment.Preproc),
+            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
+            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
+        ],
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+        'tag': [
+            (r'\s+', Text),
+            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
+            (r'/?\s*>', Name.Tag, '#pop'),
+        ],
+        'attr': [
+            (r'\s+', Text),
+            ('".*?"', String, '#pop'),
+            ("'.*?'", String, '#pop'),
+            (r'[^\s>]+', String, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if looks_like_xml(text):
+            return 0.45  # less than HTML
+
+
+class XsltLexer(XmlLexer):
+    """
+    A lexer for XSLT.
+
+    .. versionadded:: 0.10
+    """
+
+    name = 'XSLT'
+    aliases = ['xslt']
+    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
+    mimetypes = ['application/xsl+xml', 'application/xslt+xml']
+
+    EXTRA_KEYWORDS = {
+        'apply-imports', 'apply-templates', 'attribute',
+        'attribute-set', 'call-template', 'choose', 'comment',
+        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
+        'for-each', 'if', 'import', 'include', 'key', 'message',
+        'namespace-alias', 'number', 'otherwise', 'output', 'param',
+        'preserve-space', 'processing-instruction', 'sort',
+        'strip-space', 'stylesheet', 'template', 'text', 'transform',
+        'value-of', 'variable', 'when', 'with-param'
+    }
+
+    def get_tokens_unprocessed(self, text):
+        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
+            m = re.match('</?xsl:([^>]*)/?>?', value)
+
+            if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS:
+                yield index, Keyword, value
+            else:
+                yield index, token, value
+
+    def analyse_text(text):
+        if looks_like_xml(text) and '<xsl' in text:
+            return 0.8
+
+
+class HamlLexer(ExtendedRegexLexer):
+    """
+    For Haml markup.
+
+    .. versionadded:: 1.3
+    """
+
+    name = 'Haml'
+    aliases = ['haml']
+    filenames = ['*.haml']
+    mimetypes = ['text/x-haml']
+
+    flags = re.IGNORECASE
+    # Haml can include " |\n" anywhere,
+    # which is ignored and used to wrap long lines.
+    # To accomodate this, use this custom faux dot instead.
+    _dot = r'(?: \|\n(?=.* \|)|.)'
+
+    # In certain places, a comma at the end of the line
+    # allows line wrapping as well.
+    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
+    tokens = {
+        'root': [
+            (r'[ \t]*\n', Text),
+            (r'[ \t]*', _indentation),
+        ],
+
+        'css': [
+            (r'\.[\w:-]+', Name.Class, 'tag'),
+            (r'\#[\w:-]+', Name.Function, 'tag'),
+        ],
+
+        'eval-or-plain': [
+            (r'[&!]?==', Punctuation, 'plain'),
+            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
+             bygroups(Punctuation, using(RubyLexer)),
+             'root'),
+            default('plain'),
+        ],
+
+        'content': [
+            include('css'),
+            (r'%[\w:-]+', Name.Tag, 'tag'),
+            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
+             bygroups(Comment, Comment.Special, Comment),
+             '#pop'),
+            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
+             '#pop'),
+            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
+                                                  'haml-comment-block'), '#pop'),
+            (r'(-)(' + _comma_dot + r'*\n)',
+             bygroups(Punctuation, using(RubyLexer)),
+             '#pop'),
+            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
+             '#pop'),
+            include('eval-or-plain'),
+        ],
+
+        'tag': [
+            include('css'),
+            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
+            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
+            (r'\(', Text, 'html-attributes'),
+            (r'/[ \t]*\n', Punctuation, '#pop:2'),
+            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
+            include('eval-or-plain'),
+        ],
+
+        'plain': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+
+        'html-attributes': [
+            (r'\s+', Text),
+            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'\)', Text, '#pop'),
+        ],
+
+        'html-attribute-value': [
+            (r'[ \t]+', Text),
+            (r'\w+', Name.Variable, '#pop'),
+            (r'@\w+', Name.Variable.Instance, '#pop'),
+            (r'\$\w+', Name.Variable.Global, '#pop'),
+            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
+            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
+        ],
+
+        'html-comment-block': [
+            (_dot + '+', Comment),
+            (r'\n', Text, 'root'),
+        ],
+
+        'haml-comment-block': [
+            (_dot + '+', Comment.Preproc),
+            (r'\n', Text, 'root'),
+        ],
+
+        'filter-block': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+    }
+
+
+class ScamlLexer(ExtendedRegexLexer):
+    """
+    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.
+
+    .. versionadded:: 1.4
+    """
+
+    name = 'Scaml'
+    aliases = ['scaml']
+    filenames = ['*.scaml']
+    mimetypes = ['text/x-scaml']
+
+    flags = re.IGNORECASE
+    # Scaml does not yet support the " |\n" notation to
+    # wrap long lines.  Once it does, use the custom faux
+    # dot instead.
+    # _dot = r'(?: \|\n(?=.* \|)|.)'
+    _dot = r'.'
+
+    tokens = {
+        'root': [
+            (r'[ \t]*\n', Text),
+            (r'[ \t]*', _indentation),
+        ],
+
+        'css': [
+            (r'\.[\w:-]+', Name.Class, 'tag'),
+            (r'\#[\w:-]+', Name.Function, 'tag'),
+        ],
+
+        'eval-or-plain': [
+            (r'[&!]?==', Punctuation, 'plain'),
+            (r'([&!]?[=~])(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),
+             'root'),
+            default('plain'),
+        ],
+
+        'content': [
+            include('css'),
+            (r'%[\w:-]+', Name.Tag, 'tag'),
+            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
+             bygroups(Comment, Comment.Special, Comment),
+             '#pop'),
+            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
+             '#pop'),
+            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
+                                                  'scaml-comment-block'), '#pop'),
+            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
+             bygroups(Punctuation, Keyword, using(ScalaLexer)),
+             '#pop'),
+            (r'(-)(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),
+             '#pop'),
+            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
+             '#pop'),
+            include('eval-or-plain'),
+        ],
+
+        'tag': [
+            include('css'),
+            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
+            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
+            (r'\(', Text, 'html-attributes'),
+            (r'/[ \t]*\n', Punctuation, '#pop:2'),
+            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
+            include('eval-or-plain'),
+        ],
+
+        'plain': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+
+        'html-attributes': [
+            (r'\s+', Text),
+            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'\)', Text, '#pop'),
+        ],
+
+        'html-attribute-value': [
+            (r'[ \t]+', Text),
+            (r'\w+', Name.Variable, '#pop'),
+            (r'@\w+', Name.Variable.Instance, '#pop'),
+            (r'\$\w+', Name.Variable.Global, '#pop'),
+            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
+            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
+        ],
+
+        'html-comment-block': [
+            (_dot + '+', Comment),
+            (r'\n', Text, 'root'),
+        ],
+
+        'scaml-comment-block': [
+            (_dot + '+', Comment.Preproc),
+            (r'\n', Text, 'root'),
+        ],
+
+        'filter-block': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+    }
+
+
+class PugLexer(ExtendedRegexLexer):
+    """
+    For Pug markup.
+    Pug is a variant of Scaml, see:
+    http://scalate.fusesource.org/documentation/scaml-reference.html
+
+    .. versionadded:: 1.4
+    """
+
+    name = 'Pug'
+    aliases = ['pug', 'jade']
+    filenames = ['*.pug', '*.jade']
+    mimetypes = ['text/x-pug', 'text/x-jade']
+
+    flags = re.IGNORECASE
+    _dot = r'.'
+
+    tokens = {
+        'root': [
+            (r'[ \t]*\n', Text),
+            (r'[ \t]*', _indentation),
+        ],
+
+        'css': [
+            (r'\.[\w:-]+', Name.Class, 'tag'),
+            (r'\#[\w:-]+', Name.Function, 'tag'),
+        ],
+
+        'eval-or-plain': [
+            (r'[&!]?==', Punctuation, 'plain'),
+            (r'([&!]?[=~])(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),  'root'),
+            default('plain'),
+        ],
+
+        'content': [
+            include('css'),
+            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
+             bygroups(Comment, Comment.Special, Comment),
+             '#pop'),
+            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
+             '#pop'),
+            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
+                                                  'scaml-comment-block'), '#pop'),
+            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
+             bygroups(Punctuation, Keyword, using(ScalaLexer)),
+             '#pop'),
+            (r'(-)(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),
+             '#pop'),
+            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
+             '#pop'),
+            (r'[\w:-]+', Name.Tag, 'tag'),
+            (r'\|', Text, 'eval-or-plain'),
+        ],
+
+        'tag': [
+            include('css'),
+            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
+            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
+            (r'\(', Text, 'html-attributes'),
+            (r'/[ \t]*\n', Punctuation, '#pop:2'),
+            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
+            include('eval-or-plain'),
+        ],
+
+        'plain': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+
+        'html-attributes': [
+            (r'\s+', Text),
+            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'\)', Text, '#pop'),
+        ],
+
+        'html-attribute-value': [
+            (r'[ \t]+', Text),
+            (r'\w+', Name.Variable, '#pop'),
+            (r'@\w+', Name.Variable.Instance, '#pop'),
+            (r'\$\w+', Name.Variable.Global, '#pop'),
+            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
+            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
+        ],
+
+        'html-comment-block': [
+            (_dot + '+', Comment),
+            (r'\n', Text, 'root'),
+        ],
+
+        'scaml-comment-block': [
+            (_dot + '+', Comment.Preproc),
+            (r'\n', Text, 'root'),
+        ],
+
+        'filter-block': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+    }
+JadeLexer = PugLexer  # compat

eric ide

mercurial