--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ThirdParty/Pygments/pygments/lexers/html.py Wed Mar 11 18:32:27 2015 +0100 @@ -0,0 +1,589 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.html + ~~~~~~~~~~~~~~~~~~~~ + + Lexers for HTML, XML and related markup. + + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \ + default, using +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Punctuation +from pygments.util import looks_like_xml, html_doctype_matches + +from pygments.lexers.javascript import JavascriptLexer +from pygments.lexers.jvm import ScalaLexer +from pygments.lexers.css import CssLexer, _indentation, _starts_block +from pygments.lexers.ruby import RubyLexer + +__all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer', + 'ScamlLexer', 'JadeLexer'] + + +class HtmlLexer(RegexLexer): + """ + For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted + by the appropriate lexer. + """ + + name = 'HTML' + aliases = ['html'] + filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt'] + mimetypes = ['text/html', 'application/xhtml+xml'] + + flags = re.IGNORECASE | re.DOTALL + tokens = { + 'root': [ + ('[^<&]+', Text), + (r'&\S*?;', Name.Entity), + (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), + ('<!--', Comment, 'comment'), + (r'<\?.*?\?>', Comment.Preproc), + ('<![^>]*>', Comment.Preproc), + (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')), + (r'<\s*style\s*', Name.Tag, ('style-content', 'tag')), + # note: this allows tag names not used in HTML like <x:with-dash>, + # this is to support yet-unknown template engines and the like + (r'<\s*[\w:.-]+', Name.Tag, 'tag'), + (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag), + ], + 'comment': [ + ('[^-]+', Comment), + ('-->', Comment, '#pop'), + ('-', Comment), + ], + 'tag': [ + (r'\s+', Text), + (r'([\w:-]+\s*=)(\s*)', bygroups(Name.Attribute, Text), 'attr'), + (r'[\w:-]+', Name.Attribute), + (r'/?\s*>', Name.Tag, '#pop'), + ], + 'script-content': [ + (r'<\s*/\s*script\s*>', Name.Tag, '#pop'), + (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)), + ], + 'style-content': [ + (r'<\s*/\s*style\s*>', Name.Tag, '#pop'), + (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)), + ], + 'attr': [ + ('".*?"', String, '#pop'), + ("'.*?'", String, '#pop'), + (r'[^\s>]+', String, '#pop'), + ], + } + + def analyse_text(text): + if html_doctype_matches(text): + return 0.5 + + +class DtdLexer(RegexLexer): + """ + A lexer for DTDs (Document Type Definitions). + + .. versionadded:: 1.5 + """ + + flags = re.MULTILINE | re.DOTALL + + name = 'DTD' + aliases = ['dtd'] + filenames = ['*.dtd'] + mimetypes = ['application/xml-dtd'] + + tokens = { + 'root': [ + include('common'), + + (r'(<!ELEMENT)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Tag), 'element'), + (r'(<!ATTLIST)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Tag), 'attlist'), + (r'(<!ENTITY)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Entity), 'entity'), + (r'(<!NOTATION)(\s+)(\S+)', + bygroups(Keyword, Text, Name.Tag), 'notation'), + (r'(<!\[)([^\[\s]+)(\s*)(\[)', # conditional sections + bygroups(Keyword, Name.Entity, Text, Keyword)), + + (r'(<!DOCTYPE)(\s+)([^>\s]+)', + bygroups(Keyword, Text, Name.Tag)), + (r'PUBLIC|SYSTEM', Keyword.Constant), + (r'[\[\]>]', Keyword), + ], + + 'common': [ + (r'\s+', Text), + (r'(%|&)[^;]*;', Name.Entity), + ('<!--', Comment, 'comment'), + (r'[(|)*,?+]', Operator), + (r'"[^"]*"', String.Double), + (r'\'[^\']*\'', String.Single), + ], + + 'comment': [ + ('[^-]+', Comment), + ('-->', Comment, '#pop'), + ('-', Comment), + ], + + 'element': [ + include('common'), + (r'EMPTY|ANY|#PCDATA', Keyword.Constant), + (r'[^>\s|()?+*,]+', Name.Tag), + (r'>', Keyword, '#pop'), + ], + + 'attlist': [ + include('common'), + (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION', + Keyword.Constant), + (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant), + (r'xml:space|xml:lang', Keyword.Reserved), + (r'[^>\s|()?+*,]+', Name.Attribute), + (r'>', Keyword, '#pop'), + ], + + 'entity': [ + include('common'), + (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant), + (r'[^>\s|()?+*,]+', Name.Entity), + (r'>', Keyword, '#pop'), + ], + + 'notation': [ + include('common'), + (r'SYSTEM|PUBLIC', Keyword.Constant), + (r'[^>\s|()?+*,]+', Name.Attribute), + (r'>', Keyword, '#pop'), + ], + } + + def analyse_text(text): + if not looks_like_xml(text) and \ + ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text): + return 0.8 + + +class XmlLexer(RegexLexer): + """ + Generic lexer for XML (eXtensible Markup Language). + """ + + flags = re.MULTILINE | re.DOTALL | re.UNICODE + + name = 'XML' + aliases = ['xml'] + filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd', + '*.wsdl', '*.wsf'] + mimetypes = ['text/xml', 'application/xml', 'image/svg+xml', + 'application/rss+xml', 'application/atom+xml'] + + tokens = { + 'root': [ + ('[^<&]+', Text), + (r'&\S*?;', Name.Entity), + (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc), + ('<!--', Comment, 'comment'), + (r'<\?.*?\?>', Comment.Preproc), + ('<![^>]*>', Comment.Preproc), + (r'<\s*[\w:.-]+', Name.Tag, 'tag'), + (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag), + ], + 'comment': [ + ('[^-]+', Comment), + ('-->', Comment, '#pop'), + ('-', Comment), + ], + 'tag': [ + (r'\s+', Text), + (r'[\w.:-]+\s*=', Name.Attribute, 'attr'), + (r'/?\s*>', Name.Tag, '#pop'), + ], + 'attr': [ + ('\s+', Text), + ('".*?"', String, '#pop'), + ("'.*?'", String, '#pop'), + (r'[^\s>]+', String, '#pop'), + ], + } + + def analyse_text(text): + if looks_like_xml(text): + return 0.45 # less than HTML + + +class XsltLexer(XmlLexer): + """ + A lexer for XSLT. + + .. versionadded:: 0.10 + """ + + name = 'XSLT' + aliases = ['xslt'] + filenames = ['*.xsl', '*.xslt', '*.xpl'] # xpl is XProc + mimetypes = ['application/xsl+xml', 'application/xslt+xml'] + + EXTRA_KEYWORDS = set(( + 'apply-imports', 'apply-templates', 'attribute', + 'attribute-set', 'call-template', 'choose', 'comment', + 'copy', 'copy-of', 'decimal-format', 'element', 'fallback', + 'for-each', 'if', 'import', 'include', 'key', 'message', + 'namespace-alias', 'number', 'otherwise', 'output', 'param', + 'preserve-space', 'processing-instruction', 'sort', + 'strip-space', 'stylesheet', 'template', 'text', 'transform', + 'value-of', 'variable', 'when', 'with-param' + )) + + def get_tokens_unprocessed(self, text): + for index, token, value in XmlLexer.get_tokens_unprocessed(self, text): + m = re.match('</?xsl:([^>]*)/?>?', value) + + if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS: + yield index, Keyword, value + else: + yield index, token, value + + def analyse_text(text): + if looks_like_xml(text) and '<xsl' in text: + return 0.8 + + +class HamlLexer(ExtendedRegexLexer): + """ + For Haml markup. + + .. versionadded:: 1.3 + """ + + name = 'Haml' + aliases = ['haml'] + filenames = ['*.haml'] + mimetypes = ['text/x-haml'] + + flags = re.IGNORECASE + # Haml can include " |\n" anywhere, + # which is ignored and used to wrap long lines. + # To accomodate this, use this custom faux dot instead. + _dot = r'(?: \|\n(?=.* \|)|.)' + + # In certain places, a comma at the end of the line + # allows line wrapping as well. + _comma_dot = r'(?:,\s*\n|' + _dot + ')' + tokens = { + 'root': [ + (r'[ \t]*\n', Text), + (r'[ \t]*', _indentation), + ], + + 'css': [ + (r'\.[\w:-]+', Name.Class, 'tag'), + (r'\#[\w:-]+', Name.Function, 'tag'), + ], + + 'eval-or-plain': [ + (r'[&!]?==', Punctuation, 'plain'), + (r'([&!]?[=~])(' + _comma_dot + r'*\n)', + bygroups(Punctuation, using(RubyLexer)), + 'root'), + default('plain'), + ], + + 'content': [ + include('css'), + (r'%[\w:-]+', Name.Tag, 'tag'), + (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', + bygroups(Comment, Comment.Special, Comment), + '#pop'), + (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), + '#pop'), + (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, + 'haml-comment-block'), '#pop'), + (r'(-)(' + _comma_dot + r'*\n)', + bygroups(Punctuation, using(RubyLexer)), + '#pop'), + (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), + '#pop'), + include('eval-or-plain'), + ], + + 'tag': [ + include('css'), + (r'\{(,\n|' + _dot + ')*?\}', using(RubyLexer)), + (r'\[' + _dot + '*?\]', using(RubyLexer)), + (r'\(', Text, 'html-attributes'), + (r'/[ \t]*\n', Punctuation, '#pop:2'), + (r'[<>]{1,2}(?=[ \t=])', Punctuation), + include('eval-or-plain'), + ], + + 'plain': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(RubyLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + + 'html-attributes': [ + (r'\s+', Text), + (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), + (r'[\w:-]+', Name.Attribute), + (r'\)', Text, '#pop'), + ], + + 'html-attribute-value': [ + (r'[ \t]+', Text), + (r'\w+', Name.Variable, '#pop'), + (r'@\w+', Name.Variable.Instance, '#pop'), + (r'\$\w+', Name.Variable.Global, '#pop'), + (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), + (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), + ], + + 'html-comment-block': [ + (_dot + '+', Comment), + (r'\n', Text, 'root'), + ], + + 'haml-comment-block': [ + (_dot + '+', Comment.Preproc), + (r'\n', Text, 'root'), + ], + + 'filter-block': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(RubyLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + } + + +class ScamlLexer(ExtendedRegexLexer): + """ + For `Scaml markup <http://scalate.fusesource.org/>`_. Scaml is Haml for Scala. + + .. versionadded:: 1.4 + """ + + name = 'Scaml' + aliases = ['scaml'] + filenames = ['*.scaml'] + mimetypes = ['text/x-scaml'] + + flags = re.IGNORECASE + # Scaml does not yet support the " |\n" notation to + # wrap long lines. Once it does, use the custom faux + # dot instead. + # _dot = r'(?: \|\n(?=.* \|)|.)' + _dot = r'.' + + tokens = { + 'root': [ + (r'[ \t]*\n', Text), + (r'[ \t]*', _indentation), + ], + + 'css': [ + (r'\.[\w:-]+', Name.Class, 'tag'), + (r'\#[\w:-]+', Name.Function, 'tag'), + ], + + 'eval-or-plain': [ + (r'[&!]?==', Punctuation, 'plain'), + (r'([&!]?[=~])(' + _dot + r'*\n)', + bygroups(Punctuation, using(ScalaLexer)), + 'root'), + default('plain'), + ], + + 'content': [ + include('css'), + (r'%[\w:-]+', Name.Tag, 'tag'), + (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', + bygroups(Comment, Comment.Special, Comment), + '#pop'), + (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), + '#pop'), + (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, + 'scaml-comment-block'), '#pop'), + (r'(-@\s*)(import)?(' + _dot + r'*\n)', + bygroups(Punctuation, Keyword, using(ScalaLexer)), + '#pop'), + (r'(-)(' + _dot + r'*\n)', + bygroups(Punctuation, using(ScalaLexer)), + '#pop'), + (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), + '#pop'), + include('eval-or-plain'), + ], + + 'tag': [ + include('css'), + (r'\{(,\n|' + _dot + ')*?\}', using(ScalaLexer)), + (r'\[' + _dot + '*?\]', using(ScalaLexer)), + (r'\(', Text, 'html-attributes'), + (r'/[ \t]*\n', Punctuation, '#pop:2'), + (r'[<>]{1,2}(?=[ \t=])', Punctuation), + include('eval-or-plain'), + ], + + 'plain': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + + 'html-attributes': [ + (r'\s+', Text), + (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), + (r'[\w:-]+', Name.Attribute), + (r'\)', Text, '#pop'), + ], + + 'html-attribute-value': [ + (r'[ \t]+', Text), + (r'\w+', Name.Variable, '#pop'), + (r'@\w+', Name.Variable.Instance, '#pop'), + (r'\$\w+', Name.Variable.Global, '#pop'), + (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), + (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), + ], + + 'html-comment-block': [ + (_dot + '+', Comment), + (r'\n', Text, 'root'), + ], + + 'scaml-comment-block': [ + (_dot + '+', Comment.Preproc), + (r'\n', Text, 'root'), + ], + + 'filter-block': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + } + + +class JadeLexer(ExtendedRegexLexer): + """ + For Jade markup. + Jade is a variant of Scaml, see: + http://scalate.fusesource.org/documentation/scaml-reference.html + + .. versionadded:: 1.4 + """ + + name = 'Jade' + aliases = ['jade'] + filenames = ['*.jade'] + mimetypes = ['text/x-jade'] + + flags = re.IGNORECASE + _dot = r'.' + + tokens = { + 'root': [ + (r'[ \t]*\n', Text), + (r'[ \t]*', _indentation), + ], + + 'css': [ + (r'\.[\w:-]+', Name.Class, 'tag'), + (r'\#[\w:-]+', Name.Function, 'tag'), + ], + + 'eval-or-plain': [ + (r'[&!]?==', Punctuation, 'plain'), + (r'([&!]?[=~])(' + _dot + r'*\n)', + bygroups(Punctuation, using(ScalaLexer)), 'root'), + default('plain'), + ], + + 'content': [ + include('css'), + (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'), + (r'(/)(\[' + _dot + '*?\])(' + _dot + r'*\n)', + bygroups(Comment, Comment.Special, Comment), + '#pop'), + (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'), + '#pop'), + (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc, + 'scaml-comment-block'), '#pop'), + (r'(-@\s*)(import)?(' + _dot + r'*\n)', + bygroups(Punctuation, Keyword, using(ScalaLexer)), + '#pop'), + (r'(-)(' + _dot + r'*\n)', + bygroups(Punctuation, using(ScalaLexer)), + '#pop'), + (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'), + '#pop'), + (r'[\w:-]+', Name.Tag, 'tag'), + (r'\|', Text, 'eval-or-plain'), + ], + + 'tag': [ + include('css'), + (r'\{(,\n|' + _dot + ')*?\}', using(ScalaLexer)), + (r'\[' + _dot + '*?\]', using(ScalaLexer)), + (r'\(', Text, 'html-attributes'), + (r'/[ \t]*\n', Punctuation, '#pop:2'), + (r'[<>]{1,2}(?=[ \t=])', Punctuation), + include('eval-or-plain'), + ], + + 'plain': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + + 'html-attributes': [ + (r'\s+', Text), + (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'), + (r'[\w:-]+', Name.Attribute), + (r'\)', Text, '#pop'), + ], + + 'html-attribute-value': [ + (r'[ \t]+', Text), + (r'\w+', Name.Variable, '#pop'), + (r'@\w+', Name.Variable.Instance, '#pop'), + (r'\$\w+', Name.Variable.Global, '#pop'), + (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'), + (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'), + ], + + 'html-comment-block': [ + (_dot + '+', Comment), + (r'\n', Text, 'root'), + ], + + 'scaml-comment-block': [ + (_dot + '+', Comment.Preproc), + (r'\n', Text, 'root'), + ], + + 'filter-block': [ + (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator), + (r'(#\{)(' + _dot + '*?)(\})', + bygroups(String.Interpol, using(ScalaLexer), String.Interpol)), + (r'\n', Text, 'root'), + ], + }