eric6/ThirdParty/Pygments/pygments/lexers/html.py

changeset 8258
82b608e352ec
parent 8257
28146736bbfc
child 8259
2bbec88047dd
equal deleted inserted replaced
8257:28146736bbfc 8258:82b608e352ec
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.html
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for HTML, XML and related markup.
7
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15 default, using
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Punctuation
18 from pygments.util import looks_like_xml, html_doctype_matches
19
20 from pygments.lexers.javascript import JavascriptLexer
21 from pygments.lexers.jvm import ScalaLexer
22 from pygments.lexers.css import CssLexer, _indentation, _starts_block
23 from pygments.lexers.ruby import RubyLexer
24
25 __all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
26 'ScamlLexer', 'PugLexer']
27
28
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # <script> and <style> push two states: 'tag' (entered first, as
            # it is last in the tuple) lexes the remaining attributes, then
            # the *-content state hands the element body to the JS/CSS lexer.
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)',
             bygroups(Punctuation, Text, Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
             'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # Medium confidence when an (X)HTML doctype is present; implicitly
        # returns None (no opinion) otherwise.
        if html_doctype_matches(text):
            return 0.5
109
110
class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        'root': [
            include('common'),

            # Each declaration keyword pushes a dedicated state for the
            # remainder of that declaration, up to the closing '>'.
            (r'(<!ELEMENT)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'notation'),
            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
             bygroups(Keyword, Name.Entity, Text, Keyword)),

            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
             bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],

        # Tokens shared by all declaration states: whitespace, parameter and
        # general entity references, comments, content-model operators and
        # quoted literals.
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],

        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],

        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],

        'attlist': [
            include('common'),
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],

        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],

        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        # A standalone DTD should not look like an XML document but must
        # contain at least one of the core DTD declarations.
        if not looks_like_xml(text) and \
            ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8
197
198
class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).
    """

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
                 '*.wsdl', '*.wsf']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml']

    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        if looks_like_xml(text):
            return 0.45  # less than HTML
245
246
class XsltLexer(XmlLexer):
    """
    A lexer for XSLT.

    .. versionadded:: 0.10
    """

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
    mimetypes = ['application/xsl+xml', 'application/xslt+xml']

    # XSLT element names (without the 'xsl:' prefix) that are promoted from
    # Name.Tag to Keyword by get_tokens_unprocessed().
    EXTRA_KEYWORDS = {
        'apply-imports', 'apply-templates', 'attribute',
        'attribute-set', 'call-template', 'choose', 'comment',
        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
        'for-each', 'if', 'import', 'include', 'key', 'message',
        'namespace-alias', 'number', 'otherwise', 'output', 'param',
        'preserve-space', 'processing-instruction', 'sort',
        'strip-space', 'stylesheet', 'template', 'text', 'transform',
        'value-of', 'variable', 'when', 'with-param'
    }

    def get_tokens_unprocessed(self, text):
        """Post-process the XML token stream, re-tagging known ``xsl:``
        element names as keywords."""
        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
            m = re.match('</?xsl:([^>]*)/?>?', value)

            if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS:
                yield index, Keyword, value
            else:
                yield index, token, value

    def analyse_text(text):
        if looks_like_xml(text) and '<xsl' in text:
            return 0.8
282
283
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation is a callback (from the CSS lexer module) that
            # tracks indent level and dispatches into 'content'.
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            # Ruby attribute hashes and object references are delegated to
            # the Ruby lexer.
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            # #{...} Ruby interpolation inside plain text
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
393
394
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_. Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation tracks indent level and dispatches into 'content'.
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'scaml-comment-block'), '#pop'),
            # '-@ import ...' lines: Scaml attribute/import declarations,
            # delegated to the Scala lexer.
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
505
506
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation tracks indent level and dispatches into 'content'.
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            # Unlike Haml/Scaml, Pug tag names have no '%' prefix.
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
JadeLexer = PugLexer  # compat: Jade was renamed to Pug upstream

eric ide

mercurial