eric6/ThirdParty/Pygments/pygments/lexers/html.py

changeset 8258
82b608e352ec
parent 8257
28146736bbfc
child 8259
2bbec88047dd
equal deleted inserted replaced
8257:28146736bbfc 8258:82b608e352ec
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.html
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for HTML, XML and related markup.
7
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15 default, using
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Punctuation
18 from pygments.util import looks_like_xml, html_doctype_matches
19
20 from pygments.lexers.javascript import JavascriptLexer
21 from pygments.lexers.jvm import ScalaLexer
22 from pygments.lexers.css import CssLexer, _indentation, _starts_block
23 from pygments.lexers.ruby import RubyLexer
24
25 __all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
26 'ScamlLexer', 'PugLexer']
27
28
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # <script> and <style> push two states: 'tag' (entered first, as
            # it is last in the tuple) lexes the remaining attributes, then
            # the *-content state hands the element body to the JS/CSS lexer.
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)',
             bygroups(Punctuation, Text, Name.Tag), 'tag'),
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
             'attr'),
            (r'[\w:-]+', Name.Attribute),
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
        ],
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
            # fallback cases for when there is no closing script tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(JavascriptLexer), '#pop'),
            (r'.+', using(JavascriptLexer), '#pop'),
        ],
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
            # fallback cases for when there is no closing style tag
            # first look for newline and then go back into root state
            # if that fails just read the rest of the file
            # this is similar to the error handling logic in lexer.py
            (r'.+?\n', using(CssLexer), '#pop'),
            (r'.+', using(CssLexer), '#pop'),
        ],
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # Medium confidence when an (X)HTML doctype is present; implicitly
        # returns None (no opinion) otherwise.
        if html_doctype_matches(text):
            return 0.5
109
110
class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        'root': [
            include('common'),

            # Each declaration keyword pushes a dedicated state for the
            # remainder of that declaration, up to the closing '>'.
            (r'(<!ELEMENT)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'notation'),
            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
             bygroups(Keyword, Name.Entity, Text, Keyword)),

            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
             bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],

        # Tokens shared by all declaration states: whitespace, parameter and
        # general entity references, comments, content-model operators and
        # quoted literals.
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],

        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],

        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],

        'attlist': [
            include('common'),
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],

        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],

        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        # A standalone DTD should not look like an XML document but must
        # contain at least one of the core DTD declarations.
        if not looks_like_xml(text) and \
            ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8
197
198
class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).
    """

    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
                 '*.wsdl', '*.wsf']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml']

    tokens = {
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        'tag': [
            (r'\s+', Text),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        'attr': [
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        if looks_like_xml(text):
            return 0.45  # less than HTML
245
246
class XsltLexer(XmlLexer):
    """
    A lexer for XSLT.

    .. versionadded:: 0.10
    """

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
    mimetypes = ['application/xsl+xml', 'application/xslt+xml']

    # XSLT element names (without the 'xsl:' prefix) that are promoted from
    # Name.Tag to Keyword by get_tokens_unprocessed().
    EXTRA_KEYWORDS = {
        'apply-imports', 'apply-templates', 'attribute',
        'attribute-set', 'call-template', 'choose', 'comment',
        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
        'for-each', 'if', 'import', 'include', 'key', 'message',
        'namespace-alias', 'number', 'otherwise', 'output', 'param',
        'preserve-space', 'processing-instruction', 'sort',
        'strip-space', 'stylesheet', 'template', 'text', 'transform',
        'value-of', 'variable', 'when', 'with-param'
    }

    def get_tokens_unprocessed(self, text):
        """Post-process the XML token stream, re-tagging known ``xsl:``
        element names as keywords."""
        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
            m = re.match('</?xsl:([^>]*)/?>?', value)

            if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS:
                yield index, Keyword, value
            else:
                yield index, token, value

    def analyse_text(text):
        if looks_like_xml(text) and '<xsl' in text:
            return 0.8
282
283
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation is a callback (from the CSS lexer module) that
            # tracks indent level and dispatches into 'content'.
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            # Ruby attribute hashes and object references are delegated to
            # the Ruby lexer.
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            # #{...} Ruby interpolation inside plain text
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
393
394
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_. Scaml is Haml for Scala.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation tracks indent level and dispatches into 'content'.
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'scaml-comment-block'), '#pop'),
            # '-@ import ...' lines: Scaml attribute/import declarations,
            # delegated to the Scala lexer.
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
505
506
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    _dot = r'.'

    tokens = {
        'root': [
            (r'[ \t]*\n', Text),
            # _indentation tracks indent level and dispatches into 'content'.
            (r'[ \t]*', _indentation),
        ],

        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],

        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                  'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            # Unlike Haml/Scaml, Pug tag names have no '%' prefix.
            (r'[\w:-]+', Name.Tag, 'tag'),
            (r'\|', Text, 'eval-or-plain'),
        ],

        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\[^\\]|[^'\\\n])*'", String, '#pop'),
            (r'"(\\\\|\\[^\\]|[^"\\\n])*"', String, '#pop'),
        ],

        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
JadeLexer = PugLexer  # compat: Jade was renamed to Pug upstream

eric ide

mercurial