eric6/ThirdParty/Pygments/pygments/lexers/html.py

changeset 6942
2602857055c5
parent 6651
e8f3b5568b21
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.html
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for HTML, XML and related markup.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
15 default, using
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Punctuation
18 from pygments.util import looks_like_xml, html_doctype_matches
19
20 from pygments.lexers.javascript import JavascriptLexer
21 from pygments.lexers.jvm import ScalaLexer
22 from pygments.lexers.css import CssLexer, _indentation, _starts_block
23 from pygments.lexers.ruby import RubyLexer
24
# Names exported by ``from pygments.lexers.html import *``.
__all__ = [
    'HtmlLexer',
    'DtdLexer',
    'XmlLexer',
    'XsltLexer',
    'HamlLexer',
    'ScamlLexer',
    'PugLexer',
]
27
28
class HtmlLexer(RegexLexer):
    """
    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
    by the appropriate lexer.

    The content of ``<script>`` elements is delegated to `JavascriptLexer`
    and the content of ``<style>`` elements to `CssLexer`.
    """

    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
    mimetypes = ['text/html', 'application/xhtml+xml']

    # IGNORECASE: tag names such as ``script``/``style`` match in any case.
    # DOTALL: ``.`` also matches newlines, so the CDATA, processing
    # instruction, attribute string and script/style content patterns can
    # span several lines.
    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        # Top level of the document: text, entities, comments and tags.
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # <script> and <style> push two states: the attribute state
            # 'tag' is handled first, and once it pops the matching
            # '*-content' state takes over for the element body.
            # These two rules must stay ahead of the generic tag rule below.
            (r'(<)(\s*)(script)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('script-content', 'tag')),
            (r'(<)(\s*)(style)(\s*)',
             bygroups(Punctuation, Text, Name.Tag, Text),
             ('style-content', 'tag')),
            # note: this allows tag names not used in HTML like <x:with-dash>,
            # this is to support yet-unknown template engines and the like
            (r'(<)(\s*)([\w:.-]+)',
             bygroups(Punctuation, Text, Name.Tag), 'tag'),
            # Closing tag, matched in one go (no state change).
            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation)),
        ],
        # Inside ``<!-- ... -->``; a lone ``-`` stays part of the comment.
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        # Inside an opening tag, after the tag name: attributes until ``>``.
        'tag': [
            (r'\s+', Text),
            # ``name=`` enters 'attr' to read the value.
            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
             'attr'),
            # Attribute without a value.
            (r'[\w:-]+', Name.Attribute),
            # ``>`` or ``/>`` ends the tag.
            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
        ],
        # Body of a <script> element: everything up to the closing tag is
        # handed to the JavaScript lexer.
        'script-content': [
            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation), '#pop'),
            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
        ],
        # Body of a <style> element: everything up to the closing tag is
        # handed to the CSS lexer.
        'style-content': [
            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
                      Punctuation),'#pop'),
            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
        ],
        # Attribute value: double-quoted, single-quoted or unquoted; each
        # alternative returns to 'tag'.
        'attr': [
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # Score 0.5 when the text carries an HTML doctype; otherwise fall
        # through and return None.
        # NOTE(review): presumably the lexer framework treats None as
        # "no match" -- confirm against pygments.lexer.
        if html_doctype_matches(text):
            return 0.5
97
98
class DtdLexer(RegexLexer):
    """
    A lexer for DTDs (Document Type Definitions).

    Each declaration kind (``<!ELEMENT``, ``<!ATTLIST``, ``<!ENTITY``,
    ``<!NOTATION``) has its own state that is left at the closing ``>``.

    .. versionadded:: 1.5
    """

    flags = re.MULTILINE | re.DOTALL

    name = 'DTD'
    aliases = ['dtd']
    filenames = ['*.dtd']
    mimetypes = ['application/xml-dtd']

    tokens = {
        # Top level: dispatch on the declaration keyword.
        'root': [
            include('common'),

            (r'(<!ELEMENT)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'element'),
            (r'(<!ATTLIST)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'attlist'),
            (r'(<!ENTITY)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Entity), 'entity'),
            (r'(<!NOTATION)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Tag), 'notation'),
            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
             bygroups(Keyword, Name.Entity, Text, Keyword)),

            # An inline <!DOCTYPE ...> header; no state change, the
            # remaining pieces are picked up by the rules below.
            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
             bygroups(Keyword, Text, Name.Tag)),
            (r'PUBLIC|SYSTEM', Keyword.Constant),
            (r'[\[\]>]', Keyword),
        ],

        # Rules shared by every state: whitespace, entity references
        # (``%name;`` / ``&name;``), comments, operators and quoted strings.
        'common': [
            (r'\s+', Text),
            (r'(%|&)[^;]*;', Name.Entity),
            ('<!--', Comment, 'comment'),
            (r'[(|)*,?+]', Operator),
            (r'"[^"]*"', String.Double),
            (r'\'[^\']*\'', String.Single),
        ],

        # Inside ``<!-- ... -->``; a lone ``-`` stays part of the comment.
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],

        # Body of an <!ELEMENT ...> declaration.
        'element': [
            include('common'),
            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Tag),
            (r'>', Keyword, '#pop'),
        ],

        # Body of an <!ATTLIST ...> declaration.  Longer alternatives come
        # first (IDREFS before IDREF before ID, and so on) because regex
        # alternation takes the first alternative that matches.
        'attlist': [
            include('common'),
            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
             Keyword.Constant),
            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
            (r'xml:space|xml:lang', Keyword.Reserved),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],

        # Body of an <!ENTITY ...> declaration.
        'entity': [
            include('common'),
            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Entity),
            (r'>', Keyword, '#pop'),
        ],

        # Body of a <!NOTATION ...> declaration.
        'notation': [
            include('common'),
            (r'SYSTEM|PUBLIC', Keyword.Constant),
            (r'[^>\s|()?+*,]+', Name.Attribute),
            (r'>', Keyword, '#pop'),
        ],
    }

    def analyse_text(text):
        # Score 0.8 for text that does not look like an XML document but
        # contains at least one DTD declaration keyword; otherwise fall
        # through and return None.
        if not looks_like_xml(text) and \
            ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
            return 0.8
185
186
class XmlLexer(RegexLexer):
    """
    Generic lexer for XML (eXtensible Markup Language).

    Unlike `HtmlLexer`, a tag is emitted as whole `Name.Tag` tokens
    (``<name``, ``</name>``, ``>``, ``/>``) rather than being split into
    punctuation and name.
    """

    # DOTALL lets the CDATA, processing-instruction and quoted-string
    # patterns span several lines; UNICODE makes ``\w`` match non-ASCII
    # word characters in names.
    flags = re.MULTILINE | re.DOTALL | re.UNICODE

    name = 'XML'
    aliases = ['xml']
    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
                 '*.wsdl', '*.wsf']
    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
                 'application/rss+xml', 'application/atom+xml']

    tokens = {
        # Top level of the document: text, entities, comments and tags.
        'root': [
            ('[^<&]+', Text),
            (r'&\S*?;', Name.Entity),
            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
            ('<!--', Comment, 'comment'),
            (r'<\?.*?\?>', Comment.Preproc),
            ('<![^>]*>', Comment.Preproc),
            # Opening tag: ``<name`` is one token, attributes follow in 'tag'.
            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
            # Closing tag, including the ``>``, is one token.
            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
        ],
        # Inside ``<!-- ... -->``; a lone ``-`` stays part of the comment.
        'comment': [
            ('[^-]+', Comment),
            ('-->', Comment, '#pop'),
            ('-', Comment),
        ],
        # Inside an opening tag: ``name=`` attributes until ``>`` or ``/>``.
        'tag': [
            (r'\s+', Text),
            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
            (r'/?\s*>', Name.Tag, '#pop'),
        ],
        # Attribute value: double-quoted, single-quoted or unquoted; each
        # alternative returns to 'tag'.
        'attr': [
            (r'\s+', Text),
            ('".*?"', String, '#pop'),
            ("'.*?'", String, '#pop'),
            (r'[^\s>]+', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # Score anything that looks like XML slightly below the 0.5 that
        # HtmlLexer returns for a matching doctype.
        if looks_like_xml(text):
            return 0.45  # less than HTML
233
234
class XsltLexer(XmlLexer):
    """
    A lexer for XSLT.

    Tokenizes exactly like `XmlLexer`, except that tag tokens in the
    ``xsl:`` namespace whose local name is listed in `EXTRA_KEYWORDS` are
    emitted as `Keyword` instead of `Name.Tag`.

    .. versionadded:: 0.10
    """

    name = 'XSLT'
    aliases = ['xslt']
    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
    mimetypes = ['application/xsl+xml', 'application/xslt+xml']

    # Local names of the XSLT instructions highlighted as keywords.
    EXTRA_KEYWORDS = set((
        'apply-imports', 'apply-templates', 'attribute',
        'attribute-set', 'call-template', 'choose', 'comment',
        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
        'for-each', 'if', 'import', 'include', 'key', 'message',
        'namespace-alias', 'number', 'otherwise', 'output', 'param',
        'preserve-space', 'processing-instruction', 'sort',
        'strip-space', 'stylesheet', 'template', 'text', 'transform',
        'value-of', 'variable', 'when', 'with-param'
    ))

    # Matches a complete tag token as produced by XmlLexer (``<xsl:name`` or
    # ``</xsl:name>``) and captures the local name only.  Optional
    # whitespace around ``/`` and before ``>`` is tolerated so that a
    # closing tag such as ``</xsl:template >`` is recognised too.
    _xsl_tag_re = re.compile(r'<\s*/?\s*xsl:([\w:.-]+)\s*>?\Z')

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for *text*.

        The stream comes from `XmlLexer`; `Name.Tag` tokens that name a
        known XSLT instruction are re-typed to `Keyword`.  Index and value
        are passed through unchanged.
        """
        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
            # Only tag tokens can be XSLT instructions, so the regex is not
            # run for text, attributes, strings and comments.
            if token is Name.Tag:
                m = self._xsl_tag_re.match(value)
                if m and m.group(1) in self.EXTRA_KEYWORDS:
                    token = Keyword
            yield index, token, value

    def analyse_text(text):
        # Score 0.8 for XML that mentions an ``<xsl`` tag; otherwise fall
        # through and return None.
        if looks_like_xml(text) and '<xsl' in text:
            return 0.8
270
271
class HamlLexer(ExtendedRegexLexer):
    """
    For Haml markup.

    Embedded Ruby (evaluated lines, attribute hashes and ``#{...}``
    interpolation) is delegated to `RubyLexer`.

    .. versionadded:: 1.3
    """

    name = 'Haml'
    aliases = ['haml']
    filenames = ['*.haml']
    mimetypes = ['text/x-haml']

    flags = re.IGNORECASE
    # Haml can include " |\n" anywhere,
    # which is ignored and used to wrap long lines.
    # To accommodate this, use this custom faux dot instead.
    _dot = r'(?: \|\n(?=.* \|)|.)'

    # In certain places, a comma at the end of the line
    # allows line wrapping as well.
    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
    tokens = {
        # Start of every line: skip blank lines, otherwise hand the leading
        # whitespace to the ``_indentation`` callback.
        # NOTE(review): ``_indentation`` and ``_starts_block`` come from
        # pygments.lexers.css; presumably they track block indentation and
        # enter the 'content' state -- confirm there.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ``.class`` and ``#id`` shorthands; both continue in 'tag'.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # Rest of a line: ``==`` starts plain (interpolated) text, ``=`` /
        # ``~`` (optionally prefixed by ``&`` or ``!``) starts a Ruby
        # expression, anything else is plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             'root'),
            default('plain'),
        ],

        # First non-blank thing on a line.  Rule order matters: ``-#`` must
        # be tried before the generic ``-`` rule, and the conditional
        # comment ``/[...]`` before the generic ``/`` comment.
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'haml-comment-block'), '#pop'),
            (r'(-)(' + _comma_dot + r'*\n)',
             bygroups(Punctuation, using(RubyLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        # After a tag name or CSS shorthand: further shorthands, a ``{...}``
        # or ``[...]`` section lexed as Ruby, ``(...)`` HTML-style
        # attributes, a self-closing ``/``, ``<``/``>`` modifiers, then the
        # rest of the line.
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        # Plain text up to the end of the line; ``#{...}`` is interpolated
        # Ruby, while ``\#{`` is an escaped literal and stays text.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Inside ``( ... )`` on a tag.
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        # Value of an HTML-style attribute: a bare, ``@`` or ``$`` variable
        # name, or a quoted string on a single line.
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],

        # Lines of a ``/`` comment block.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        # Lines of a ``-#`` comment block.
        'haml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        # Lines of a ``:filter`` block; same shape as 'plain' but the text
        # is emitted as Name.Decorator.
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
381
382
class ScamlLexer(ExtendedRegexLexer):
    """
    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.

    Embedded code (evaluated lines, attribute sections and ``#{...}``
    interpolation) is delegated to `ScalaLexer`.

    .. versionadded:: 1.4
    """

    name = 'Scaml'
    aliases = ['scaml']
    filenames = ['*.scaml']
    mimetypes = ['text/x-scaml']

    flags = re.IGNORECASE
    # Scaml does not yet support the " |\n" notation to
    # wrap long lines.  Once it does, use the custom faux
    # dot instead.
    # _dot = r'(?: \|\n(?=.* \|)|.)'
    _dot = r'.'

    tokens = {
        # Start of every line: skip blank lines, otherwise hand the leading
        # whitespace to the ``_indentation`` callback.
        # NOTE(review): ``_indentation`` and ``_starts_block`` come from
        # pygments.lexers.css; presumably they track block indentation and
        # enter the 'content' state -- confirm there.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ``.class`` and ``#id`` shorthands; both continue in 'tag'.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # Rest of a line: ``==`` starts plain (interpolated) text, ``=`` /
        # ``~`` (optionally prefixed by ``&`` or ``!``) starts a Scala
        # expression, anything else is plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             'root'),
            default('plain'),
        ],

        # First non-blank thing on a line.  Rule order matters: ``-#`` and
        # ``-@`` must be tried before the generic ``-`` rule, and the
        # conditional comment ``/[...]`` before the generic ``/`` comment.
        'content': [
            include('css'),
            (r'%[\w:-]+', Name.Tag, 'tag'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'scaml-comment-block'), '#pop'),
            # ``-@`` line with an optional ``import`` keyword; the rest is
            # lexed as Scala.
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            include('eval-or-plain'),
        ],

        # After a tag name or CSS shorthand: further shorthands, a ``{...}``
        # or ``[...]`` section lexed as Scala, ``(...)`` HTML-style
        # attributes, a self-closing ``/``, ``<``/``>`` modifiers, then the
        # rest of the line.
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        # Plain text up to the end of the line; ``#{...}`` is interpolated
        # Scala, while ``\#{`` is an escaped literal and stays text.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Inside ``( ... )`` on a tag.
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        # Value of an HTML-style attribute: a bare, ``@`` or ``$`` variable
        # name, or a quoted string on a single line.
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],

        # Lines of a ``/`` comment block.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        # Lines of a ``-#`` comment block.
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        # Lines of a ``:filter`` block; same shape as 'plain' but the text
        # is emitted as Name.Decorator.
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
493
494
class PugLexer(ExtendedRegexLexer):
    """
    For Pug markup.
    Pug is a variant of Scaml, see:
    http://scalate.fusesource.org/documentation/scaml-reference.html

    The rule table mirrors `ScamlLexer`; the 'content' state differs in
    that tags are bare names (no ``%`` prefix) and ``|`` introduces a
    text line.  Embedded code is delegated to `ScalaLexer`.

    .. versionadded:: 1.4
    """

    name = 'Pug'
    aliases = ['pug', 'jade']
    filenames = ['*.pug', '*.jade']
    mimetypes = ['text/x-pug', 'text/x-jade']

    flags = re.IGNORECASE
    # Plain dot: no line-wrapping notation is handled here.
    _dot = r'.'

    tokens = {
        # Start of every line: skip blank lines, otherwise hand the leading
        # whitespace to the ``_indentation`` callback.
        # NOTE(review): ``_indentation`` and ``_starts_block`` come from
        # pygments.lexers.css; presumably they track block indentation and
        # enter the 'content' state -- confirm there.
        'root': [
            (r'[ \t]*\n', Text),
            (r'[ \t]*', _indentation),
        ],

        # ``.class`` and ``#id`` shorthands; both continue in 'tag'.
        'css': [
            (r'\.[\w:-]+', Name.Class, 'tag'),
            (r'\#[\w:-]+', Name.Function, 'tag'),
        ],

        # Rest of a line: ``==`` starts plain (interpolated) text, ``=`` /
        # ``~`` (optionally prefixed by ``&`` or ``!``) starts an
        # expression lexed with ScalaLexer, anything else is plain text.
        'eval-or-plain': [
            (r'[&!]?==', Punctuation, 'plain'),
            (r'([&!]?[=~])(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)), 'root'),
            default('plain'),
        ],

        # First non-blank thing on a line.  Rule order matters: ``-#`` and
        # ``-@`` must be tried before the generic ``-`` rule, and the
        # bare-name tag rule comes after all the prefixed forms.
        'content': [
            include('css'),
            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
             bygroups(Comment, Comment.Special, Comment),
             '#pop'),
            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
             '#pop'),
            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
                                                 'scaml-comment-block'), '#pop'),
            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
             bygroups(Punctuation, Keyword, using(ScalaLexer)),
             '#pop'),
            (r'(-)(' + _dot + r'*\n)',
             bygroups(Punctuation, using(ScalaLexer)),
             '#pop'),
            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
             '#pop'),
            # A bare word is a tag name.
            (r'[\w:-]+', Name.Tag, 'tag'),
            # ``|`` introduces a text line.
            (r'\|', Text, 'eval-or-plain'),
        ],

        # After a tag name or CSS shorthand: further shorthands, a ``{...}``
        # or ``[...]`` section, ``(...)`` HTML-style attributes, a
        # self-closing ``/``, ``<``/``>`` modifiers, then the rest of the
        # line.
        'tag': [
            include('css'),
            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
            (r'\(', Text, 'html-attributes'),
            (r'/[ \t]*\n', Punctuation, '#pop:2'),
            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
            include('eval-or-plain'),
        ],

        # Plain text up to the end of the line; ``#{...}`` is interpolated
        # code, while ``\#{`` is an escaped literal and stays text.
        'plain': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],

        # Inside ``( ... )`` on a tag.
        'html-attributes': [
            (r'\s+', Text),
            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
            (r'[\w:-]+', Name.Attribute),
            (r'\)', Text, '#pop'),
        ],

        # Value of an HTML-style attribute: a bare, ``@`` or ``$`` variable
        # name, or a quoted string on a single line.
        'html-attribute-value': [
            (r'[ \t]+', Text),
            (r'\w+', Name.Variable, '#pop'),
            (r'@\w+', Name.Variable.Instance, '#pop'),
            (r'\$\w+', Name.Variable.Global, '#pop'),
            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
        ],

        # Lines of a ``/`` comment block.
        'html-comment-block': [
            (_dot + '+', Comment),
            (r'\n', Text, 'root'),
        ],

        # Lines of a ``-#`` comment block (state name kept from Scaml).
        'scaml-comment-block': [
            (_dot + '+', Comment.Preproc),
            (r'\n', Text, 'root'),
        ],

        # Lines of a ``:filter`` block; same shape as 'plain' but the text
        # is emitted as Name.Decorator.
        'filter-block': [
            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
            (r'(#\{)(' + _dot + r'*?)(\})',
             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
            (r'\n', Text, 'root'),
        ],
    }
# Old class name kept as an alias so existing imports of JadeLexer still work.
JadeLexer = PugLexer  # compat

eric ide

mercurial