3 pygments.lexers.webmisc |
3 pygments.lexers.webmisc |
4 ~~~~~~~~~~~~~~~~~~~~~~~ |
4 ~~~~~~~~~~~~~~~~~~~~~~~ |
5 |
5 |
6 Lexers for misc. web stuff. |
6 Lexers for misc. web stuff. |
7 |
7 |
8 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. |
8 :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. |
9 :license: BSD, see LICENSE for details. |
9 :license: BSD, see LICENSE for details. |
10 """ |
10 """ |
11 |
11 |
12 import re |
12 import re |
13 |
13 |
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \ |
14 from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \ |
15 default, using |
15 default, using |
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
17 Number, Punctuation, Literal |
17 Number, Punctuation, Literal |
18 from pygments.util import unirange |
|
19 |
18 |
20 from pygments.lexers.css import _indentation, _starts_block |
19 from pygments.lexers.css import _indentation, _starts_block |
21 from pygments.lexers.html import HtmlLexer |
20 from pygments.lexers.html import HtmlLexer |
22 from pygments.lexers.javascript import JavascriptLexer |
21 from pygments.lexers.javascript import JavascriptLexer |
23 from pygments.lexers.ruby import RubyLexer |
22 from pygments.lexers.ruby import RubyLexer |
72 |
71 |
73 xquery_parse_state = [] |
72 xquery_parse_state = [] |
74 |
73 |
75 # FIX UNICODE LATER |
74 # FIX UNICODE LATER |
76 # ncnamestartchar = ( |
75 # ncnamestartchar = ( |
77 # ur"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|" |
76 # r"[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|" |
78 # ur"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|" |
77 # r"[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|" |
79 # ur"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|" |
78 # r"[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|" |
80 # ur"[\U00010000-\U000EFFFF]" |
79 # r"[\U00010000-\U000EFFFF]" |
81 # ) |
80 # ) |
82 ncnamestartchar = r"(?:[A-Z]|_|[a-z])" |
81 ncnamestartchar = r"(?:[A-Z]|_|[a-z])" |
83 # FIX UNICODE LATER |
82 # FIX UNICODE LATER |
84 # ncnamechar = ncnamestartchar + (ur"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|" |
83 # ncnamechar = ncnamestartchar + (r"|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|" |
85 # ur"[\u203F-\u2040]") |
84 # r"[\u203F-\u2040]") |
86 ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])" |
85 ncnamechar = r"(?:" + ncnamestartchar + r"|-|\.|[0-9])" |
87 ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar) |
86 ncname = "(?:%s+%s*)" % (ncnamestartchar, ncnamechar) |
88 pitarget_namestartchar = r"(?:[A-KN-WYZ]|_|:|[a-kn-wyz])" |
87 pitarget_namestartchar = r"(?:[A-KN-WYZ]|_|:|[a-kn-wyz])" |
89 pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])" |
88 pitarget_namechar = r"(?:" + pitarget_namestartchar + r"|-|\.|[0-9])" |
90 pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar) |
89 pitarget = "%s+%s*" % (pitarget_namestartchar, pitarget_namechar) |
97 |
96 |
98 stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")' |
97 stringdouble = r'(?:"(?:' + entityref + r'|' + charref + r'|""|[^&"])*")' |
99 stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')" |
98 stringsingle = r"(?:'(?:" + entityref + r"|" + charref + r"|''|[^&'])*')" |
100 |
99 |
101 # FIX UNICODE LATER |
100 # FIX UNICODE LATER |
102 # elementcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' |
101 # elementcontentchar = (r'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' |
103 # ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') |
102 # r'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') |
104 elementcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' |
103 elementcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' |
105 # quotattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|' |
104 # quotattrcontentchar = (r'\t|\r|\n|[\u0020-\u0021]|[\u0023-\u0025]|' |
106 # ur'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]') |
105 # r'[\u0027-\u003b]|[\u003d-\u007a]|\u007c|[\u007e-\u007F]') |
107 quotattrcontentchar = r'[A-Za-z]|\s|\d|[!#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' |
106 quotattrcontentchar = r'[A-Za-z]|\s|\d|[!#$%()*+,\-./:;=?@\[\\\]^_\'`|~]' |
108 # aposattrcontentchar = (ur'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' |
107 # aposattrcontentchar = (r'\t|\r|\n|[\u0020-\u0025]|[\u0028-\u003b]|' |
109 # ur'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') |
108 # r'[\u003d-\u007a]|\u007c|[\u007e-\u007F]') |
110 aposattrcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_`|~]' |
109 aposattrcontentchar = r'[A-Za-z]|\s|\d|[!"#$%()*+,\-./:;=?@\[\\\]^_`|~]' |
111 |
110 |
112 # CHAR elements - fix the above elementcontentchar, quotattrcontentchar, |
111 # CHAR elements - fix the above elementcontentchar, quotattrcontentchar, |
113 # aposattrcontentchar |
112 # aposattrcontentchar |
114 # #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] |
113 # #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] |
516 (r'\?', Punctuation), |
515 (r'\?', Punctuation), |
517 ], |
516 ], |
518 'xml_comment': [ |
517 'xml_comment': [ |
519 (r'(-->)', popstate_xmlcomment_callback), |
518 (r'(-->)', popstate_xmlcomment_callback), |
520 (r'[^-]{1,2}', Literal), |
519 (r'[^-]{1,2}', Literal), |
521 (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + |
520 (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', |
522 unirange(0x10000, 0x10ffff), Literal), |
521 Literal), |
523 ], |
522 ], |
524 'processing_instruction': [ |
523 'processing_instruction': [ |
525 (r'\s+', Text, 'processing_instruction_content'), |
524 (r'\s+', Text, 'processing_instruction_content'), |
526 (r'\?>', String.Doc, '#pop'), |
525 (r'\?>', String.Doc, '#pop'), |
527 (pitarget, Name), |
526 (pitarget, Name), |
528 ], |
527 ], |
529 'processing_instruction_content': [ |
528 'processing_instruction_content': [ |
530 (r'\?>', String.Doc, '#pop'), |
529 (r'\?>', String.Doc, '#pop'), |
531 (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + |
530 (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', |
532 unirange(0x10000, 0x10ffff), Literal), |
531 Literal), |
533 ], |
532 ], |
534 'cdata_section': [ |
533 'cdata_section': [ |
535 (r']]>', String.Doc, '#pop'), |
534 (r']]>', String.Doc, '#pop'), |
536 (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + |
535 (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', |
537 unirange(0x10000, 0x10ffff), Literal), |
536 Literal), |
538 ], |
537 ], |
539 'start_tag': [ |
538 'start_tag': [ |
540 include('whitespace'), |
539 include('whitespace'), |
541 (r'(/>)', popstate_tag_callback), |
540 (r'(/>)', popstate_tag_callback), |
542 (r'>', Name.Tag, 'element_content'), |
541 (r'>', Name.Tag, 'element_content'), |
601 'pragma': [ |
600 'pragma': [ |
602 (qname, Name.Variable, 'pragmacontents'), |
601 (qname, Name.Variable, 'pragmacontents'), |
603 ], |
602 ], |
604 'pragmacontents': [ |
603 'pragmacontents': [ |
605 (r'#\)', Punctuation, 'operator'), |
604 (r'#\)', Punctuation, 'operator'), |
606 (u'\\t|\\r|\\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' + |
605 (r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|[\U00010000-\U0010FFFF]', |
607 unirange(0x10000, 0x10ffff), Literal), |
606 Literal), |
608 (r'(\s+)', Text), |
607 (r'(\s+)', Text), |
609 ], |
608 ], |
610 'occurrenceindicator': [ |
609 'occurrenceindicator': [ |
611 include('whitespace'), |
610 include('whitespace'), |
612 (r'\(:', Comment, 'comment'), |
611 (r'\(:', Comment, 'comment'), |