diff -r a3cf077a8db3 -r 25f42e208e08 eric6/ThirdParty/Pygments/pygments/lexers/mime.py
--- a/eric6/ThirdParty/Pygments/pygments/lexers/mime.py	Tue Sep 15 18:46:58 2020 +0200
+++ b/eric6/ThirdParty/Pygments/pygments/lexers/mime.py	Tue Sep 15 19:09:05 2020 +0200
@@ -1,226 +1,226 @@
-# -*- coding: utf-8 -*-
-"""
-    pygments.lexers.mime
-    ~~~~~~~~~~~~~~~~~~~~
-
-    Lexer for Multipurpose Internet Mail Extensions (MIME) data.
-
-    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
-    :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexer import RegexLexer, include
-from pygments.lexers import get_lexer_for_mimetype
-from pygments.token import Text, Name, String, Operator, Comment, Other
-from pygments.util import get_int_opt, ClassNotFound
-
-__all__ = ["MIMELexer"]
-
-
-class MIMELexer(RegexLexer):
-    """
-    Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
-    designed to process the nested mulitpart data.
-
-    It assumes that the given data contains both header and body (and is
-    splitted by empty line). If no valid header is found, then the entire data
-    would be treated as body.
-
-    Additional options accepted:
-
-    `MIME-max-level`
-        Max recurssion level for nested MIME structure. Any negative number
-        would treated as unlimited. (default: -1)
-
-    `Content-Type`
-        Treat the data as specific content type. Useful when header is
-        missing, or this lexer would try to parse from header. (default:
-        `text/plain`)
-
-    `Multipart-Boundary`
-        Set the default multipart boundary delimiter. This option is only used
-        when `Content-Type` is `multipart` and header is missing. This lexer
-        would try to parse from header by default. (default: None)
-
-    `Content-Transfer-Encoding`
-        Treat the data as specific encoding. Or this lexer would try to parse
-        from header by default. (default: None)
-
-    .. versionadded:: 2.5
-    """
-
-    name = "MIME"
-    aliases = ["mime"]
-    mimetypes = ["multipart/mixed",
-                 "multipart/related",
-                 "multipart/alternative"]
-
-    def __init__(self, **options):
-        super(MIMELexer, self).__init__(**options)
-        self.boundary = options.get("Multipart-Boundary")
-        self.content_transfer_encoding = options.get("Content_Transfer_Encoding")
-        self.content_type = options.get("Content_Type", "text/plain")
-        self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)
-
-    def analyse_text(text):
-        try:
-            header, body = text.strip().split("\n\n", 1)
-            if not body.strip():
-                return 0.1
-
-            invalid_headers = MIMELexer.tokens["header"].sub("", header)
-            if invalid_headers.strip():
-                return 0.1
-            else:
-                return 1
-
-        except ValueError:
-            return 0.1
-
-    def get_header_tokens(self, match):
-        field = match.group(1)
-
-        if field.lower() in self.attention_headers:
-            yield match.start(1), Name.Tag, field + ":"
-            yield match.start(2), Text.Whitespace, match.group(2)
-
-            pos = match.end(2)
-            body = match.group(3)
-            for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())):
-                yield pos + i, t, v
-
-        else:
-            yield match.start(), Comment, match.group()
-
-    def get_body_tokens(self, match):
-        pos_body_start = match.start()
-        entire_body = match.group()
-
-        # skip first newline
-        if entire_body[0] == '\n':
-            yield pos_body_start, Text.Whitespace, u'\n'
-            pos_body_start = pos_body_start + 1
-            entire_body = entire_body[1:]
-
-        # if it is not a mulitpart
-        if not self.content_type.startswith("multipart") or not self.boundary:
-            for i, t, v in self.get_bodypart_tokens(entire_body):
-                yield pos_body_start + i, t, v
-            return
-
-        # find boundary
-        bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
-        bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)
-
-        # some data has prefix text before first boundary
-        m = bdry_matcher.search(entire_body)
-        if m:
-            pos_part_start = pos_body_start + m.end()
-            pos_iter_start = lpos_end = m.end()
-            yield pos_body_start, Text, entire_body[:m.start()]
-            yield pos_body_start + lpos_end, String.Delimiter, m.group()
-        else:
-            pos_part_start = pos_body_start
-            pos_iter_start = 0
-
-        # process tokens of each body part
-        for m in bdry_matcher.finditer(entire_body, pos_iter_start):
-            # bodypart
-            lpos_start = pos_part_start - pos_body_start
-            lpos_end = m.start()
-            part = entire_body[lpos_start:lpos_end]
-            for i, t, v in self.get_bodypart_tokens(part):
-                yield pos_part_start + i, t, v
-
-            # boundary
-            yield pos_body_start + lpos_end, String.Delimiter, m.group()
-            pos_part_start = pos_body_start + m.end()
-
-        # some data has suffix text after last boundary
-        lpos_start = pos_part_start - pos_body_start
-        if lpos_start != len(entire_body):
-            yield pos_part_start, Text, entire_body[lpos_start:]
-
-    def get_bodypart_tokens(self, text):
-        # return if:
-        #  * no content
-        #  * no content type specific
-        #  * content encoding is not readable
-        #  * max recurrsion exceed
-        if not text.strip() or not self.content_type:
-            return [(0, Other, text)]
-
-        cte = self.content_transfer_encoding
-        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
-            return [(0, Other, text)]
-
-        if self.max_nested_level == 0:
-            return [(0, Other, text)]
-
-        # get lexer
-        try:
-            lexer = get_lexer_for_mimetype(self.content_type)
-        except ClassNotFound:
-            return [(0, Other, text)]
-
-        if isinstance(lexer, type(self)):
-            lexer.max_nested_level = self.max_nested_level - 1
-
-        return lexer.get_tokens_unprocessed(text)
-
-    def store_content_type(self, match):
-        self.content_type = match.group(1)
-
-        prefix_len = match.start(1) - match.start(0)
-        yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
-        yield match.start(1), Name.Label, match.group(2)
-        yield match.end(2), String.Delimiter, u"/"
-        yield match.start(3), Name.Label, match.group(3)
-
-    def get_content_type_subtokens(self, match):
-        yield match.start(1), Text, match.group(1)
-        yield match.start(2), Text.Whitespace, match.group(2)
-        yield match.start(3), Name.Attribute, match.group(3)
-        yield match.start(4), Operator, match.group(4)
-        yield match.start(5), String, match.group(5)
-
-        if match.group(3).lower() == "boundary":
-            boundary = match.group(5).strip()
-            if boundary[0] == '"' and boundary[-1] == '"':
-                boundary = boundary[1:-1]
-            self.boundary = boundary
-
-    def store_content_transfer_encoding(self, match):
-        self.content_transfer_encoding = match.group(0).lower()
-        yield match.start(0), Name.Constant, match.group(0)
-
-    attention_headers = {"content-type", "content-transfer-encoding"}
-
-    tokens = {
-        "root": [
-            (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens),
-            (r"^$[\s\S]+", get_body_tokens),
-        ],
-        "header": [
-            # folding
-            (r"\n[ \t]", Text.Whitespace),
-            (r"\n(?![ \t])", Text.Whitespace, "#pop"),
-        ],
-        "content-type": [
-            include("header"),
-            (
-                r"^\s*((multipart|application|audio|font|image|model|text|video"
-                r"|message)/([\w-]+))",
-                store_content_type,
-            ),
-            (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
-             get_content_type_subtokens),
-            (r';[ \t]*\n(?![ \t])', Text, '#pop'),
-        ],
-        "content-transfer-encoding": [
-            include("header"),
-            (r"([\w-]+)", store_content_transfer_encoding),
-        ],
-    }
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.mime
+    ~~~~~~~~~~~~~~~~~~~~
+
+    Lexer for Multipurpose Internet Mail Extensions (MIME) data.
+
+    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, include
+from pygments.lexers import get_lexer_for_mimetype
+from pygments.token import Text, Name, String, Operator, Comment, Other
+from pygments.util import get_int_opt, ClassNotFound
+
+__all__ = ["MIMELexer"]
+
+
+class MIMELexer(RegexLexer):
+    """
+    Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
+    designed to process the nested mulitpart data.
+
+    It assumes that the given data contains both header and body (and is
+    splitted by empty line). If no valid header is found, then the entire data
+    would be treated as body.
+
+    Additional options accepted:
+
+    `MIME-max-level`
+        Max recurssion level for nested MIME structure. Any negative number
+        would treated as unlimited. (default: -1)
+
+    `Content-Type`
+        Treat the data as specific content type. Useful when header is
+        missing, or this lexer would try to parse from header. (default:
+        `text/plain`)
+
+    `Multipart-Boundary`
+        Set the default multipart boundary delimiter. This option is only used
+        when `Content-Type` is `multipart` and header is missing. This lexer
+        would try to parse from header by default. (default: None)
+
+    `Content-Transfer-Encoding`
+        Treat the data as specific encoding. Or this lexer would try to parse
+        from header by default. (default: None)
+
+    .. versionadded:: 2.5
+    """
+
+    name = "MIME"
+    aliases = ["mime"]
+    mimetypes = ["multipart/mixed",
+                 "multipart/related",
+                 "multipart/alternative"]
+
+    def __init__(self, **options):
+        super().__init__(**options)
+        self.boundary = options.get("Multipart-Boundary")
+        self.content_transfer_encoding = options.get("Content_Transfer_Encoding")
+        self.content_type = options.get("Content_Type", "text/plain")
+        self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)
+
+    def analyse_text(text):
+        try:
+            header, body = text.strip().split("\n\n", 1)
+            if not body.strip():
+                return 0.1
+
+            invalid_headers = MIMELexer.tokens["header"].sub("", header)
+            if invalid_headers.strip():
+                return 0.1
+            else:
+                return 1
+
+        except ValueError:
+            return 0.1
+
+    def get_header_tokens(self, match):
+        field = match.group(1)
+
+        if field.lower() in self.attention_headers:
+            yield match.start(1), Name.Tag, field + ":"
+            yield match.start(2), Text.Whitespace, match.group(2)
+
+            pos = match.end(2)
+            body = match.group(3)
+            for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())):
+                yield pos + i, t, v
+
+        else:
+            yield match.start(), Comment, match.group()
+
+    def get_body_tokens(self, match):
+        pos_body_start = match.start()
+        entire_body = match.group()
+
+        # skip first newline
+        if entire_body[0] == '\n':
+            yield pos_body_start, Text.Whitespace, '\n'
+            pos_body_start = pos_body_start + 1
+            entire_body = entire_body[1:]
+
+        # if it is not a mulitpart
+        if not self.content_type.startswith("multipart") or not self.boundary:
+            for i, t, v in self.get_bodypart_tokens(entire_body):
+                yield pos_body_start + i, t, v
+            return
+
+        # find boundary
+        bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
+        bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)
+
+        # some data has prefix text before first boundary
+        m = bdry_matcher.search(entire_body)
+        if m:
+            pos_part_start = pos_body_start + m.end()
+            pos_iter_start = lpos_end = m.end()
+            yield pos_body_start, Text, entire_body[:m.start()]
+            yield pos_body_start + lpos_end, String.Delimiter, m.group()
+        else:
+            pos_part_start = pos_body_start
+            pos_iter_start = 0
+
+        # process tokens of each body part
+        for m in bdry_matcher.finditer(entire_body, pos_iter_start):
+            # bodypart
+            lpos_start = pos_part_start - pos_body_start
+            lpos_end = m.start()
+            part = entire_body[lpos_start:lpos_end]
+            for i, t, v in self.get_bodypart_tokens(part):
+                yield pos_part_start + i, t, v
+
+            # boundary
+            yield pos_body_start + lpos_end, String.Delimiter, m.group()
+            pos_part_start = pos_body_start + m.end()
+
+        # some data has suffix text after last boundary
+        lpos_start = pos_part_start - pos_body_start
+        if lpos_start != len(entire_body):
+            yield pos_part_start, Text, entire_body[lpos_start:]
+
+    def get_bodypart_tokens(self, text):
+        # return if:
+        #  * no content
+        #  * no content type specific
+        #  * content encoding is not readable
+        #  * max recurrsion exceed
+        if not text.strip() or not self.content_type:
+            return [(0, Other, text)]
+
+        cte = self.content_transfer_encoding
+        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
+            return [(0, Other, text)]
+
+        if self.max_nested_level == 0:
+            return [(0, Other, text)]
+
+        # get lexer
+        try:
+            lexer = get_lexer_for_mimetype(self.content_type)
+        except ClassNotFound:
+            return [(0, Other, text)]
+
+        if isinstance(lexer, type(self)):
+            lexer.max_nested_level = self.max_nested_level - 1
+
+        return lexer.get_tokens_unprocessed(text)
+
+    def store_content_type(self, match):
+        self.content_type = match.group(1)
+
+        prefix_len = match.start(1) - match.start(0)
+        yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
+        yield match.start(1), Name.Label, match.group(2)
+        yield match.end(2), String.Delimiter, '/'
+        yield match.start(3), Name.Label, match.group(3)
+
+    def get_content_type_subtokens(self, match):
+        yield match.start(1), Text, match.group(1)
+        yield match.start(2), Text.Whitespace, match.group(2)
+        yield match.start(3), Name.Attribute, match.group(3)
+        yield match.start(4), Operator, match.group(4)
+        yield match.start(5), String, match.group(5)
+
+        if match.group(3).lower() == "boundary":
+            boundary = match.group(5).strip()
+            if boundary[0] == '"' and boundary[-1] == '"':
+                boundary = boundary[1:-1]
+            self.boundary = boundary
+
+    def store_content_transfer_encoding(self, match):
+        self.content_transfer_encoding = match.group(0).lower()
+        yield match.start(0), Name.Constant, match.group(0)
+
+    attention_headers = {"content-type", "content-transfer-encoding"}
+
+    tokens = {
+        "root": [
+            (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens),
+            (r"^$[\s\S]+", get_body_tokens),
+        ],
+        "header": [
+            # folding
+            (r"\n[ \t]", Text.Whitespace),
+            (r"\n(?![ \t])", Text.Whitespace, "#pop"),
+        ],
+        "content-type": [
+            include("header"),
+            (
+                r"^\s*((multipart|application|audio|font|image|model|text|video"
+                r"|message)/([\w-]+))",
+                store_content_type,
+            ),
+            (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
+             get_content_type_subtokens),
+            (r';[ \t]*\n(?![ \t])', Text, '#pop'),
+        ],
+        "content-transfer-encoding": [
+            include("header"),
+            (r"([\w-]+)", store_content_transfer_encoding),
+        ],
+    }
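
For anyone trying out the updated lexer, the options handled in __init__ above can be passed straight to the constructor. A minimal usage sketch follows; the sample message and the boundary string "XXXX" are made-up illustrations, while highlight and TerminalFormatter are standard Pygments API. Note the option names contain hyphens, so they are passed via dict unpacking rather than as keyword arguments.

from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers.mime import MIMELexer

# Made-up multipart message; "XXXX" is an arbitrary boundary chosen
# for this example only.
sample = (
    "Content-Type: multipart/mixed; boundary=XXXX\n"
    "\n"
    "--XXXX\n"
    "Content-Type: text/html\n"
    "\n"
    "<p>hello</p>\n"
    "--XXXX--\n"
)

# Cap recursion into nested parts at two levels. "Multipart-Boundary"
# is only consulted when no header is present; here it is redundant
# because the Content-Type header above already carries the boundary.
lexer = MIMELexer(**{
    "MIME-max-level": 2,
    "Multipart-Boundary": "XXXX",
})

print(highlight(sample, lexer, TerminalFormatter()))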