# -*- coding: utf-8 -*-
"""
    pygments.lexers.mime
    ~~~~~~~~~~~~~~~~~~~~

    Lexer for Multipurpose Internet Mail Extensions (MIME) data.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include
from pygments.lexers import get_lexer_for_mimetype
from pygments.token import Text, Name, String, Operator, Comment, Other
from pygments.util import get_int_opt, ClassNotFound

__all__ = ["MIMELexer"]


class MIMELexer(RegexLexer):
    """
    Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer
    is designed to process nested multipart data.

    It assumes that the given data contains both a header and a body
    (separated by an empty line). If no valid header is found, the entire
    data is treated as body.

    Additional options accepted:

    `MIME-max-level`
        Max recursion level for nested MIME structure. Any negative number
        is treated as unlimited. (default: -1)

    `Content-Type`
        Treat the data as the given content type. Useful when the header is
        missing; otherwise this lexer tries to parse the content type from
        the header. (default: `text/plain`)

    `Multipart-Boundary`
        Set the default multipart boundary delimiter. This option is only
        used when `Content-Type` is `multipart` and the header is missing;
        otherwise this lexer tries to parse the boundary from the header.
        (default: None)

    `Content-Transfer-Encoding`
        Treat the data as the given encoding; otherwise this lexer tries to
        parse the encoding from the header. (default: None)
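
    Usage sketch (``message.eml`` is a placeholder path; ``highlight`` and
    ``HtmlFormatter`` are standard Pygments helpers)::

        from pygments import highlight
        from pygments.formatters import HtmlFormatter
        from pygments.lexers.mime import MIMELexer

        with open("message.eml") as f:
            print(highlight(f.read(), MIMELexer(), HtmlFormatter()))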

    .. versionadded:: 2.5
    """

    name = "MIME"
    aliases = ["mime"]
    mimetypes = ["multipart/mixed",
                 "multipart/related",
                 "multipart/alternative"]

    def __init__(self, **options):
        super(MIMELexer, self).__init__(**options)
        self.boundary = options.get("Multipart-Boundary")
        self.content_transfer_encoding = options.get(
            "Content-Transfer-Encoding")
        self.content_type = options.get("Content-Type", "text/plain")
        self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)
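
    # Options use their documented (hyphenated) names; they can be passed
    # through keyword dict unpacking, e.g. (illustrative values):
    #   MIMELexer(**{"Content-Type": "multipart/mixed",
    #                "Multipart-Boundary": "===boundary==="})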

    def analyse_text(text):
        try:
            header, body = text.strip().split("\n\n", 1)
            if not body.strip():
                return 0.1

            # strip every well-formed header field (same shape as the
            # header rule in "root"); any leftover text means the header
            # block is not valid MIME
            header_re = re.compile(r"^[\w-]+: *[\s\S]*?\n(?![ \t])",
                                   re.MULTILINE)
            invalid_headers = header_re.sub("", header + "\n")
            if invalid_headers.strip():
                return 0.1
            else:
                return 1

        except ValueError:
            return 0.1
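
    # Callback for the header rule in "root". Fields listed in
    # ``attention_headers`` are re-lexed in a state of the same name so
    # their parameters get fine-grained tokens; a typical field handled
    # here looks like (values are illustrative):
    #
    #   Content-Type: multipart/mixed; boundary="----=_Part_1"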
    def get_header_tokens(self, match):
        field = match.group(1)

        if field.lower() in self.attention_headers:
            yield match.start(1), Name.Tag, field + ":"
            yield match.start(2), Text.Whitespace, match.group(2)

            pos = match.end(2)
            body = match.group(3)
            for i, t, v in self.get_tokens_unprocessed(
                    body, ("root", field.lower())):
                yield pos + i, t, v

        else:
            yield match.start(), Comment, match.group()
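
    # A multipart body has the following shape (the boundary value is
    # illustrative; RFC 2046 allows text before the first delimiter and
    # after the closing one):
    #
    #   preamble text
    #   --BOUNDARY
    #   ...body part...
    #   --BOUNDARY
    #   ...body part...
    #   --BOUNDARY--
    #   epilogue text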
    def get_body_tokens(self, match):
        pos_body_start = match.start()
        entire_body = match.group()

        # skip first newline
        if entire_body[0] == '\n':
            yield pos_body_start, Text.Whitespace, u'\n'
            pos_body_start = pos_body_start + 1
            entire_body = entire_body[1:]

        # if it is not multipart, lex the whole body as a single part
        if not self.content_type.startswith("multipart") or not self.boundary:
            for i, t, v in self.get_bodypart_tokens(entire_body):
                yield pos_body_start + i, t, v
            return

        # find boundary
        bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
        bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)

        # some data has prefix text before the first boundary
        m = bdry_matcher.search(entire_body)
        if m:
            pos_part_start = pos_body_start + m.end()
            pos_iter_start = m.end()
            yield pos_body_start, Text, entire_body[:m.start()]
            yield pos_body_start + m.start(), String.Delimiter, m.group()
        else:
            pos_part_start = pos_body_start
            pos_iter_start = 0

        # process tokens of each body part
        for m in bdry_matcher.finditer(entire_body, pos_iter_start):
            # body part
            lpos_start = pos_part_start - pos_body_start
            lpos_end = m.start()
            part = entire_body[lpos_start:lpos_end]
            for i, t, v in self.get_bodypart_tokens(part):
                yield pos_part_start + i, t, v

            # boundary
            yield pos_body_start + lpos_end, String.Delimiter, m.group()
            pos_part_start = pos_body_start + m.end()

        # some data has suffix text after the last boundary
        lpos_start = pos_part_start - pos_body_start
        if lpos_start != len(entire_body):
            yield pos_part_start, Text, entire_body[lpos_start:]

    def get_bodypart_tokens(self, text):
        # return the text as-is if:
        # * there is no content
        # * no content type is specified
        # * the content encoding is not readable
        # * the max recursion level is exceeded
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        cte = self.content_transfer_encoding
        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
            return [(0, Other, text)]

        if self.max_nested_level == 0:
            return [(0, Other, text)]

        # get lexer
        try:
            lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        if isinstance(lexer, type(self)):
            lexer.max_nested_level = self.max_nested_level - 1

        return lexer.get_tokens_unprocessed(text)
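
    # Callback for the media-type rule in "content-type". For a match such
    # as "text/html" (illustrative), this stores the full type and yields
    # any leading whitespace, Name.Label("text"), String.Delimiter("/") and
    # Name.Label("html").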
    def store_content_type(self, match):
        self.content_type = match.group(1)

        prefix_len = match.start(1) - match.start(0)
        yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
        yield match.start(1), Name.Label, match.group(2)
        yield match.end(2), String.Delimiter, u"/"
        yield match.start(3), Name.Label, match.group(3)

    def get_content_type_subtokens(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Text.Whitespace, match.group(2)
        yield match.start(3), Name.Attribute, match.group(3)
        yield match.start(4), Operator, match.group(4)
        yield match.start(5), String, match.group(5)

        if match.group(3).lower() == "boundary":
            boundary = match.group(5).strip()
            if boundary[0] == '"' and boundary[-1] == '"':
                boundary = boundary[1:-1]
            self.boundary = boundary

    def store_content_transfer_encoding(self, match):
        self.content_transfer_encoding = match.group(0).lower()
        yield match.start(0), Name.Constant, match.group(0)
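
    # header fields whose value is re-lexed in the dedicated states below;
    # every other field is emitted as a single Comment token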
    attention_headers = {"content-type", "content-transfer-encoding"}
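
    # "root" consumes header fields line by line; the first empty line
    # matches ^$ and hands the remainder of the input to get_body_tokens
    # in a single match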
    tokens = {
        "root": [
            (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens),
            (r"^$[\s\S]+", get_body_tokens),
        ],
        "header": [
            # folding
            (r"\n[ \t]", Text.Whitespace),
            (r"\n(?![ \t])", Text.Whitespace, "#pop"),
        ],
        "content-type": [
            include("header"),
            (
                r"^\s*((multipart|application|audio|font|image|model|text|video"
                r"|message)/([\w-]+))",
                store_content_type,
            ),
            (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
             get_content_type_subtokens),
            (r';[ \t]*\n(?![ \t])', Text, '#pop'),
        ],
        "content-transfer-encoding": [
            include("header"),
            (r"([\w-]+)", store_content_transfer_encoding),
        ],
    }