eric6/ThirdParty/Pygments/pygments/lexers/mime.py

Tue, 21 Apr 2020 19:47:10 +0200

author
Detlev Offenbach <detlev@die-offenbachs.de>
date
Tue, 21 Apr 2020 19:47:10 +0200
changeset 7547
21b0534faebc
child 7701
25f42e208e08
permissions
-rw-r--r--

Pygments: updated Pygments to 2.3.1

7547
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
1 # -*- coding: utf-8 -*-
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
2 """
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
3 pygments.lexers.mime
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
4 ~~~~~~~~~~~~~~~~~~~~
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
5
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
6 Lexer for Multipurpose Internet Mail Extensions (MIME) data.
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
7
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
8 :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
9 :license: BSD, see LICENSE for details.
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
10 """
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
11
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
12 import re
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
13
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
14 from pygments.lexer import RegexLexer, include
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
15 from pygments.lexers import get_lexer_for_mimetype
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
16 from pygments.token import Text, Name, String, Operator, Comment, Other
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
17 from pygments.util import get_int_opt, ClassNotFound
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
18
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
19 __all__ = ["MIMELexer"]
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
20
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
21
21b0534faebc Pygments: updated Pygments to 2.3.1
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff changeset
class MIMELexer(RegexLexer):
    """
    Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
    designed to process nested multipart data.

    It assumes that the given data contains both header and body (separated
    by an empty line). If no valid header is found, then the entire data
    is treated as body.

    Additional options accepted:

    `MIME-max-level`
        Max recursion level for nested MIME structure. Any negative number
        is treated as unlimited. (default: -1)

    `Content-Type`
        Treat the data as specific content type. Useful when header is
        missing, or this lexer would try to parse from header. (default:
        `text/plain`)

    `Multipart-Boundary`
        Set the default multipart boundary delimiter. This option is only used
        when `Content-Type` is `multipart` and header is missing. This lexer
        would try to parse from header by default. (default: None)

    `Content-Transfer-Encoding`
        Treat the data as specific encoding. Or this lexer would try to parse
        from header by default. (default: None)

    For backward compatibility the underscore spellings `Content_Type` and
    `Content_Transfer_Encoding` are accepted as well; the hyphenated names
    documented above take precedence when both are given.

    .. versionadded:: 2.5
    """

    name = "MIME"
    aliases = ["mime"]
    mimetypes = ["multipart/mixed",
                 "multipart/related",
                 "multipart/alternative"]

    # One complete header field: "Name: value" plus any folded continuation
    # lines (lines starting with a space or tab).  Used by analyse_text only.
    _header_field_re = re.compile(
        r"^[\w-]+:[^\n]*(?:\n[ \t][^\n]*)*\n?", re.MULTILINE)

    def __init__(self, **options):
        """Read the lexer options described in the class docstring."""
        super(MIMELexer, self).__init__(**options)
        self.boundary = options.get("Multipart-Boundary")
        # The documented option names use hyphens; earlier code only looked
        # up the underscore spellings, so both are honoured here.
        self.content_transfer_encoding = options.get(
            "Content-Transfer-Encoding",
            options.get("Content_Transfer_Encoding"))
        self.content_type = options.get(
            "Content-Type", options.get("Content_Type", "text/plain"))
        self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)

    def analyse_text(text):
        """
        Score how likely *text* is MIME data.

        Returns 1 when the text splits into a header block and a non-empty
        body at the first blank line and every header line is a well-formed
        (possibly folded) ``Name: value`` field; returns 0.1 otherwise.
        """
        try:
            header, body = text.strip().split("\n\n", 1)
        except ValueError:
            # no blank line, hence no header/body separation
            return 0.1

        if not body.strip():
            return 0.1

        # Whatever is left after removing all valid header fields is
        # something that is not a header line.
        leftover = MIMELexer._header_field_re.sub("", header)
        if leftover.strip():
            return 0.1
        return 1

    def get_header_tokens(self, match):
        """
        Callback for one header field matched by the ``root`` state.

        Headers listed in ``attention_headers`` are re-lexed with the state
        named after the lower-cased field name; every other header is
        emitted as a single ``Comment`` token.
        """
        field = match.group(1)

        if field.lower() in self.attention_headers:
            yield match.start(1), Name.Tag, field + ":"
            yield match.start(2), Text.Whitespace, match.group(2)

            pos = match.end(2)
            body = match.group(3)
            # sub-lexer positions are relative to the header value
            for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())):
                yield pos + i, t, v

        else:
            yield match.start(), Comment, match.group()

    def get_body_tokens(self, match):
        """
        Callback for the message body (everything after the header block).

        A non-multipart body, or a multipart body without a known boundary,
        is passed to ``get_bodypart_tokens`` as a whole.  Otherwise the body
        is split on the boundary delimiter lines and each part is lexed
        separately; text before the first and after the last boundary is
        emitted as ``Text``.
        """
        pos_body_start = match.start()
        entire_body = match.group()

        # skip first newline
        if entire_body.startswith('\n'):
            yield pos_body_start, Text.Whitespace, u'\n'
            pos_body_start = pos_body_start + 1
            entire_body = entire_body[1:]

        # if it is not a multipart
        content_type = self.content_type or ""
        if not content_type.startswith("multipart") or not self.boundary:
            for i, t, v in self.get_bodypart_tokens(entire_body):
                yield pos_body_start + i, t, v
            return

        # find boundary
        bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
        bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)

        # some data has prefix text before first boundary
        m = bdry_matcher.search(entire_body)
        if m:
            pos_part_start = pos_body_start + m.end()
            pos_iter_start = m.end()
            if m.start():
                yield pos_body_start, Text, entire_body[:m.start()]
            # the delimiter token begins where the match begins, not where
            # it ends
            yield pos_body_start + m.start(), String.Delimiter, m.group()
        else:
            pos_part_start = pos_body_start
            pos_iter_start = 0

        # process tokens of each body part
        for m in bdry_matcher.finditer(entire_body, pos_iter_start):
            # bodypart
            lpos_start = pos_part_start - pos_body_start
            lpos_end = m.start()
            part = entire_body[lpos_start:lpos_end]
            for i, t, v in self.get_bodypart_tokens(part):
                yield pos_part_start + i, t, v

            # boundary
            yield pos_body_start + lpos_end, String.Delimiter, m.group()
            pos_part_start = pos_body_start + m.end()

        # some data has suffix text after last boundary
        lpos_start = pos_part_start - pos_body_start
        if lpos_start != len(entire_body):
            yield pos_part_start, Text, entire_body[lpos_start:]

    def get_bodypart_tokens(self, text):
        """
        Lex one body part with the lexer registered for ``self.content_type``.

        Returns an iterable of ``(index, tokentype, value)`` tuples whose
        indexes are relative to *text*.
        """
        # return if:
        #  * no content
        #  * no content type specific
        #  * content encoding is not readable
        #  * max recursion exceed
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        cte = self.content_transfer_encoding
        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
            return [(0, Other, text)]

        if self.max_nested_level == 0:
            return [(0, Other, text)]

        # get lexer
        try:
            lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        # a nested MIME lexer gets one level less to descend
        if isinstance(lexer, type(self)):
            lexer.max_nested_level = self.max_nested_level - 1

        return lexer.get_tokens_unprocessed(text)

    def store_content_type(self, match):
        """
        Callback for the ``type/subtype`` value of a Content-Type header.

        Remembers the full content type on the lexer and emits tokens for
        the leading whitespace, the type, the slash and the subtype.
        """
        self.content_type = match.group(1)

        prefix_len = match.start(1) - match.start(0)
        yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
        yield match.start(1), Name.Label, match.group(2)
        yield match.end(2), String.Delimiter, u"/"
        yield match.start(3), Name.Label, match.group(3)

    def get_content_type_subtokens(self, match):
        """
        Callback for one ``;name=value`` parameter of a Content-Type header.

        Emits tokens for the parameter and, when the parameter is
        ``boundary``, remembers its (unquoted) value on the lexer.
        """
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Text.Whitespace, match.group(2)
        yield match.start(3), Name.Attribute, match.group(3)
        yield match.start(4), Operator, match.group(4)
        yield match.start(5), String, match.group(5)

        if match.group(3).lower() == "boundary":
            boundary = match.group(5).strip()
            # the value may be empty ("boundary=;"), so check the length
            # before looking at the surrounding quotes
            if len(boundary) >= 2 and boundary[0] == '"' and boundary[-1] == '"':
                boundary = boundary[1:-1]
            self.boundary = boundary

    def store_content_transfer_encoding(self, match):
        """
        Callback for the value of a Content-Transfer-Encoding header.

        Remembers the lower-cased encoding on the lexer and emits it as a
        ``Name.Constant`` token.
        """
        self.content_transfer_encoding = match.group(0).lower()
        yield match.start(0), Name.Constant, match.group(0)

    # headers whose values are lexed in detail (lower-cased field names,
    # each with a state of the same name in ``tokens``)
    attention_headers = {"content-type", "content-transfer-encoding"}

    tokens = {
        "root": [
            (r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])", get_header_tokens),
            (r"^$[\s\S]+", get_body_tokens),
        ],
        "header": [
            # folding
            (r"\n[ \t]", Text.Whitespace),
            (r"\n(?![ \t])", Text.Whitespace, "#pop"),
        ],
        "content-type": [
            include("header"),
            (
                r"^\s*((multipart|application|audio|font|image|model|text|video"
                r"|message)/([\w-]+))",
                store_content_type,
            ),
            (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
             get_content_type_subtokens),
            (r';[ \t]*\n(?![ \t])', Text, '#pop'),
        ],
        "content-transfer-encoding": [
            include("header"),
            (r"([\w-]+)", store_content_transfer_encoding),
        ],
    }

eric ide

mercurial