eric6/ThirdParty/Pygments/pygments/lexers/mime.py

Thu, 14 Jan 2021 18:14:15 +0100

author
Detlev Offenbach <detlev@die-offenbachs.de>
date
Thu, 14 Jan 2021 18:14:15 +0100
changeset 7983
54c5cfbb1e29
parent 7701
25f42e208e08
permissions
-rw-r--r--

Third Party packages
- updated Pygments to 2.7.4

7983
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
1 # -*- coding: utf-8 -*-
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
2 """
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
3 pygments.lexers.mime
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
4 ~~~~~~~~~~~~~~~~~~~~
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
5
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
6 Lexer for Multipurpose Internet Mail Extensions (MIME) data.
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
7
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
9 :license: BSD, see LICENSE for details.
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
10 """
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
11
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
12 import re
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
13
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
14 from pygments.lexer import RegexLexer, include
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
15 from pygments.lexers import get_lexer_for_mimetype
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
16 from pygments.token import Text, Name, String, Operator, Comment, Other
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
17 from pygments.util import get_int_opt, ClassNotFound
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
18
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
19 __all__ = ["MIMELexer"]
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
20
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
21
54c5cfbb1e29 Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents: 7701
diff changeset
class MIMELexer(RegexLexer):
    """
    Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
    designed to process nested multipart data.

    It assumes that the given data contains both header and body (separated
    by an empty line). If no valid header is found, the entire data is
    treated as body.

    Additional options accepted:

    `MIME-max-level`
        Max recursion level for nested MIME structure. Any negative number
        is treated as unlimited. (default: -1)

    `Content-Type`
        Treat the data as a specific content type. Useful when the header is
        missing; otherwise this lexer tries to parse it from the header.
        (default: `text/plain`)

    `Multipart-Boundary`
        Set the default multipart boundary delimiter. This option is only used
        when `Content-Type` is `multipart` and the header is missing. This
        lexer tries to parse it from the header by default. (default: None)

    `Content-Transfer-Encoding`
        Treat the data as a specific encoding. Otherwise this lexer tries to
        parse it from the header by default. (default: None)

    For backward compatibility the legacy spellings `Content_Type` and
    `Content_Transfer_Encoding` are accepted as well; the hyphenated names
    take precedence when both are given.

    .. versionadded:: 2.5
    """

    name = "MIME"
    aliases = ["mime"]
    mimetypes = ["multipart/mixed",
                 "multipart/related",
                 "multipart/alternative"]

    # One complete header field ("Name: value"), including any folded
    # continuation lines (lines that start with a space or a tab).
    _HEADER_FIELD = r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])"
    _header_field_re = re.compile(_HEADER_FIELD, re.MULTILINE)

    def __init__(self, **options):
        """
        Read the lexer options described in the class docstring.

        The option names contain hyphens, so from Python code they have to be
        passed through a dict, e.g. ``MIMELexer(**{"Content-Type": "text/html"})``.
        """
        super().__init__(**options)
        self.boundary = options.get("Multipart-Boundary")
        # Prefer the documented hyphenated keys, but keep honouring the
        # underscore spellings that earlier versions looked up.
        self.content_transfer_encoding = options.get(
            "Content-Transfer-Encoding",
            options.get("Content_Transfer_Encoding"))
        self.content_type = options.get(
            "Content-Type", options.get("Content_Type", "text/plain"))
        self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)

    def analyse_text(text):
        """
        Score how likely *text* is MIME data.

        Returns 1 when the text splits into a header block and a non-empty
        body and every line of the header block belongs to a well-formed
        header field; returns 0.1 in every other case.
        """
        try:
            header, body = text.strip().split("\n\n", 1)
        except ValueError:
            # no empty line, hence no header/body separation
            return 0.1

        if not body.strip():
            return 0.1

        # Remove every well-formed header field; whatever is left over is
        # not a valid header. The split dropped the newline terminating the
        # last field, so put it back for the pattern to match.
        invalid_headers = MIMELexer._header_field_re.sub("", header + "\n")
        if invalid_headers.strip():
            return 0.1
        return 1

    def get_header_tokens(self, match):
        """
        Callback for one complete header field.

        Fields named in ``attention_headers`` are emitted as name, whitespace
        and a value that is lexed in the state named after the lower-cased
        field name. Any other field is emitted as a single Comment token.
        """
        field = match.group(1)

        if field.lower() in self.attention_headers:
            yield match.start(1), Name.Tag, field + ":"
            yield match.start(2), Text.Whitespace, match.group(2)

            pos = match.end(2)
            body = match.group(3)
            # Lex the value on its own; offsets come back relative to the
            # value, so shift them to positions in the whole text.
            for i, t, v in self.get_tokens_unprocessed(body, ("root", field.lower())):
                yield pos + i, t, v

        else:
            yield match.start(), Comment, match.group()

    def get_body_tokens(self, match):
        """
        Callback for the body (everything from the first empty line on).

        A non-multipart body, or a multipart body without a known boundary,
        is handed to ``get_bodypart_tokens`` as a whole. Otherwise the body is
        cut at the boundary lines: text before the first and after the last
        boundary is emitted as Text, boundary lines as String.Delimiter, and
        each part in between goes through ``get_bodypart_tokens``.
        """
        pos_body_start = match.start()
        entire_body = match.group()

        # skip first newline
        if entire_body[0] == '\n':
            yield pos_body_start, Text.Whitespace, '\n'
            pos_body_start = pos_body_start + 1
            entire_body = entire_body[1:]

        # if it is not a multipart
        if not self.content_type.startswith("multipart") or not self.boundary:
            for i, t, v in self.get_bodypart_tokens(entire_body):
                yield pos_body_start + i, t, v
            return

        # find boundary
        bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
        bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)

        # some data has prefix text before first boundary
        m = bdry_matcher.search(entire_body)
        if m:
            pos_part_start = pos_body_start + m.end()
            pos_iter_start = m.end()
            yield pos_body_start, Text, entire_body[:m.start()]
            # the delimiter token begins where the boundary line begins
            yield pos_body_start + m.start(), String.Delimiter, m.group()
        else:
            pos_part_start = pos_body_start
            pos_iter_start = 0

        # process tokens of each body part
        for m in bdry_matcher.finditer(entire_body, pos_iter_start):
            # bodypart
            lpos_start = pos_part_start - pos_body_start
            lpos_end = m.start()
            part = entire_body[lpos_start:lpos_end]
            for i, t, v in self.get_bodypart_tokens(part):
                yield pos_part_start + i, t, v

            # boundary
            yield pos_body_start + lpos_end, String.Delimiter, m.group()
            pos_part_start = pos_body_start + m.end()

        # some data has suffix text after last boundary
        lpos_start = pos_part_start - pos_body_start
        if lpos_start != len(entire_body):
            yield pos_part_start, Text, entire_body[lpos_start:]

    def get_bodypart_tokens(self, text):
        """
        Return the tokens of a single body part.

        The part is returned as one Other token when it is blank, when no
        content type is known, when the transfer encoding is something other
        than 8bit, 7bit or quoted-printable, when the nesting limit is
        used up, or when no lexer is registered for the content type.
        Otherwise the part is lexed by the lexer for ``self.content_type``.
        """
        # return if:
        #  * no content
        #  * no content type specific
        #  * content encoding is not readable
        #  * max recursion exceed
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        cte = self.content_transfer_encoding
        if cte and cte not in {"8bit", "7bit", "quoted-printable"}:
            return [(0, Other, text)]

        if self.max_nested_level == 0:
            return [(0, Other, text)]

        # get lexer
        try:
            lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        # a nested MIME lexer gets one level less to spend
        if isinstance(lexer, type(self)):
            lexer.max_nested_level = self.max_nested_level - 1

        return lexer.get_tokens_unprocessed(text)

    def store_content_type(self, match):
        """
        Callback for the ``type/subtype`` value of a Content-Type header.

        Remembers the full value in ``self.content_type`` and emits the
        leading whitespace, the type, the slash and the subtype.
        """
        self.content_type = match.group(1)

        prefix_len = match.start(1) - match.start(0)
        yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
        yield match.start(1), Name.Label, match.group(2)
        yield match.end(2), String.Delimiter, '/'
        yield match.start(3), Name.Label, match.group(3)

    def get_content_type_subtokens(self, match):
        """
        Callback for one ``; name=value`` parameter of a Content-Type header.

        Emits the five pieces of the parameter and, for the ``boundary``
        parameter, remembers its value (without surrounding double quotes)
        in ``self.boundary``.
        """
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Text.Whitespace, match.group(2)
        yield match.start(3), Name.Attribute, match.group(3)
        yield match.start(4), Operator, match.group(4)
        yield match.start(5), String, match.group(5)

        if match.group(3).lower() == "boundary":
            boundary = match.group(5).strip()
            # Slices instead of indexing: the value may be empty
            # ("boundary=" with nothing after it).
            if boundary[:1] == '"' and boundary[-1:] == '"':
                boundary = boundary[1:-1]
            self.boundary = boundary

    def store_content_transfer_encoding(self, match):
        """
        Callback for the value of a Content-Transfer-Encoding header.

        Remembers the lower-cased value in ``self.content_transfer_encoding``
        and emits the value as written.
        """
        self.content_transfer_encoding = match.group(0).lower()
        yield match.start(0), Name.Constant, match.group(0)

    # Lower-cased names of the header fields whose values are lexed in a
    # state of their own; each name must also be a key of ``tokens``.
    attention_headers = {"content-type", "content-transfer-encoding"}

    tokens = {
        "root": [
            (_HEADER_FIELD, get_header_tokens),
            (r"^$[\s\S]+", get_body_tokens),
        ],
        "header": [
            # folding
            (r"\n[ \t]", Text.Whitespace),
            (r"\n(?![ \t])", Text.Whitespace, "#pop"),
        ],
        "content-type": [
            include("header"),
            (
                r"^\s*((multipart|application|audio|font|image|model|text|video"
                r"|message)/([\w-]+))",
                store_content_type,
            ),
            (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
             get_content_type_subtokens),
            (r';[ \t]*\n(?![ \t])', Text, '#pop'),
        ],
        "content-transfer-encoding": [
            include("header"),
            (r"([\w-]+)", store_content_transfer_encoding),
        ],
    }

eric ide

mercurial