Thu, 14 Jan 2021 18:14:15 +0100
Third Party packages
- updated Pygments to 2.7.4
7983
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
1 | # -*- coding: utf-8 -*- |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
2 | """ |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
3 | pygments.lexers.mime |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
4 | ~~~~~~~~~~~~~~~~~~~~ |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
5 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
6 | Lexer for Multipurpose Internet Mail Extensions (MIME) data. |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
7 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
8 | :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
9 | :license: BSD, see LICENSE for details. |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
10 | """ |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
11 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
12 | import re |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
13 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
14 | from pygments.lexer import RegexLexer, include |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
15 | from pygments.lexers import get_lexer_for_mimetype |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
16 | from pygments.token import Text, Name, String, Operator, Comment, Other |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
17 | from pygments.util import get_int_opt, ClassNotFound |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
18 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
19 | __all__ = ["MIMELexer"] |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
20 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
21 | |
54c5cfbb1e29
Third Party packages
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
7701
diff
changeset
|
class MIMELexer(RegexLexer):
    """
    Lexer for Multipurpose Internet Mail Extensions (MIME) data. This lexer is
    designed to process nested multipart data.

    It assumes that the given data contains both header and body (separated
    by an empty line). If no valid header is found, then the entire data
    is treated as body.

    Additional options accepted:

    `MIME-max-level`
        Max recursion level for nested MIME structure. Any negative number
        is treated as unlimited. (default: -1)

    `Content-Type`
        Treat the data as a specific content type. Useful when the header is
        missing; otherwise this lexer tries to parse it from the header.
        The legacy spelling `Content_Type` is accepted as well. (default:
        `text/plain`)

    `Multipart-Boundary`
        Set the default multipart boundary delimiter. This option is only used
        when `Content-Type` is `multipart` and the header is missing. This
        lexer tries to parse it from the header by default. (default: None)

    `Content-Transfer-Encoding`
        Treat the data as a specific encoding. Otherwise this lexer tries to
        parse it from the header by default. The legacy spelling
        `Content_Transfer_Encoding` is accepted as well. (default: None)

    .. versionadded:: 2.5
    """

    name = "MIME"
    aliases = ["mime"]
    mimetypes = ["multipart/mixed",
                 "multipart/related",
                 "multipart/alternative"]

    # One complete header field: "Name:", optional spaces, then the value up
    # to a newline that is NOT followed by a space/tab (i.e. folded
    # continuation lines belong to the same field).
    _header_field_pattern = r"^([\w-]+):( *)([\s\S]*?\n)(?![ \t])"
    _header_field_re = re.compile(_header_field_pattern, re.MULTILINE)

    def __init__(self, **options):
        """
        Initialise the lexer from keyword options.

        The hyphenated option names documented on the class take precedence;
        the underscore spellings are still honoured so existing callers keep
        working.
        """
        super().__init__(**options)
        self.boundary = options.get("Multipart-Boundary")
        self.content_transfer_encoding = options.get(
            "Content-Transfer-Encoding",
            options.get("Content_Transfer_Encoding"))
        self.content_type = options.get(
            "Content-Type", options.get("Content_Type", "text/plain"))
        self.max_nested_level = get_int_opt(options, "MIME-max-level", -1)

    def analyse_text(text):
        """
        Rate how likely *text* is MIME data.

        Returns 1 when the text splits into a header section and a non-empty
        body at the first empty line and the header section consists solely
        of well-formed header fields; returns 0.1 otherwise.

        Declared without ``self``, exactly like the original definition.
        """
        header, separator, body = text.strip().partition("\n\n")
        if not separator or not body.strip():
            return 0.1

        # Remove every well-formed header field; anything left over is not a
        # valid header.  ``strip()`` above removed the final newline the
        # pattern needs, so add it back.  (``tokens["header"]`` is a list of
        # rules, not a compiled pattern, so it cannot be used for this.)
        leftover = MIMELexer._header_field_re.sub("", header + "\n")
        # NOTE(review): returning 1 claims any "headers + blank line + body"
        # text for this lexer - confirm this is wanted for lexer guessing.
        return 0.1 if leftover.strip() else 1

    def get_header_tokens(self, match):
        """
        Callback for one header field matched by the ``root`` state.

        Fields listed in ``attention_headers`` are split into name,
        whitespace and a value that is lexed in the state named after the
        lower-cased field; every other field is emitted as one ``Comment``.
        """
        field = match.group(1)

        if field.lower() in self.attention_headers:
            yield match.start(1), Name.Tag, field + ":"
            yield match.start(2), Text.Whitespace, match.group(2)

            # Lex the value separately, then shift the sub-lexer's indices
            # back into the coordinates of the whole text.
            pos = match.end(2)
            body = match.group(3)
            for i, t, v in self.get_tokens_unprocessed(
                    body, ("root", field.lower())):
                yield pos + i, t, v

        else:
            yield match.start(), Comment, match.group()

    def get_body_tokens(self, match):
        """
        Callback for the body (everything from the first empty line on).

        A non-multipart body, or a multipart body without a known boundary,
        is handed to ``get_bodypart_tokens`` as a whole.  Otherwise the body
        is cut at every boundary line; each part goes through
        ``get_bodypart_tokens`` and each boundary line is emitted as
        ``String.Delimiter``.
        """
        pos_body_start = match.start()
        entire_body = match.group()

        # skip first newline
        if entire_body.startswith('\n'):
            yield pos_body_start, Text.Whitespace, '\n'
            pos_body_start = pos_body_start + 1
            entire_body = entire_body[1:]

        # if it is not a multipart
        content_type = self.content_type or ""
        if not content_type.startswith("multipart") or not self.boundary:
            for i, t, v in self.get_bodypart_tokens(entire_body):
                yield pos_body_start + i, t, v
            return

        # find boundary
        bdry_pattern = r"^--%s(--)?\n" % re.escape(self.boundary)
        bdry_matcher = re.compile(bdry_pattern, re.MULTILINE)

        # some data has prefix text before first boundary
        m = bdry_matcher.search(entire_body)
        if m:
            pos_part_start = pos_body_start + m.end()
            pos_iter_start = m.end()
            yield pos_body_start, Text, entire_body[:m.start()]
            # The delimiter token begins where the boundary line begins,
            # not where it ends.
            yield pos_body_start + m.start(), String.Delimiter, m.group()
        else:
            pos_part_start = pos_body_start
            pos_iter_start = 0

        # process tokens of each body part
        for m in bdry_matcher.finditer(entire_body, pos_iter_start):
            # bodypart: "lpos_*" are offsets local to entire_body, "pos_*"
            # are offsets in the whole text.
            lpos_start = pos_part_start - pos_body_start
            lpos_end = m.start()
            part = entire_body[lpos_start:lpos_end]
            for i, t, v in self.get_bodypart_tokens(part):
                yield pos_part_start + i, t, v

            # boundary
            yield pos_body_start + lpos_end, String.Delimiter, m.group()
            pos_part_start = pos_body_start + m.end()

        # some data has suffix text after last boundary
        lpos_start = pos_part_start - pos_body_start
        if lpos_start != len(entire_body):
            yield pos_part_start, Text, entire_body[lpos_start:]

    def get_bodypart_tokens(self, text):
        """
        Return ``(index, tokentype, value)`` triples for one body part.

        The part is returned unlexed as a single ``Other`` token if:

        * it has no content,
        * no content type is known,
        * the content transfer encoding is not a readable one,
        * the maximum recursion level is exhausted, or
        * no lexer is registered for the content type.

        Otherwise the part is delegated to the lexer registered for the
        current content type.
        """
        if not text.strip() or not self.content_type:
            return [(0, Other, text)]

        cte = self.content_transfer_encoding
        # Compare case-insensitively: values read from the header are
        # already lower-cased, values supplied as an option are not.
        if cte and cte.lower() not in {"8bit", "7bit", "quoted-printable"}:
            return [(0, Other, text)]

        if self.max_nested_level == 0:
            return [(0, Other, text)]

        # get lexer
        try:
            lexer = get_lexer_for_mimetype(self.content_type)
        except ClassNotFound:
            return [(0, Other, text)]

        # A nested MIME lexer gets one level less to descend.
        if isinstance(lexer, type(self)):
            lexer.max_nested_level = self.max_nested_level - 1

        return lexer.get_tokens_unprocessed(text)

    def store_content_type(self, match):
        """
        Callback for the ``type/subtype`` value of a Content-Type header.

        Remembers the full content type on the lexer and emits leading
        whitespace, the type, the ``/`` delimiter and the subtype.
        """
        self.content_type = match.group(1)

        # Group 0 may begin with whitespace that precedes group 1.
        prefix_len = match.start(1) - match.start(0)
        yield match.start(0), Text.Whitespace, match.group(0)[:prefix_len]
        yield match.start(1), Name.Label, match.group(2)
        yield match.end(2), String.Delimiter, '/'
        yield match.start(3), Name.Label, match.group(3)

    def get_content_type_subtokens(self, match):
        """
        Callback for one ``;name=value`` parameter of a Content-Type header.

        Emits the five matched groups as tokens and, for the ``boundary``
        parameter, stores the value (without surrounding double quotes) as
        the multipart boundary.
        """
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Text.Whitespace, match.group(2)
        yield match.start(3), Name.Attribute, match.group(3)
        yield match.start(4), Operator, match.group(4)
        yield match.start(5), String, match.group(5)

        if match.group(3).lower() == "boundary":
            boundary = match.group(5).strip()
            # startswith/endswith instead of indexing: the value may be
            # empty, which would make boundary[0] raise IndexError.
            if boundary.startswith('"') and boundary.endswith('"'):
                boundary = boundary[1:-1]
            self.boundary = boundary

    def store_content_transfer_encoding(self, match):
        """
        Callback for the value of a Content-Transfer-Encoding header.

        Remembers the lower-cased encoding on the lexer and emits the value
        as ``Name.Constant``.
        """
        self.content_transfer_encoding = match.group(0).lower()
        yield match.start(0), Name.Constant, match.group(0)

    # Header fields whose values are lexed in a state of the same name;
    # all other header fields are emitted as comments.
    attention_headers = {"content-type", "content-transfer-encoding"}

    tokens = {
        "root": [
            (_header_field_pattern, get_header_tokens),
            (r"^$[\s\S]+", get_body_tokens),
        ],
        "header": [
            # folding
            (r"\n[ \t]", Text.Whitespace),
            (r"\n(?![ \t])", Text.Whitespace, "#pop"),
        ],
        "content-type": [
            include("header"),
            (
                r"^\s*((multipart|application|audio|font|image|model|text|video"
                r"|message)/([\w-]+))",
                store_content_type,
            ),
            (r'(;)((?:[ \t]|\n[ \t])*)([\w:-]+)(=)([\s\S]*?)(?=;|\n(?![ \t]))',
             get_content_type_subtokens),
            (r';[ \t]*\n(?![ \t])', Text, '#pop'),
        ],
        "content-transfer-encoding": [
            include("header"),
            (r"([\w-]+)", store_content_transfer_encoding),
        ],
    }