Sat, 04 Jul 2015 17:31:46 +0200
Changed the Python debugger backends to evaluate statements entered into the shell in the frame selected in the local variables viewer.
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
1 | """Better tokenizing for coverage.py.""" |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
2 | |
3495
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
3 | import codecs, keyword, re, sys, token, tokenize |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
4 | from .backward import set # pylint: disable=W0622 |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
5 | from .parser import generate_tokens |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
6 | |
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
7 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
8 | def phys_tokens(toks): |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
9 | """Return all physical tokens, even line continuations. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
10 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
11 | tokenize.generate_tokens() doesn't return a token for the backslash that |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
12 | continues lines. This wrapper provides those tokens so that we can |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
13 | re-create a faithful representation of the original source. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
14 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
15 | Returns the same values as generate_tokens() |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
16 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
17 | """ |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
18 | last_line = None |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
19 | last_lineno = -1 |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
20 | last_ttype = None |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
21 | for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
22 | if last_lineno != elineno: |
3495
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
23 | if last_line and last_line.endswith("\\\n"): |
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
24 | # We are at the beginning of a new line, and the last line |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
25 | # ended with a backslash. We probably have to inject a |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
26 | # backslash token into the stream. Unfortunately, there's more |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
27 | # to figure out. This code:: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
28 | # |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
29 | # usage = """\ |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
30 | # HEY THERE |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
31 | # """ |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
32 | # |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
33 | # triggers this condition, but the token text is:: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
34 | # |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
35 | # '"""\\\nHEY THERE\n"""' |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
36 | # |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
37 | # so we need to figure out if the backslash is already in the |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
38 | # string token or not. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
39 | inject_backslash = True |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
40 | if last_ttype == tokenize.COMMENT: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
41 | # Comments like this \ |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
42 | # should never result in a new token. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
43 | inject_backslash = False |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
44 | elif ttype == token.STRING: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
45 | if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\': |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
46 | # It's a multiline string and the first line ends with |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
47 | # a backslash, so we don't need to inject another. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
48 | inject_backslash = False |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
49 | if inject_backslash: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
50 | # Figure out what column the backslash is in. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
51 | ccol = len(last_line.split("\n")[-2]) - 1 |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
52 | # Yield the token, with a fake token type. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
53 | yield ( |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
54 | 99999, "\\\n", |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
55 | (slineno, ccol), (slineno, ccol+2), |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
56 | last_line |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
57 | ) |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
58 | last_line = ltext |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
59 | last_ttype = ttype |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
60 | yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
61 | last_lineno = elineno |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
62 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
63 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
64 | def source_token_lines(source): |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
65 | """Generate a series of lines, one for each line in `source`. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
66 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
67 | Each line is a list of pairs, each pair is a token:: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
68 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
69 | [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ] |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
70 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
71 | Each pair has a token class, and the token text. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
72 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
73 | If you concatenate all the token texts, and then join them with newlines, |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
74 | you should have your original `source` back, with two differences: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
75 | trailing whitespace is not preserved, and a final line with no newline |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
76 | is indistinguishable from a final line with a newline. |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
77 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
78 | """ |
3495
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
79 | ws_tokens = set([token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]) |
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
80 | line = [] |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
81 | col = 0 |
3495
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
82 | source = source.expandtabs(8).replace('\r\n', '\n') |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
83 | tokgen = generate_tokens(source) |
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
84 | for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen): |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
85 | mark_start = True |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
86 | for part in re.split('(\n)', ttext): |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
87 | if part == '\n': |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
88 | yield line |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
89 | line = [] |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
90 | col = 0 |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
91 | mark_end = False |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
92 | elif part == '': |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
93 | mark_end = False |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
94 | elif ttype in ws_tokens: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
95 | mark_end = False |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
96 | else: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
97 | if mark_start and scol > col: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
98 | line.append(("ws", " " * (scol - col))) |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
99 | mark_start = False |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
100 | tok_class = tokenize.tok_name.get(ttype, 'xx').lower()[:3] |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
101 | if ttype == token.NAME and keyword.iskeyword(ttext): |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
102 | tok_class = "key" |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
103 | line.append((tok_class, part)) |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
104 | mark_end = True |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
105 | scol = 0 |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
106 | if mark_end: |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
107 | col = ecol |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
108 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
109 | if line: |
790
2c0ea0163ef4
Marked the Python2 debugger client files with the new eflag: marker.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
32
diff
changeset
|
110 | yield line |
2c0ea0163ef4
Marked the Python2 debugger client files with the new eflag: marker.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
32
diff
changeset
|
111 | |
3495
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
112 | def source_encoding(source): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
113 | """Determine the encoding for `source` (a string), according to PEP 263. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
114 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
115 | Returns a string, the name of the encoding. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
116 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
117 | """ |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
118 | # Note: this function should never be called on Python 3, since py3 has |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
119 | # built-in tools to do this. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
120 | assert sys.version_info < (3, 0) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
121 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
122 | # This is mostly code adapted from Py3.2's tokenize module. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
123 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
124 | cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)") |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
125 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
126 | # Do this so the detect_encode code we copied will work. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
127 | readline = iter(source.splitlines(True)).next |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
128 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
129 | def _get_normal_name(orig_enc): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
130 | """Imitates get_normal_name in tokenizer.c.""" |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
131 | # Only care about the first 12 characters. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
132 | enc = orig_enc[:12].lower().replace("_", "-") |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
133 | if re.match(r"^utf-8($|-)", enc): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
134 | return "utf-8" |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
135 | if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
136 | return "iso-8859-1" |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
137 | return orig_enc |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
138 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
139 | # From detect_encode(): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
140 | # It detects the encoding from the presence of a utf-8 bom or an encoding |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
141 | # cookie as specified in pep-0263. If both a bom and a cookie are present, |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
142 | # but disagree, a SyntaxError will be raised. If the encoding cookie is an |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
143 | # invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found, |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
144 | # 'utf-8-sig' is returned. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
145 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
146 | # If no encoding is specified, then the default will be returned. The |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
147 | # default varied with version. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
148 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
149 | if sys.version_info <= (2, 4): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
150 | default = 'iso-8859-1' |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
151 | else: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
152 | default = 'ascii' |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
153 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
154 | bom_found = False |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
155 | encoding = None |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
156 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
157 | def read_or_stop(): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
158 | """Get the next source line, or ''.""" |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
159 | try: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
160 | return readline() |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
161 | except StopIteration: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
162 | return '' |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
163 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
164 | def find_cookie(line): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
165 | """Find an encoding cookie in `line`.""" |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
166 | try: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
167 | line_string = line.decode('ascii') |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
168 | except UnicodeDecodeError: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
169 | return None |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
170 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
171 | matches = cookie_re.findall(line_string) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
172 | if not matches: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
173 | return None |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
174 | encoding = _get_normal_name(matches[0]) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
175 | try: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
176 | codec = codecs.lookup(encoding) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
177 | except LookupError: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
178 | # This behaviour mimics the Python interpreter |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
179 | raise SyntaxError("unknown encoding: " + encoding) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
180 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
181 | if bom_found: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
182 | # codecs in 2.3 were raw tuples of functions, assume the best. |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
183 | codec_name = getattr(codec, 'name', encoding) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
184 | if codec_name != 'utf-8': |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
185 | # This behaviour mimics the Python interpreter |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
186 | raise SyntaxError('encoding problem: utf-8') |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
187 | encoding += '-sig' |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
188 | return encoding |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
189 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
190 | first = read_or_stop() |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
191 | if first.startswith(codecs.BOM_UTF8): |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
192 | bom_found = True |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
193 | first = first[3:] |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
194 | default = 'utf-8-sig' |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
195 | if not first: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
196 | return default |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
197 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
198 | encoding = find_cookie(first) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
199 | if encoding: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
200 | return encoding |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
201 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
202 | second = read_or_stop() |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
203 | if not second: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
204 | return default |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
205 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
206 | encoding = find_cookie(second) |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
207 | if encoding: |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
208 | return encoding |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
209 | |
fac17a82b431
updated coverage to 3.7.1
T.Rzepka <Tobias.Rzepka@gmail.com>
parents:
790
diff
changeset
|
210 | return default |
3499
f2d4b02c7e88
Modified the Python2 coverage files to include the Python2 eflags line and fixed an issue in both variants.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
3495
diff
changeset
|
211 | |
f2d4b02c7e88
Modified the Python2 coverage files to include the Python2 eflags line and fixed an issue in both variants.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
3495
diff
changeset
|
212 | # |
f2d4b02c7e88
Modified the Python2 coverage files to include the Python2 eflags line and fixed an issue in both variants.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
3495
diff
changeset
|
213 | # eflag: FileType = Python2 |