Thu, 07 Jan 2010 13:50:32 +0000
Updated coverage.py to version 3.2.
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
1 | """Better tokenizing for coverage.py.""" |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
2 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
3 | import keyword, re, token, tokenize |
32
01f04fbc1842
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
31
diff
changeset
|
4 | from .backward import StringIO # pylint: disable-msg=W0622 |
31
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
5 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def phys_tokens(toks):
    """Return all physical tokens, even line continuations.

    tokenize.generate_tokens() doesn't return a token for the backslash that
    continues lines.  This wrapper provides those tokens so that we can
    re-create a faithful representation of the original source.

    `toks` is an iterable of 5-tuples shaped like the ones produced by
    tokenize.generate_tokens(): (type, text, (srow, scol), (erow, ecol), line).

    Returns the same values as generate_tokens(), with one extra tuple
    yielded for each line-continuation backslash.  The extra tuples use the
    fake token type 99999 and a backslash followed by a newline as text.

    """
    last_line = None
    last_lineno = -1
    # Type of the most recent token that was not a NEWLINE or NL.  Tracking
    # the last *significant* token (rather than the first token of the
    # previous line) lets us recognize a trailing comment on a code line.
    last_ttype = None
    for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks:
        if last_lineno != elineno:
            if last_line and last_line.endswith("\\\n"):
                # We are at the beginning of a new line, and the last line
                # ended with a backslash.  We probably have to inject a
                # backslash token into the stream. Unfortunately, there's more
                # to figure out.  This code::
                #
                #   usage = """\
                #   HEY THERE
                #   """
                #
                # triggers this condition, but the token text is::
                #
                #   '"""\\\nHEY THERE\n"""'
                #
                # so we need to figure out if the backslash is already in the
                # string token or not.
                inject_backslash = True
                if last_ttype == tokenize.COMMENT:
                    # Comments like this \
                    # should never result in a new token.  This covers both
                    # comment-only lines and comments trailing some code.
                    inject_backslash = False
                elif ttype == token.STRING:
                    if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\':
                        # It's a multiline string and the first line ends with
                        # a backslash, so we don't need to inject another.
                        inject_backslash = False
                if inject_backslash:
                    # Figure out what column the backslash is in.
                    ccol = len(last_line.split("\n")[-2]) - 1
                    # Yield the token, with a fake token type.
                    yield (
                        99999, "\\\n",
                        (slineno, ccol), (slineno, ccol+2),
                        last_line
                        )
            last_line = ltext
        if ttype not in (tokenize.NEWLINE, tokenize.NL):
            # Line-ending tokens follow every comment, so they must not hide
            # the fact that the line's last real token was a comment.
            last_ttype = ttype
        yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext
        last_lineno = elineno
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
60 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
61 | |
744cd0b4b8cd
Updated coverage.py to version 3.2.
Detlev Offenbach <detlev@die-offenbachs.de>
parents:
diff
changeset
|
def source_token_lines(source):
    """Yield one list of token pairs for every line of `source`.

    A line looks like this::

        [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ]

    The first item of each pair is a short token class, the second is the
    text of the token.

    Joining the texts within each line, and then joining the lines with
    newlines, gives back the original `source`, except that trailing
    whitespace is dropped and a final line looks the same whether or not it
    ended with a newline.

    """
    whitespace_types = (token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL)
    current = []
    cursor = 0
    readline = StringIO(source.expandtabs(8)).readline
    physical = phys_tokens(tokenize.generate_tokens(readline))
    for ttype, ttext, (_, scol), (_, ecol), _ in physical:
        need_gap = True
        # A token may span several lines; handle it one line piece at a time,
        # keeping the newlines as separate pieces.
        for piece in re.split('(\n)', ttext):
            if piece == '\n':
                # End of a physical line: hand it over and start afresh.
                yield current
                current = []
                cursor = 0
                advance = False
            elif piece == '' or ttype in whitespace_types:
                # Nothing visible to record for this piece.
                advance = False
            else:
                if need_gap and scol > cursor:
                    # Re-create the blanks between the previous token and
                    # this one.
                    current.append(("ws", " " * (scol - cursor)))
                    need_gap = False
                if ttype == token.NAME and keyword.iskeyword(ttext):
                    kind = "key"
                else:
                    kind = tokenize.tok_name.get(ttype, 'xx').lower()[:3]
                current.append((kind, piece))
                advance = True
            # Pieces after the first one begin at the left margin.
            scol = 0
        if advance:
            cursor = ecol

    if current:
        yield current