|
1 """Code parsing for Coverage.""" |
|
2 |
|
3 import re, token, tokenize, types |
|
4 import io as StringIO |
|
5 |
|
6 from .misc import nice_pair, CoverageException |
|
7 from .backward import set # pylint: disable-msg=W0622 |
|
8 |
|
9 |
|
class CodeParser:
    """Parse code to find executable lines, excluded lines, etc.

    A parser accumulates its findings in instance attributes (`lines`,
    `excluded`, `docstrings`, `multiline`, `statement_starts`).  Nothing
    resets those attributes between calls, so results from successive parses
    on the same instance are merged.

    """

    def __init__(self, show_tokens=False):
        """Create a parser.

        If `show_tokens` is true, every token read by `raw_parse` is printed
        as it is processed.

        """
        self.show_tokens = show_tokens

        # The text lines of the parsed code.
        self.lines = None

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

    def find_statement_starts(self, code):
        """Find the starts of statements in compiled code.

        Adds the line numbers recorded in the line table of the code object
        `code` to `self.statement_starts`.  Code objects nested inside `code`
        are not examined here; `find_statements` handles the recursion.

        """
        # Let the standard library decode the line table: its format differs
        # between interpreter versions, and indexing the raw table yields
        # ints on Python 3, so calling ord() on its elements fails there.
        import dis

        for _, line_num in dis.findlinestarts(code):
            # Newer interpreters can report None or 0 for bytecode that has
            # no source line; neither is a real (1-based) line number.
            if line_num:
                self.statement_starts.add(line_num)

    def find_statements(self, code):
        """Find the statements in `code`.

        Update `self.statement_starts`, a set of line numbers that start
        statements.  Recurses into all code objects reachable from `code`
        through its constants.

        """
        # Adapted from trace.py in the standard library.

        # Get all of the lineno information from this code.
        self.find_statement_starts(code)

        # Nested functions, classes, lambdas, etc. are stored as code objects
        # among the constants, so recurse into each one found there.
        for const in code.co_consts:
            if isinstance(const, types.CodeType):
                self.find_statements(const)

    def raw_parse(self, text=None, filename=None, exclude=None):
        """Parse `text` to find the interesting facts about its lines.

        `text` is the source to parse.  If it is empty or None, the source is
        read from `filename` instead.  `filename` is also used as the file
        name when compiling and in error messages, defaulting to "<code>".
        `exclude`, if given, is a regex; every line it matches is excluded,
        and a match on the line where a suite's colon ends excludes the whole
        suite.

        Updates `self.lines`, `self.excluded`, `self.docstrings`,
        `self.multiline` and `self.statement_starts`.

        Raises CoverageException if the source can't be compiled.

        """
        if not text:
            # tokenize.open decodes with the encoding the source declares
            # (UTF-8 by default), translates newlines, and the with-block
            # closes the file even if reading fails.
            with tokenize.open(filename) as sourcef:
                text = sourcef.read()
        text = text.replace('\r\n', '\n')
        self.lines = text.split('\n')

        # Find lines which match an exclusion pattern.
        if exclude:
            re_exclude = re.compile(exclude)
            for lineno, ltext in enumerate(self.lines, 1):
                if re_exclude.search(ltext):
                    self.excluded.add(lineno)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        # Starting as if an INDENT was just seen lets a string at the very
        # top of the source be treated as a docstring.
        prev_toktype = token.INDENT
        first_line = None

        # The file imports the io module under the name `StringIO`.
        tokgen = tokenize.generate_tokens(StringIO.StringIO(text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for lineno in range(first_line, elineno + 1):
                        self.multiline[lineno] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite: a statement at
                    # or left of the indent where the exclusion began is
                    # outside the suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        filename = filename or "<code>"
        try:
            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
            # text ends nicely for them.
            text += '\n'
            code = compile(text, filename, "exec")
        except SyntaxError as synerr:
            raise CoverageException(
                "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                )

        self.find_statements(code)

    def map_to_first_line(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Lines that are part of a multi-line statement recorded in
        `self.multiline` are replaced by that statement's first line.

        Skip any line mentioned in `ignore`, both before and after mapping.

        Returns a sorted list of the first lines, without duplicates.

        """
        # A set makes each membership test constant-time; callers may pass a
        # list here.
        ignored = set(ignore) if ignore else set()
        first_lines = set()
        for line in lines:
            if line in ignored:
                continue
            rng = self.multiline.get(line)
            first = rng[0] if rng else line
            if first not in ignored:
                first_lines.add(first)
        return sorted(first_lines)

    def parse_source(self, text=None, filename=None, exclude=None):
        """Parse source text to find executable lines, excluded lines, etc.

        Source can be provided as `text`, the text itself, or `filename`, from
        which text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        Return values are 1) a sorted list of executable line numbers,
        2) a sorted list of excluded line numbers, and 3) a dict mapping line
        numbers to pairs (lo,hi) for multi-line statements.

        Raises CoverageException if the source can't be compiled.

        """
        self.raw_parse(text, filename, exclude)

        excluded_lines = self.map_to_first_line(self.excluded)
        # Docstrings and excluded lines never count as executable.
        ignore = excluded_lines + list(self.docstrings)
        lines = self.map_to_first_line(self.statement_starts, ignore)

        return lines, excluded_lines, self.multiline

    def print_parse_results(self):
        """Print the results of the parsing.

        Each source line is printed with its number and three marker columns:
        '-' if the line starts a statement, '"' if it is part of a docstring,
        and 'x' if it is excluded.

        """
        for lineno, ltext in enumerate(self.lines, 1):
            m0 = '-' if lineno in self.statement_starts else ' '
            m1 = '"' if lineno in self.docstrings else ' '
            m2 = 'x' if lineno in self.excluded else ' '
            print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))
|
218 |
|
219 |
|
if __name__ == '__main__':
    # Command-line debugging aid: parse the file named by the first
    # argument, echoing each token as it is read, then print the per-line
    # summary of what was found.
    import sys

    target_path = sys.argv[1]
    parser = CodeParser(show_tokens=True)
    parser.raw_parse(filename=target_path, exclude=r"no\s*cover")
    parser.print_parse_results()