1 """Code parsing for Coverage.""" |
1 """Code parsing for Coverage.""" |
2 |
2 |
3 import re, token, tokenize, types |
3 import glob, opcode, os, re, sys, token, tokenize |
4 import io as StringIO |
4 |
5 |
5 from .backward import set, sorted, StringIO # pylint: disable-msg=W0622 |
6 from .misc import nice_pair, CoverageException |
6 from .bytecode import ByteCodes, CodeObjects |
7 from .backward import set # pylint: disable-msg=W0622 |
7 from .misc import nice_pair, CoverageException, NoSource, expensive |
8 |
8 |
9 |
9 |
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                sourcef = open(self.filename, 'rU')
                self.text = sourcef.read()
                sourcef.close()
            except IOError:
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: %r: %s" % (self.filename, err)
                    )
        # Normalize line endings so tokenizing and line-splitting agree.
        self.text = self.text.replace('\r\n', '\n')

        self.exclude = exclude

        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = \
                ByteParser(text=self.text, filename=self.filename)
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            re_exclude = re.compile(self.exclude)
            for i, ltext in enumerate(self.lines):
                if re_exclude.search(ltext):
                    self.excluded.add(i+1)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: no cover
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)  This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                for i in range(slineno, elineno+1):
                    self.docstrings.add(i)
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the statement started on, so record
                    # the range as a multi-line statement.
                    # NOTE(review): reconstructed from a gap in the corrupted
                    # source -- confirm against the project's history.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token: if it's the first on its statement,
                # remember where the statement started.
                if first_line is None:
                    first_line = slineno

            # Check whether to end an excluded suite.
            if excluding and indent <= exclude_indent:
                excluding = False
            if excluding:
                self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        """
        self._raw_parse()

        excluded_lines = self.first_lines(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 == -1:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
|
238 |
|
239 |
|
## Opcodes that guide the ByteParser.

def _opcode(name):
    """Return the opcode by name from the opcode module."""
    return opcode.opmap[name]

def _opcode_set(*names):
    """Return a set of opcodes by the names in `names`."""
    return set([_opcode(name) for name in names])

# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
    )

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY')

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Individual opcodes we need below.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
|
278 |
|
279 |
|
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        """Use `code` directly, or compile it from `text` or `filename`."""
        if code:
            self.code = code
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open(filename, 'rU')
                text = sourcef.read()
                sourcef.close()

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be sure
                # the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                _, synerr, _ = sys.exc_info()
                raise CoverageException(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                    )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        return map(lambda c: ByteParser(code=c), CodeObjects(self.code))

    # Getting numbers from the lnotab value changed in Py3.0.
    if sys.hexversion >= 0x03000000:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab bytes in 3.x"""
            return list(lnotab)
    else:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab string in 2.x"""
            return [ord(c) for c in lnotab]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]

        """
        # Adapted from dis.py in the standard library.
        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])

        bytes_lines = []
        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    bytes_lines.append((byte_num, line_num))
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            bytes_lines.append((byte_num, line_num))
        return bytes_lines

    def _find_statements(self):
        """Find the statements in `self.code`.

        Return a set of line numbers that start statements.  Recurses into all
        code objects reachable from `self.code`.

        """
        stmts = set()
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                stmts.add(l)
        return stmts

    def _disassemble(self):     # pragma: no cover
        """Disassemble code, for ad-hoc experimenting."""

        import dis

        for bp in self.child_parsers():
            print("\n%s: " % bp.code)
            dis.dis(bp.code)
            print("Bytes lines: %r" % bp._bytes_lines())

        print("")

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """

        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        for bc in ByteCodes(self.code.co_code):
            # Maybe have to start a new block
            if bc.offset in bytes_lines_map:
                if chunk:
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
                chunks.append(chunk)

            if not chunk:
                chunk = Chunk(bc.offset)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.
                chunk.exits.add(-1)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                if block_stack:
                    # A break that goes through a finally will jump to whatever
                    # block is on top of the stack.
                    chunk.exits.add(block_stack[-1][1])
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for iblock in range(len(block_stack)-1, -1, -1):
                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block_stack[iblock][1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code. Every Python code object seems
            # to end with a return, and a "return None" is inserted if there
            # isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy?  A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            # Split the last chunk
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(-1)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(penult.offset)
                            chunk.exits.add(-1)
                            chunks.append(chunk)

            # Give all the chunks a length.
            chunks[-1].length = bc.next_offset - chunks[-1].byte
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        return chunks

    def _arcs(self):
        """Find the executable arcs in the code.

        Returns a set of pairs, (from,to).  From and to are integer line
        numbers.  If from is -1, then the arc is an entrance into the code
        object.  If to is -1, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # Build a map from byte offsets to actual lines reached.
        byte_lines = {-1:[-1]}
        bytes_to_add = set([c.byte for c in chunks])

        while bytes_to_add:
            byte_to_add = bytes_to_add.pop()
            if byte_to_add in byte_lines or byte_to_add == -1:
                continue

            # Which lines does this chunk lead to?
            bytes_considered = set()
            bytes_to_consider = [byte_to_add]
            lines = set()

            while bytes_to_consider:
                byte = bytes_to_consider.pop()
                bytes_considered.add(byte)

                # Find chunk for byte
                try:
                    ch = byte_chunks[byte]
                except KeyError:
                    for ch in chunks:
                        if ch.byte <= byte < ch.byte+ch.length:
                            break
                    else:
                        # No chunk for this byte!
                        raise Exception("Couldn't find chunk @ %d" % byte)
                    byte_chunks[byte] = ch

                if ch.line:
                    lines.add(ch.line)
                else:
                    for ex in ch.exits:
                        if ex == -1:
                            lines.add(-1)
                        elif ex not in bytes_considered:
                            bytes_to_consider.append(ex)

                bytes_to_add.update(ch.exits)

            byte_lines[byte_to_add] = lines

        # Figure out for each chunk where the exits go.
        arcs = set()
        for chunk in chunks:
            if chunk.line:
                for ex in chunk.exits:
                    for exit_line in byte_lines[ex]:
                        if chunk.line != exit_line:
                            arcs.add((chunk.line, exit_line))
        for line in byte_lines[0]:
            arcs.add((-1, line))

        return arcs

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
|
576 |
|
577 |
|
class Chunk(object):
    """A sequence of bytecodes with a single entrance.

    To analyze byte code, we have to divide it into chunks, sequences of byte
    codes such that each basic block has only one entrance, the first
    instruction in the block.

    This is almost the CS concept of `basic block`_, except that we're willing
    to have many exits from a chunk, and "basic block" is a more cumbersome
    term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    An exit of -1 means the chunk can leave the code (return).

    """
    def __init__(self, byte, line=0):
        # `byte`: the offset of the chunk's first bytecode.
        self.byte = byte
        # `line`: the source line this chunk starts, 0 if unknown.
        self.line = line
        # `length`: in bytes, filled in after all chunks are created.
        self.length = 0
        # `exits`: byte offsets this chunk can jump to, -1 for "return".
        self.exits = set()

    def __repr__(self):
        return "<%d+%d @%d %r>" % (
            self.byte, self.length, self.line, list(self.exits)
            )
|
604 |
|
605 |
|
class AdHocMain(object):        # pragma: no cover
    """An ad-hoc main for code parsing experiments."""

    def main(self, args):
        """A main function for trying the code from the command line."""

        from optparse import OptionParser

        parser = OptionParser()
        parser.add_option(
            "-c", action="store_true", dest="chunks",
            help="Show basic block chunks"
            )
        parser.add_option(
            "-d", action="store_true", dest="dis",
            help="Disassemble"
            )
        parser.add_option(
            "-R", action="store_true", dest="recursive",
            help="Recurse to find source files"
            )
        parser.add_option(
            "-s", action="store_true", dest="source",
            help="Show analyzed source"
            )
        parser.add_option(
            "-t", action="store_true", dest="tokens",
            help="Show tokens"
            )

        options, args = parser.parse_args()
        if options.recursive:
            if args:
                root = args[0]
            else:
                root = "."
            for root, _, _ in os.walk(root):
                for f in glob.glob(root + "/*.py"):
                    self.adhoc_one_file(options, f)
        else:
            self.adhoc_one_file(options, args[0])

    def adhoc_one_file(self, options, filename):
        """Process just one file."""

        if options.dis or options.chunks:
            try:
                bp = ByteParser(filename=filename)
            except CoverageException:
                _, err, _ = sys.exc_info()
                print("%s" % (err,))
                return

            if options.dis:
                print("Main code:")
                bp._disassemble()

            if options.chunks:
                chunks = bp._all_chunks()
                if options.recursive:
                    print("%6d: %s" % (len(chunks), filename))
                else:
                    print("Chunks: %r" % chunks)
                    arcs = bp._all_arcs()
                    print("Arcs: %r" % sorted(arcs))

        if options.source or options.tokens:
            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
            cp.show_tokens = options.tokens
            cp._raw_parse()

            if options.source:
                if options.chunks:
                    arc_width, arc_chars = self.arc_ascii_art(arcs)
                else:
                    arc_width, arc_chars = 0, {}

                exit_counts = cp.exit_counts()

                for i, ltext in enumerate(cp.lines):
                    lineno = i+1
                    m0 = m1 = m2 = m3 = a = ' '
                    if lineno in cp.statement_starts:
                        m0 = '-'
                    exits = exit_counts.get(lineno, 0)
                    if exits > 1:
                        m1 = str(exits)
                    if lineno in cp.docstrings:
                        m2 = '"'
                    if lineno in cp.classdefs:
                        m2 = 'C'
                    if lineno in cp.excluded:
                        m3 = 'x'
                    a = arc_chars.get(lineno, '').ljust(arc_width)
                    print("%4d %s%s%s%s%s %s" %
                        (lineno, m0, m1, m2, m3, a, ltext)
                        )

    def arc_ascii_art(self, arcs):
        """Draw arcs as ascii art.

        Returns a width of characters needed to draw all the arcs, and a
        dictionary mapping line numbers to ascii strings to draw for that line.

        """
        arc_chars = {}
        for lfrom, lto in sorted(arcs):
            if lfrom == -1:
                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
            elif lto == -1:
                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
            else:
                if lfrom == lto-1:
                    # Don't show obvious arcs.
                    continue
                if lfrom < lto:
                    l1, l2 = lfrom, lto
                else:
                    l1, l2 = lto, lfrom
                w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
                for l in range(l1, l2+1):
                    if l == lfrom:
                        ch = '<'
                    elif l == lto:
                        ch = '>'
                    else:
                        ch = '|'
                    arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch

        if arc_chars:
            arc_width = max([len(a) for a in arc_chars.values()])
        else:
            arc_width = 0

        return arc_width, arc_chars
219 |
742 |
if __name__ == '__main__':
    # Ad-hoc experimentation entry point; not used by normal coverage runs.
    AdHocMain().main(sys.argv[1:])
|