1 """Code parsing for Coverage.""" |
1 """Code parsing for Coverage.""" |
2 |
2 |
3 import glob, opcode, os, re, sys, token, tokenize |
3 import dis, re, sys, token, tokenize |
4 |
4 |
5 from .backward import set, sorted, StringIO # pylint: disable-msg=W0622 |
5 from .backward import set, sorted, StringIO # pylint: disable=W0622 |
|
6 from .backward import open_source, range # pylint: disable=W0622 |
|
7 from .backward import reversed # pylint: disable=W0622 |
|
8 from .backward import bytes_to_ints |
6 from .bytecode import ByteCodes, CodeObjects |
9 from .bytecode import ByteCodes, CodeObjects |
7 from .misc import nice_pair, CoverageException, NoSource, expensive |
10 from .misc import nice_pair, expensive, join_regex |
|
11 from .misc import CoverageException, NoSource, NotPython |
8 |
12 |
9 |
13 |
10 class CodeParser(object): |
14 class CodeParser(object): |
11 """Parse code to find executable lines, excluded lines, etc.""" |
15 """Parse code to find executable lines, excluded lines, etc.""" |
12 |
16 |
13 def __init__(self, text=None, filename=None, exclude=None): |
17 def __init__(self, text=None, filename=None, exclude=None): |
14 """ |
18 """ |
15 Source can be provided as `text`, the text itself, or `filename`, from |
19 Source can be provided as `text`, the text itself, or `filename`, from |
16 which text will be read. Excluded lines are those that match |
20 which the text will be read. Excluded lines are those that match |
17 `exclude`, a regex. |
21 `exclude`, a regex. |
18 |
22 |
19 """ |
23 """ |
20 assert text or filename, "CodeParser needs either text or filename" |
24 assert text or filename, "CodeParser needs either text or filename" |
21 self.filename = filename or "<code>" |
25 self.filename = filename or "<code>" |
22 self.text = text |
26 self.text = text |
23 if not self.text: |
27 if not self.text: |
24 try: |
28 try: |
25 sourcef = open(self.filename, 'rU') |
29 sourcef = open_source(self.filename) |
26 self.text = sourcef.read() |
30 try: |
27 sourcef.close() |
31 self.text = sourcef.read() |
|
32 finally: |
|
33 sourcef.close() |
28 except IOError: |
34 except IOError: |
29 _, err, _ = sys.exc_info() |
35 _, err, _ = sys.exc_info() |
30 raise NoSource( |
36 raise NoSource( |
31 "No source for code: %r: %s" % (self.filename, err) |
37 "No source for code: '%s': %s" % (self.filename, err) |
32 ) |
38 ) |
33 self.text = self.text.replace('\r\n', '\n') |
39 |
|
40 # Scrap the BOM if it exists. |
|
41 if self.text and ord(self.text[0]) == 0xfeff: |
|
42 self.text = self.text[1:] |
34 |
43 |
35 self.exclude = exclude |
44 self.exclude = exclude |
36 |
45 |
37 self.show_tokens = False |
46 self.show_tokens = False |
38 |
47 |
63 self._byte_parser = \ |
72 self._byte_parser = \ |
64 ByteParser(text=self.text, filename=self.filename) |
73 ByteParser(text=self.text, filename=self.filename) |
65 return self._byte_parser |
74 return self._byte_parser |
66 byte_parser = property(_get_byte_parser) |
75 byte_parser = property(_get_byte_parser) |
67 |
76 |
|
77 def lines_matching(self, *regexes): |
|
78 """Find the lines matching one of a list of regexes. |
|
79 |
|
80 Returns a set of line numbers, the lines that contain a match for one |
|
81 of the regexes in `regexes`. The entire line needn't match, just a |
|
82 part of it. |
|
83 |
|
84 """ |
|
85 regex_c = re.compile(join_regex(regexes)) |
|
86 matches = set() |
|
87 for i, ltext in enumerate(self.lines): |
|
88 if regex_c.search(ltext): |
|
89 matches.add(i+1) |
|
90 return matches |
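
    # A hypothetical sketch of how this is used (assuming `join_regex`
    # simply ORs the patterns together):
    #
    #   cp = CodeParser(text="a = 1\nif a:\n    b = 2\n")
    #   cp.lines_matching(r"= \d")    # -> set([1, 3])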

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = generate_tokens(self.text)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            # ... (the rest of the per-token handling is elided: it tracks
            # dedents, docstrings, multi-line statements, and the excluding
            # state, and clears `empty` when real code is seen) ...
            if excluding:
                self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, *ignores):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in any of the sequences in `ignores`.

        Returns a set of the first lines.

        """
        ignore = set()
        for ign in ignores:
            ignore.update(ign)
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return lset
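
    # A small illustration (hypothetical values): if `self.multiline` maps
    # line 3 to the statement spanning lines (2, 3), then:
    #
    #   cp.first_line(3)              # -> 2
    #   cp.first_lines([1, 3], [2])   # -> set([1]); 3 maps to the ignored 2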

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a set of executable line numbers, and 2) a set
        of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno)
                )

        excluded_lines = self.first_lines(self.excluded)
        lines = self.first_lines(
            self.statement_starts,
            excluded_lines,
            self.docstrings
            )

        return lines, excluded_lines
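
    # Typical use, as a hedged sketch (the exclude regex is the caller's
    # choice, not fixed by this module):
    #
    #   cp = CodeParser(filename="mymod.py", exclude=r"#\s*pragma: no cover")
    #   statements, excluded = cp.parse_source()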

    def arcs(self):
        """Get information about the arcs available in the code."""
        # ... (this method's body, and the rest of the CodeParser class,
        # are elided here) ...


## Opcodes that guide the ByteParser.

def _opcode(name):
    """Return the opcode by name from the dis module."""
    return dis.opmap[name]

def _opcode_set(*names):
    """Return a set of opcodes by the names in `names`."""
    s = set()
    for name in names:
        try:
            s.add(_opcode(name))
        except KeyError:
            pass
    return s
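
# A hedged example: opcode names vary across Python versions, and the
# try/except above simply skips names a version doesn't define.  On a
# version without 'SETUP_WITH', for instance:
#
#   _opcode_set('POP_BLOCK', 'SETUP_WITH')  # -> set([dis.opmap['POP_BLOCK']])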

# Opcodes that leave the code object.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
    )

# Opcodes that unconditionally begin a new code chunk.  By starting new
# chunks with unconditional jump instructions, we neatly deal with jumps to
# jumps properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
    )

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.
OPS_NO_JUMP = OPS_PUSH_BLOCK

# Individual opcodes we need below.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
COMPARE_EXCEPTION = 10  # just have to get this constant from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')


class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    def __init__(self, code=None, text=None, filename=None):
        if code:
            self.code = code
            self.text = text
        else:
            if not text:
                assert filename, "If no code or text, need a filename"
                sourcef = open_source(filename)
                try:
                    text = sourcef.read()
                finally:
                    sourcef.close()
            self.text = text

            try:
                # Python 2.3 and 2.4 don't like partial last lines, so be
                # sure the text ends nicely for them.
                self.code = compile(text + '\n', filename, "exec")
            except SyntaxError:
                _, synerr, _ = sys.exc_info()
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                    )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code "
                    "analysis.\n"
                    "Run coverage.py under CPython for this command."
                    )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        The iteration includes `self` as its first value.

        """
        children = CodeObjects(self.code)
        return [ByteParser(code=c, text=self.text) for c in children]
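
    # For example (an illustrative sketch; "<mem>" is an arbitrary name):
    # compiling "def f():\n    pass\n" yields the module's code object with
    # f's code object nested inside it, so:
    #
    #   bp = ByteParser(text="def f():\n    pass\n", filename="<mem>")
    #   len(bp.child_parsers())       # -> 2: the module first, then f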

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...

        Only byte offsets that correspond to line numbers are included in
        the results.

        """
        # Adapted from dis.py in the standard library.
        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])

        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    yield (byte_num, line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            yield (byte_num, line_num)

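    # A worked example of the decoding (assuming co_firstlineno == 1):
    # lnotab bytes 6,1, 8,2 mean (byte_incr, line_incr) pairs (6,1), (8,2),
    # so this generator yields:
    #
    #   (0, 1)    first instruction of line 1
    #   (6, 2)    line 2 starts at byte offset 6
    #   (14, 4)   line 4 starts at byte offset 14 (the final check)
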
    def _find_statements(self):
        """Find the statements in `self.code`.

        Produce a sequence of line numbers that start statements.  Recurses
        into all code objects reachable from `self.code`.

        """
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                yield l

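    # Callers collect the generated line numbers into a set, e.g. (sketch):
    #
    #   starts = set(bp._find_statements())
    #
    # which is what CodeParser._raw_parse does via statement_starts.update().
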
    def _block_stack_repr(self, block_stack):
        """Get a string version of `block_stack`, for debugging."""
        blocks = ", ".join(
            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
            )
        return "[" + blocks + "]"

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """
        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None

        # A dict mapping byte offsets of line starts to the line numbers.
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some opcodes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        # Get a set of all of the jump-to points.
        jump_to = set()
        bytecodes = list(ByteCodes(self.code.co_code))
        for bc in bytecodes:
            if bc.jump_to >= 0:
                jump_to.add(bc.jump_to)

        chunk_lineno = 0

        # Walk the byte codes building chunks.
        for bc in bytecodes:
            # Maybe have to start a new chunk
            start_new_chunk = False
            first_chunk = False
            if bc.offset in bytes_lines_map:
                # Start a new chunk for each source line number.
                start_new_chunk = True
                chunk_lineno = bytes_lines_map[bc.offset]
                first_chunk = True
            elif bc.offset in jump_to:
                # To make chunks have a single entrance, we have to make a new
                # chunk when we get to a place some bytecode jumps to.
                start_new_chunk = True
            elif bc.op in OPS_CHUNK_BEGIN:
                # Jumps deserve their own unnumbered chunk.  This fixes
                # problems with jumps to jumps getting confused.
                start_new_chunk = True

            if not chunk or start_new_chunk:
                if chunk:
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.
                chunk.exits.add(-self.code.co_firstlineno)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for block in reversed(block_stack):
                    if block[0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block[1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code.  Every Python code object
            # seems to end with a return, and a "return None" is inserted if
            # there isn't one already.
            if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                if self.code.co_consts[penult.arg] is None:
                    # This is "return None", but is it dummy?  A real line
                    # would be a last chunk all by itself.
                    if chunks[-1].byte != penult.offset:
                        ex = -self.code.co_firstlineno
                        # Split the last chunk
                        last_chunk = chunks[-1]
                        last_chunk.exits.remove(ex)
                        last_chunk.exits.add(penult.offset)
                        chunk = Chunk(
                            penult.offset, last_chunk.line, False
                            )
                        chunk.exits.add(ex)
                        chunks.append(chunk)

            # Give all the chunks a length.
            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        #self.validate_chunks(chunks)
        return chunks

    def validate_chunks(self, chunks):
        """Validate the rule that chunks have a single entrance."""
        # starts is the entrances to the chunks
        starts = set([ch.byte for ch in chunks])
        for ch in chunks:
            assert all([(ex in starts or ex < 0) for ex in ch.exits])

    def _arcs(self):
        """Find the executable arcs in the code.

        Yields pairs: (from, to).  From and to are integer line numbers.  If
        from is < 0, then the arc is an entrance into the code object.  If to
        is < 0, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # There's always an entrance at the first chunk.
        yield (-1, byte_chunks[0].line)

        # Traverse from the first chunk in each line, and yield arcs where
        # the trace function will be invoked.
        for chunk in chunks:
            if not chunk.first:
                continue

            chunks_considered = set()
            chunks_to_consider = [chunk]
            while chunks_to_consider:
                # Get the chunk we're considering, and make sure we don't
                # consider it again.
                this_chunk = chunks_to_consider.pop()
                chunks_considered.add(this_chunk)

                # For each exit, add the line number if the trace function
                # would be triggered, or add the chunk to those being
                # considered if not.
                for ex in this_chunk.exits:
                    if ex < 0:
                        yield (chunk.line, ex)
                    else:
                        next_chunk = byte_chunks[ex]
                        if next_chunk in chunks_considered:
                            continue

                        # The trace function is invoked if visiting the first
                        # bytecode in a line, or if the transition is a
                        # backward jump.
                        backward_jump = next_chunk.byte < this_chunk.byte
                        if next_chunk.first or backward_jump:
                            if next_chunk.line != chunk.line:
                                yield (chunk.line, next_chunk.line)
                        else:
                            chunks_to_consider.append(next_chunk)

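    # A sketch of the expected output (module code, co_firstlineno == 1):
    # for the two-line module "a = 1\nb = 2\n", _arcs() would yield
    # (-1, 1), (1, 2), and (2, -1): an entrance, one line-to-line
    # transition, and one exit.
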
    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs


class Chunk(object):
    """A sequence of byte codes with a single entrance.

    To analyze byte code, we have to divide it into chunks, sequences of byte
    codes such that each chunk has only one entrance, the first instruction
    in the chunk.

    This is almost the CS concept of `basic block`_, except that we're
    willing to have many exits from a chunk, and "basic block" is a more
    cumbersome term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    `line` is the source line number containing this chunk.

    `first` is true if this is the first chunk in the source line.

    An exit < 0 means the chunk can leave the code (return).  The exit is
    the negative of the starting line number of the code block.

    """
    def __init__(self, byte, line, first):
        self.byte = byte
        self.line = line
        self.first = first
        self.length = 0
        self.exits = set()

    def __repr__(self):
        if self.first:
            bang = "!"
        else:
            bang = ""
        return "<%d+%d @%d%s %r>" % (
            self.byte, self.length, self.line, bang, list(self.exits)
            )

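# For example (illustrative only): a freshly-made first chunk for line 1
# starting at byte 0 shows a "!" marker and no exits yet:
#
#   repr(Chunk(0, 1, True))           # -> "<0+0 @1! []>"

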
class CachedTokenizer(object):
    """A one-element cache around tokenize.generate_tokens.

    When reporting, coverage.py tokenizes files twice, once to find the
    structure of the file, and once to syntax-color it.  Tokenizing is
    expensive, and easily cached.

    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
    actually tokenize twice.

    """
    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for `tokenize.generate_tokens`."""
        if text != self.last_text:
            self.last_text = text
            self.last_tokens = list(
                tokenize.generate_tokens(StringIO(text).readline)
                )
        return self.last_tokens

# Create our generate_tokens cache as a callable replacement function.
generate_tokens = CachedTokenizer().generate_tokens
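
# A sketch of the caching behavior (equal text tokenizes only once):
#
#   toks1 = generate_tokens("x = 1\n")    # tokenizes
#   toks2 = generate_tokens("x = 1\n")    # served from the cache
#   toks1 is toks2                        # -> True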
|