DebugClients/Python/coverage/parser.py

branch
Py2 comp.
changeset 3495
fac17a82b431
parent 790
2c0ea0163ef4
child 3499
f2d4b02c7e88
equal deleted inserted replaced
3485:f1cbc18f88b2 3495:fac17a82b431
1 """Code parsing for Coverage.""" 1 """Code parsing for Coverage."""
2 2
3 import glob, opcode, os, re, sys, token, tokenize 3 import dis, re, sys, token, tokenize
4 4
5 from .backward import set, sorted, StringIO # pylint: disable-msg=W0622 5 from .backward import set, sorted, StringIO # pylint: disable=W0622
6 from .backward import open_source, range # pylint: disable=W0622
7 from .backward import reversed # pylint: disable=W0622
8 from .backward import bytes_to_ints
6 from .bytecode import ByteCodes, CodeObjects 9 from .bytecode import ByteCodes, CodeObjects
7 from .misc import nice_pair, CoverageException, NoSource, expensive 10 from .misc import nice_pair, expensive, join_regex
11 from .misc import CoverageException, NoSource, NotPython
8 12
9 13
10 class CodeParser(object): 14 class CodeParser(object):
11 """Parse code to find executable lines, excluded lines, etc.""" 15 """Parse code to find executable lines, excluded lines, etc."""
12 16
13 def __init__(self, text=None, filename=None, exclude=None): 17 def __init__(self, text=None, filename=None, exclude=None):
14 """ 18 """
15 Source can be provided as `text`, the text itself, or `filename`, from 19 Source can be provided as `text`, the text itself, or `filename`, from
16 which text will be read. Excluded lines are those that match 20 which the text will be read. Excluded lines are those that match
17 `exclude`, a regex. 21 `exclude`, a regex.
18 22
19 """ 23 """
20 assert text or filename, "CodeParser needs either text or filename" 24 assert text or filename, "CodeParser needs either text or filename"
21 self.filename = filename or "<code>" 25 self.filename = filename or "<code>"
22 self.text = text 26 self.text = text
23 if not self.text: 27 if not self.text:
24 try: 28 try:
25 sourcef = open(self.filename, 'rU') 29 sourcef = open_source(self.filename)
26 self.text = sourcef.read() 30 try:
27 sourcef.close() 31 self.text = sourcef.read()
32 finally:
33 sourcef.close()
28 except IOError: 34 except IOError:
29 _, err, _ = sys.exc_info() 35 _, err, _ = sys.exc_info()
30 raise NoSource( 36 raise NoSource(
31 "No source for code: %r: %s" % (self.filename, err) 37 "No source for code: '%s': %s" % (self.filename, err)
32 ) 38 )
33 self.text = self.text.replace('\r\n', '\n') 39
40 # Scrap the BOM if it exists.
41 if self.text and ord(self.text[0]) == 0xfeff:
42 self.text = self.text[1:]
34 43
35 self.exclude = exclude 44 self.exclude = exclude
36 45
37 self.show_tokens = False 46 self.show_tokens = False
38 47
63 self._byte_parser = \ 72 self._byte_parser = \
64 ByteParser(text=self.text, filename=self.filename) 73 ByteParser(text=self.text, filename=self.filename)
65 return self._byte_parser 74 return self._byte_parser
66 byte_parser = property(_get_byte_parser) 75 byte_parser = property(_get_byte_parser)
67 76
77 def lines_matching(self, *regexes):
78 """Find the lines matching one of a list of regexes.
79
80 Returns a set of line numbers, the lines that contain a match for one
81 of the regexes in `regexes`. The entire line needn't match, just a
82 part of it.
83
84 """
85 regex_c = re.compile(join_regex(regexes))
86 matches = set()
87 for i, ltext in enumerate(self.lines):
88 if regex_c.search(ltext):
89 matches.add(i+1)
90 return matches
91
68 def _raw_parse(self): 92 def _raw_parse(self):
69 """Parse the source to find the interesting facts about its lines. 93 """Parse the source to find the interesting facts about its lines.
70 94
71 A handful of member fields are updated. 95 A handful of member fields are updated.
72 96
73 """ 97 """
74 # Find lines which match an exclusion pattern. 98 # Find lines which match an exclusion pattern.
75 if self.exclude: 99 if self.exclude:
76 re_exclude = re.compile(self.exclude) 100 self.excluded = self.lines_matching(self.exclude)
77 for i, ltext in enumerate(self.lines):
78 if re_exclude.search(ltext):
79 self.excluded.add(i+1)
80 101
81 # Tokenize, to find excluded suites, to find docstrings, and to find 102 # Tokenize, to find excluded suites, to find docstrings, and to find
82 # multi-line statements. 103 # multi-line statements.
83 indent = 0 104 indent = 0
84 exclude_indent = 0 105 exclude_indent = 0
85 excluding = False 106 excluding = False
86 prev_toktype = token.INDENT 107 prev_toktype = token.INDENT
87 first_line = None 108 first_line = None
88 109 empty = True
89 tokgen = tokenize.generate_tokens(StringIO(self.text).readline) 110
111 tokgen = generate_tokens(self.text)
90 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: 112 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
91 if self.show_tokens: # pragma: no cover 113 if self.show_tokens: # pragma: not covered
92 print("%10s %5s %-20r %r" % ( 114 print("%10s %5s %-20r %r" % (
93 tokenize.tok_name.get(toktype, toktype), 115 tokenize.tok_name.get(toktype, toktype),
94 nice_pair((slineno, elineno)), ttext, ltext 116 nice_pair((slineno, elineno)), ttext, ltext
95 )) 117 ))
96 if toktype == token.INDENT: 118 if toktype == token.INDENT:
112 elif toktype == token.STRING and prev_toktype == token.INDENT: 134 elif toktype == token.STRING and prev_toktype == token.INDENT:
113 # Strings that are first on an indented line are docstrings. 135 # Strings that are first on an indented line are docstrings.
114 # (a trick from trace.py in the stdlib.) This works for 136 # (a trick from trace.py in the stdlib.) This works for
115 # 99.9999% of cases. For the rest (!) see: 137 # 99.9999% of cases. For the rest (!) see:
116 # http://stackoverflow.com/questions/1769332/x/1769794#1769794 138 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
117 for i in range(slineno, elineno+1): 139 self.docstrings.update(range(slineno, elineno+1))
118 self.docstrings.add(i)
119 elif toktype == token.NEWLINE: 140 elif toktype == token.NEWLINE:
120 if first_line is not None and elineno != first_line: 141 if first_line is not None and elineno != first_line:
121 # We're at the end of a line, and we've ended on a 142 # We're at the end of a line, and we've ended on a
122 # different line than the first line of the statement, 143 # different line than the first line of the statement,
123 # so record a multi-line range. 144 # so record a multi-line range.
126 self.multiline[l] = rng 147 self.multiline[l] = rng
127 first_line = None 148 first_line = None
128 149
129 if ttext.strip() and toktype != tokenize.COMMENT: 150 if ttext.strip() and toktype != tokenize.COMMENT:
130 # A non-whitespace token. 151 # A non-whitespace token.
152 empty = False
131 if first_line is None: 153 if first_line is None:
132 # The token is not whitespace, and is the first in a 154 # The token is not whitespace, and is the first in a
133 # statement. 155 # statement.
134 first_line = slineno 156 first_line = slineno
135 # Check whether to end an excluded suite. 157 # Check whether to end an excluded suite.
139 self.excluded.add(elineno) 161 self.excluded.add(elineno)
140 162
141 prev_toktype = toktype 163 prev_toktype = toktype
142 164
143 # Find the starts of the executable statements. 165 # Find the starts of the executable statements.
144 self.statement_starts.update(self.byte_parser._find_statements()) 166 if not empty:
167 self.statement_starts.update(self.byte_parser._find_statements())
145 168
146 def first_line(self, line): 169 def first_line(self, line):
147 """Return the first line number of the statement including `line`.""" 170 """Return the first line number of the statement including `line`."""
148 rng = self.multiline.get(line) 171 rng = self.multiline.get(line)
149 if rng: 172 if rng:
150 first_line = rng[0] 173 first_line = rng[0]
151 else: 174 else:
152 first_line = line 175 first_line = line
153 return first_line 176 return first_line
154 177
155 def first_lines(self, lines, ignore=None): 178 def first_lines(self, lines, *ignores):
156 """Map the line numbers in `lines` to the correct first line of the 179 """Map the line numbers in `lines` to the correct first line of the
157 statement. 180 statement.
158 181
159 Skip any line mentioned in `ignore`. 182 Skip any line mentioned in any of the sequences in `ignores`.
160 183
161 Returns a sorted list of the first lines. 184 Returns a set of the first lines.
162 185
163 """ 186 """
164 ignore = ignore or [] 187 ignore = set()
188 for ign in ignores:
189 ignore.update(ign)
165 lset = set() 190 lset = set()
166 for l in lines: 191 for l in lines:
167 if l in ignore: 192 if l in ignore:
168 continue 193 continue
169 new_l = self.first_line(l) 194 new_l = self.first_line(l)
170 if new_l not in ignore: 195 if new_l not in ignore:
171 lset.add(new_l) 196 lset.add(new_l)
172 return sorted(lset) 197 return lset
173 198
174 def parse_source(self): 199 def parse_source(self):
175 """Parse source text to find executable lines, excluded lines, etc. 200 """Parse source text to find executable lines, excluded lines, etc.
176 201
177 Return values are 1) a sorted list of executable line numbers, and 202 Return values are 1) a set of executable line numbers, and 2) a set of
178 2) a sorted list of excluded line numbers. 203 excluded line numbers.
179 204
180 Reported line numbers are normalized to the first line of multi-line 205 Reported line numbers are normalized to the first line of multi-line
181 statements. 206 statements.
182 207
183 """ 208 """
184 self._raw_parse() 209 try:
210 self._raw_parse()
211 except (tokenize.TokenError, IndentationError):
212 _, tokerr, _ = sys.exc_info()
213 msg, lineno = tokerr.args
214 raise NotPython(
215 "Couldn't parse '%s' as Python source: '%s' at %s" %
216 (self.filename, msg, lineno)
217 )
185 218
186 excluded_lines = self.first_lines(self.excluded) 219 excluded_lines = self.first_lines(self.excluded)
187 ignore = excluded_lines + list(self.docstrings) 220 lines = self.first_lines(
188 lines = self.first_lines(self.statement_starts, ignore) 221 self.statement_starts,
222 excluded_lines,
223 self.docstrings
224 )
189 225
190 return lines, excluded_lines 226 return lines, excluded_lines
191 227
192 def arcs(self): 228 def arcs(self):
193 """Get information about the arcs available in the code. 229 """Get information about the arcs available in the code.
212 248
213 """ 249 """
214 excluded_lines = self.first_lines(self.excluded) 250 excluded_lines = self.first_lines(self.excluded)
215 exit_counts = {} 251 exit_counts = {}
216 for l1, l2 in self.arcs(): 252 for l1, l2 in self.arcs():
217 if l1 == -1: 253 if l1 < 0:
218 # Don't ever report -1 as a line number 254 # Don't ever report -1 as a line number
219 continue 255 continue
220 if l1 in excluded_lines: 256 if l1 in excluded_lines:
221 # Don't report excluded lines as line numbers. 257 # Don't report excluded lines as line numbers.
222 continue 258 continue
238 274
239 275
240 ## Opcodes that guide the ByteParser. 276 ## Opcodes that guide the ByteParser.
241 277
242 def _opcode(name): 278 def _opcode(name):
243 """Return the opcode by name from the opcode module.""" 279 """Return the opcode by name from the dis module."""
244 return opcode.opmap[name] 280 return dis.opmap[name]
245 281
246 def _opcode_set(*names): 282 def _opcode_set(*names):
247 """Return a set of opcodes by the names in `names`.""" 283 """Return a set of opcodes by the names in `names`."""
248 return set([_opcode(name) for name in names]) 284 s = set()
285 for name in names:
286 try:
287 s.add(_opcode(name))
288 except KeyError:
289 pass
290 return s
249 291
250 # Opcodes that leave the code object. 292 # Opcodes that leave the code object.
251 OPS_CODE_END = _opcode_set('RETURN_VALUE') 293 OPS_CODE_END = _opcode_set('RETURN_VALUE')
252 294
253 # Opcodes that unconditionally end the code chunk. 295 # Opcodes that unconditionally end the code chunk.
254 OPS_CHUNK_END = _opcode_set( 296 OPS_CHUNK_END = _opcode_set(
255 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS', 297 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
256 'BREAK_LOOP', 'CONTINUE_LOOP', 298 'BREAK_LOOP', 'CONTINUE_LOOP',
257 ) 299 )
258 300
301 # Opcodes that unconditionally begin a new code chunk. By starting new chunks
302 # with unconditional jump instructions, we neatly deal with jumps to jumps
303 # properly.
304 OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
305
259 # Opcodes that push a block on the block stack. 306 # Opcodes that push a block on the block stack.
260 OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY') 307 OPS_PUSH_BLOCK = _opcode_set(
308 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
309 )
261 310
262 # Block types for exception handling. 311 # Block types for exception handling.
263 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY') 312 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
264 313
265 # Opcodes that pop a block from the block stack. 314 # Opcodes that pop a block from the block stack.
266 OPS_POP_BLOCK = _opcode_set('POP_BLOCK') 315 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
267 316
268 # Opcodes that have a jump destination, but aren't really a jump. 317 # Opcodes that have a jump destination, but aren't really a jump.
269 OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY') 318 OPS_NO_JUMP = OPS_PUSH_BLOCK
270 319
271 # Individual opcodes we need below. 320 # Individual opcodes we need below.
272 OP_BREAK_LOOP = _opcode('BREAK_LOOP') 321 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
273 OP_END_FINALLY = _opcode('END_FINALLY') 322 OP_END_FINALLY = _opcode('END_FINALLY')
274 OP_COMPARE_OP = _opcode('COMPARE_OP') 323 OP_COMPARE_OP = _opcode('COMPARE_OP')
281 """Parse byte codes to understand the structure of code.""" 330 """Parse byte codes to understand the structure of code."""
282 331
283 def __init__(self, code=None, text=None, filename=None): 332 def __init__(self, code=None, text=None, filename=None):
284 if code: 333 if code:
285 self.code = code 334 self.code = code
335 self.text = text
286 else: 336 else:
287 if not text: 337 if not text:
288 assert filename, "If no code or text, need a filename" 338 assert filename, "If no code or text, need a filename"
289 sourcef = open(filename, 'rU') 339 sourcef = open_source(filename)
290 text = sourcef.read() 340 try:
291 sourcef.close() 341 text = sourcef.read()
342 finally:
343 sourcef.close()
344 self.text = text
292 345
293 try: 346 try:
294 # Python 2.3 and 2.4 don't like partial last lines, so be sure 347 # Python 2.3 and 2.4 don't like partial last lines, so be sure
295 # the text ends nicely for them. 348 # the text ends nicely for them.
296 self.code = compile(text + '\n', filename, "exec") 349 self.code = compile(text + '\n', filename, "exec")
297 except SyntaxError: 350 except SyntaxError:
298 _, synerr, _ = sys.exc_info() 351 _, synerr, _ = sys.exc_info()
299 raise CoverageException( 352 raise NotPython(
300 "Couldn't parse '%s' as Python source: '%s' at line %d" % 353 "Couldn't parse '%s' as Python source: '%s' at line %d" %
301 (filename, synerr.msg, synerr.lineno) 354 (filename, synerr.msg, synerr.lineno)
302 ) 355 )
303 356
357 # Alternative Python implementations don't always provide all the
358 # attributes on code objects that we need to do the analysis.
359 for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
360 if not hasattr(self.code, attr):
361 raise CoverageException(
362 "This implementation of Python doesn't support code "
363 "analysis.\n"
364 "Run coverage.py under CPython for this command."
365 )
366
304 def child_parsers(self): 367 def child_parsers(self):
305 """Iterate over all the code objects nested within this one. 368 """Iterate over all the code objects nested within this one.
306 369
307 The iteration includes `self` as its first value. 370 The iteration includes `self` as its first value.
308 371
309 """ 372 """
310 return map(lambda c: ByteParser(code=c), CodeObjects(self.code)) 373 children = CodeObjects(self.code)
311 374 return [ByteParser(code=c, text=self.text) for c in children]
312 # Getting numbers from the lnotab value changed in Py3.0.
313 if sys.hexversion >= 0x03000000:
314 def _lnotab_increments(self, lnotab):
315 """Return a list of ints from the lnotab bytes in 3.x"""
316 return list(lnotab)
317 else:
318 def _lnotab_increments(self, lnotab):
319 """Return a list of ints from the lnotab string in 2.x"""
320 return [ord(c) for c in lnotab]
321 375
322 def _bytes_lines(self): 376 def _bytes_lines(self):
323 """Map byte offsets to line numbers in `code`. 377 """Map byte offsets to line numbers in `code`.
324 378
325 Uses co_lnotab described in Python/compile.c to map byte offsets to 379 Uses co_lnotab described in Python/compile.c to map byte offsets to
326 line numbers. Returns a list: [(b0, l0), (b1, l1), ...] 380 line numbers. Produces a sequence: (b0, l0), (b1, l1), ...
381
382 Only byte offsets that correspond to line numbers are included in the
383 results.
327 384
328 """ 385 """
329 # Adapted from dis.py in the standard library. 386 # Adapted from dis.py in the standard library.
330 byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2]) 387 byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
331 line_increments = self._lnotab_increments(self.code.co_lnotab[1::2]) 388 line_increments = bytes_to_ints(self.code.co_lnotab[1::2])
332 389
333 bytes_lines = []
334 last_line_num = None 390 last_line_num = None
335 line_num = self.code.co_firstlineno 391 line_num = self.code.co_firstlineno
336 byte_num = 0 392 byte_num = 0
337 for byte_incr, line_incr in zip(byte_increments, line_increments): 393 for byte_incr, line_incr in zip(byte_increments, line_increments):
338 if byte_incr: 394 if byte_incr:
339 if line_num != last_line_num: 395 if line_num != last_line_num:
340 bytes_lines.append((byte_num, line_num)) 396 yield (byte_num, line_num)
341 last_line_num = line_num 397 last_line_num = line_num
342 byte_num += byte_incr 398 byte_num += byte_incr
343 line_num += line_incr 399 line_num += line_incr
344 if line_num != last_line_num: 400 if line_num != last_line_num:
345 bytes_lines.append((byte_num, line_num)) 401 yield (byte_num, line_num)
346 return bytes_lines
347 402
348 def _find_statements(self): 403 def _find_statements(self):
349 """Find the statements in `self.code`. 404 """Find the statements in `self.code`.
350 405
351 Return a set of line numbers that start statements. Recurses into all 406 Produce a sequence of line numbers that start statements. Recurses
352 code objects reachable from `self.code`. 407 into all code objects reachable from `self.code`.
353 408
354 """ 409 """
355 stmts = set()
356 for bp in self.child_parsers(): 410 for bp in self.child_parsers():
357 # Get all of the lineno information from this code. 411 # Get all of the lineno information from this code.
358 for _, l in bp._bytes_lines(): 412 for _, l in bp._bytes_lines():
359 stmts.add(l) 413 yield l
360 return stmts 414
361 415 def _block_stack_repr(self, block_stack):
362 def _disassemble(self): # pragma: no cover 416 """Get a string version of `block_stack`, for debugging."""
363 """Disassemble code, for ad-hoc experimenting.""" 417 blocks = ", ".join(
364 418 ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
365 import dis 419 )
366 420 return "[" + blocks + "]"
367 for bp in self.child_parsers():
368 print("\n%s: " % bp.code)
369 dis.dis(bp.code)
370 print("Bytes lines: %r" % bp._bytes_lines())
371
372 print("")
373 421
374 def _split_into_chunks(self): 422 def _split_into_chunks(self):
375 """Split the code object into a list of `Chunk` objects. 423 """Split the code object into a list of `Chunk` objects.
376 424
377 Each chunk is only entered at its first instruction, though there can 425 Each chunk is only entered at its first instruction, though there can
378 be many exits from a chunk. 426 be many exits from a chunk.
379 427
380 Returns a list of `Chunk` objects. 428 Returns a list of `Chunk` objects.
381 429
382 """ 430 """
383
384 # The list of chunks so far, and the one we're working on. 431 # The list of chunks so far, and the one we're working on.
385 chunks = [] 432 chunks = []
386 chunk = None 433 chunk = None
434
435 # A dict mapping byte offsets of line starts to the line numbers.
387 bytes_lines_map = dict(self._bytes_lines()) 436 bytes_lines_map = dict(self._bytes_lines())
388 437
389 # The block stack: loops and try blocks get pushed here for the 438 # The block stack: loops and try blocks get pushed here for the
390 # implicit jumps that can occur. 439 # implicit jumps that can occur.
391 # Each entry is a tuple: (block type, destination) 440 # Each entry is a tuple: (block type, destination)
396 ignore_branch = 0 445 ignore_branch = 0
397 446
398 # We have to handle the last two bytecodes specially. 447 # We have to handle the last two bytecodes specially.
399 ult = penult = None 448 ult = penult = None
400 449
401 for bc in ByteCodes(self.code.co_code): 450 # Get a set of all of the jump-to points.
402 # Maybe have to start a new block 451 jump_to = set()
452 bytecodes = list(ByteCodes(self.code.co_code))
453 for bc in bytecodes:
454 if bc.jump_to >= 0:
455 jump_to.add(bc.jump_to)
456
457 chunk_lineno = 0
458
459 # Walk the byte codes building chunks.
460 for bc in bytecodes:
461 # Maybe have to start a new chunk
462 start_new_chunk = False
463 first_chunk = False
403 if bc.offset in bytes_lines_map: 464 if bc.offset in bytes_lines_map:
465 # Start a new chunk for each source line number.
466 start_new_chunk = True
467 chunk_lineno = bytes_lines_map[bc.offset]
468 first_chunk = True
469 elif bc.offset in jump_to:
470 # To make chunks have a single entrance, we have to make a new
471 # chunk when we get to a place some bytecode jumps to.
472 start_new_chunk = True
473 elif bc.op in OPS_CHUNK_BEGIN:
474 # Jumps deserve their own unnumbered chunk. This fixes
475 # problems with jumps to jumps getting confused.
476 start_new_chunk = True
477
478 if not chunk or start_new_chunk:
404 if chunk: 479 if chunk:
405 chunk.exits.add(bc.offset) 480 chunk.exits.add(bc.offset)
406 chunk = Chunk(bc.offset, bytes_lines_map[bc.offset]) 481 chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
407 chunks.append(chunk)
408
409 if not chunk:
410 chunk = Chunk(bc.offset)
411 chunks.append(chunk) 482 chunks.append(chunk)
412 483
413 # Look at the opcode 484 # Look at the opcode
414 if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP: 485 if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
415 if ignore_branch: 486 if ignore_branch:
419 # The opcode has a jump, it's an exit for this chunk. 490 # The opcode has a jump, it's an exit for this chunk.
420 chunk.exits.add(bc.jump_to) 491 chunk.exits.add(bc.jump_to)
421 492
422 if bc.op in OPS_CODE_END: 493 if bc.op in OPS_CODE_END:
423 # The opcode can exit the code object. 494 # The opcode can exit the code object.
424 chunk.exits.add(-1) 495 chunk.exits.add(-self.code.co_firstlineno)
425 if bc.op in OPS_PUSH_BLOCK: 496 if bc.op in OPS_PUSH_BLOCK:
426 # The opcode adds a block to the block_stack. 497 # The opcode adds a block to the block_stack.
427 block_stack.append((bc.op, bc.jump_to)) 498 block_stack.append((bc.op, bc.jump_to))
428 if bc.op in OPS_POP_BLOCK: 499 if bc.op in OPS_POP_BLOCK:
429 # The opcode pops a block from the block stack. 500 # The opcode pops a block from the block stack.
434 # A break is implicit: jump where the top of the 505 # A break is implicit: jump where the top of the
435 # block_stack points. 506 # block_stack points.
436 chunk.exits.add(block_stack[-1][1]) 507 chunk.exits.add(block_stack[-1][1])
437 chunk = None 508 chunk = None
438 if bc.op == OP_END_FINALLY: 509 if bc.op == OP_END_FINALLY:
439 if block_stack:
440 # A break that goes through a finally will jump to whatever
441 # block is on top of the stack.
442 chunk.exits.add(block_stack[-1][1])
443 # For the finally clause we need to find the closest exception 510 # For the finally clause we need to find the closest exception
444 # block, and use its jump target as an exit. 511 # block, and use its jump target as an exit.
445 for iblock in range(len(block_stack)-1, -1, -1): 512 for block in reversed(block_stack):
446 if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS: 513 if block[0] in OPS_EXCEPT_BLOCKS:
447 chunk.exits.add(block_stack[iblock][1]) 514 chunk.exits.add(block[1])
448 break 515 break
449 if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION: 516 if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
450 # This is an except clause. We want to overlook the next 517 # This is an except clause. We want to overlook the next
451 # branch, so that except's don't count as branches. 518 # branch, so that except's don't count as branches.
452 ignore_branch += 1 519 ignore_branch += 1
453 520
454 penult = ult 521 penult = ult
455 ult = bc 522 ult = bc
456
457 523
458 if chunks: 524 if chunks:
459 # The last two bytecodes could be a dummy "return None" that 525 # The last two bytecodes could be a dummy "return None" that
460 # shouldn't be counted as real code. Every Python code object seems 526 # shouldn't be counted as real code. Every Python code object seems
461 # to end with a return, and a "return None" is inserted if there 527 # to end with a return, and a "return None" is inserted if there
464 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE: 530 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
465 if self.code.co_consts[penult.arg] is None: 531 if self.code.co_consts[penult.arg] is None:
466 # This is "return None", but is it dummy? A real line 532 # This is "return None", but is it dummy? A real line
467 # would be a last chunk all by itself. 533 # would be a last chunk all by itself.
468 if chunks[-1].byte != penult.offset: 534 if chunks[-1].byte != penult.offset:
535 ex = -self.code.co_firstlineno
469 # Split the last chunk 536 # Split the last chunk
470 last_chunk = chunks[-1] 537 last_chunk = chunks[-1]
471 last_chunk.exits.remove(-1) 538 last_chunk.exits.remove(ex)
472 last_chunk.exits.add(penult.offset) 539 last_chunk.exits.add(penult.offset)
473 chunk = Chunk(penult.offset) 540 chunk = Chunk(
474 chunk.exits.add(-1) 541 penult.offset, last_chunk.line, False
542 )
543 chunk.exits.add(ex)
475 chunks.append(chunk) 544 chunks.append(chunk)
476 545
477 # Give all the chunks a length. 546 # Give all the chunks a length.
478 chunks[-1].length = bc.next_offset - chunks[-1].byte 547 chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
479 for i in range(len(chunks)-1): 548 for i in range(len(chunks)-1):
480 chunks[i].length = chunks[i+1].byte - chunks[i].byte 549 chunks[i].length = chunks[i+1].byte - chunks[i].byte
481 550
551 #self.validate_chunks(chunks)
482 return chunks 552 return chunks
553
554 def validate_chunks(self, chunks):
555 """Validate the rule that chunks have a single entrance."""
556 # starts is the entrances to the chunks
557 starts = set([ch.byte for ch in chunks])
558 for ch in chunks:
559 assert all([(ex in starts or ex < 0) for ex in ch.exits])
483 560
484 def _arcs(self): 561 def _arcs(self):
485 """Find the executable arcs in the code. 562 """Find the executable arcs in the code.
486 563
487 Returns a set of pairs, (from,to). From and to are integer line 564 Yields pairs: (from,to). From and to are integer line numbers. If
488 numbers. If from is -1, then the arc is an entrance into the code 565 from is < 0, then the arc is an entrance into the code object. If to
489 object. If to is -1, the arc is an exit from the code object. 566 is < 0, the arc is an exit from the code object.
490 567
491 """ 568 """
492 chunks = self._split_into_chunks() 569 chunks = self._split_into_chunks()
493 570
494 # A map from byte offsets to chunks jumped into. 571 # A map from byte offsets to chunks jumped into.
495 byte_chunks = dict([(c.byte, c) for c in chunks]) 572 byte_chunks = dict([(c.byte, c) for c in chunks])
496 573
497 # Build a map from byte offsets to actual lines reached. 574 # There's always an entrance at the first chunk.
498 byte_lines = {-1:[-1]} 575 yield (-1, byte_chunks[0].line)
499 bytes_to_add = set([c.byte for c in chunks]) 576
500 577 # Traverse from the first chunk in each line, and yield arcs where
501 while bytes_to_add: 578 # the trace function will be invoked.
502 byte_to_add = bytes_to_add.pop() 579 for chunk in chunks:
503 if byte_to_add in byte_lines or byte_to_add == -1: 580 if not chunk.first:
504 continue 581 continue
505 582
506 # Which lines does this chunk lead to? 583 chunks_considered = set()
507 bytes_considered = set() 584 chunks_to_consider = [chunk]
508 bytes_to_consider = [byte_to_add] 585 while chunks_to_consider:
509 lines = set() 586 # Get the chunk we're considering, and make sure we don't
510 587 # consider it again
511 while bytes_to_consider: 588 this_chunk = chunks_to_consider.pop()
512 byte = bytes_to_consider.pop() 589 chunks_considered.add(this_chunk)
513 bytes_considered.add(byte) 590
514 591 # For each exit, add the line number if the trace function
515 # Find chunk for byte 592 # would be triggered, or add the chunk to those being
516 try: 593 # considered if not.
517 ch = byte_chunks[byte] 594 for ex in this_chunk.exits:
518 except KeyError: 595 if ex < 0:
519 for ch in chunks: 596 yield (chunk.line, ex)
520 if ch.byte <= byte < ch.byte+ch.length:
521 break
522 else: 597 else:
523 # No chunk for this byte! 598 next_chunk = byte_chunks[ex]
524 raise Exception("Couldn't find chunk @ %d" % byte) 599 if next_chunk in chunks_considered:
525 byte_chunks[byte] = ch 600 continue
526 601
527 if ch.line: 602 # The trace function is invoked if visiting the first
528 lines.add(ch.line) 603 # bytecode in a line, or if the transition is a
529 else: 604 # backward jump.
530 for ex in ch.exits: 605 backward_jump = next_chunk.byte < this_chunk.byte
531 if ex == -1: 606 if next_chunk.first or backward_jump:
532 lines.add(-1) 607 if next_chunk.line != chunk.line:
533 elif ex not in bytes_considered: 608 yield (chunk.line, next_chunk.line)
534 bytes_to_consider.append(ex) 609 else:
535 610 chunks_to_consider.append(next_chunk)
536 bytes_to_add.update(ch.exits)
537
538 byte_lines[byte_to_add] = lines
539
540 # Figure out for each chunk where the exits go.
541 arcs = set()
542 for chunk in chunks:
543 if chunk.line:
544 for ex in chunk.exits:
545 for exit_line in byte_lines[ex]:
546 if chunk.line != exit_line:
547 arcs.add((chunk.line, exit_line))
548 for line in byte_lines[0]:
549 arcs.add((-1, line))
550
551 return arcs
552 611
553 def _all_chunks(self): 612 def _all_chunks(self):
554 """Returns a list of `Chunk` objects for this code and its children. 613 """Returns a list of `Chunk` objects for this code and its children.
555 614
556 See `_split_into_chunks` for details. 615 See `_split_into_chunks` for details.
574 633
575 return arcs 634 return arcs
576 635
577 636
class Chunk(object):
    """A sequence of byte codes with a single entrance.

    To analyze byte code, we have to divide it into chunks, sequences of byte
    codes such that each chunk has only one entrance, the first instruction in
    the block.

    This is almost the CS concept of `basic block`_, except that we're willing
    to have many exits from a chunk, and "basic block" is a more cumbersome
    term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    `byte` and `length` locate the chunk in the byte code (presumably the
    starting offset and the size; `length` starts at 0 and is expected to be
    filled in by whoever builds the chunk).

    `line` is the source line number containing this chunk.

    `first` is true if this is the first chunk in the source line.

    `exits` is the set of places control can go after this chunk.  An
    exit < 0 means the chunk can leave the code (return).  The exit is the
    negative of the starting line number of the code block.

    """
    def __init__(self, byte, line=0, first=False):
        # `line` and `first` have defaults so that both the two-argument
        # form `Chunk(byte, line)` and the three-argument form
        # `Chunk(byte, line, first)` keep working.
        self.byte = byte
        self.line = line
        self.first = first
        self.length = 0
        self.exits = set()

    def __repr__(self):
        # A "!" after the line number marks the first chunk of a line; for
        # other chunks the output matches the older "<byte+length @line exits>"
        # form exactly.
        bang = ""
        if self.first:
            bang = "!"
        return "<%d+%d @%d%s %r>" % (
            self.byte, self.length, self.line, bang, list(self.exits)
            )
605 674
class CachedTokenizer(object):
    """A one-element cache around tokenize.generate_tokens.

    When reporting, coverage.py tokenizes files twice, once to find the
    structure of the file, and once to syntax-color it.  Tokenizing is
    expensive, and easily cached.

    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
    actually tokenize twice.

    """
    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for `tokenize.generate_tokens`.

        `text` is the source text to tokenize.  Returns a list of tokens.
        Calling again with equal text returns the very same list object, so
        callers should treat the result as read-only.

        """
        if text != self.last_text:
            self.last_text = text
            self.last_tokens = list(
                tokenize.generate_tokens(StringIO(text).readline)
                )
        return self.last_tokens

# Create our generate_tokens cache as a callable replacement function.
generate_tokens = CachedTokenizer().generate_tokens


class AdHocMain(object):        # pragma: no cover
    """An ad-hoc main for code parsing experiments."""

    def main(self, args):
        """A main function for trying the code from the command line.

        `args` is the list of command-line arguments, without the program
        name.  Exits through the option parser's error handling if no file
        name is given and -R is not used.

        """
        # These are only needed by this experimental entry point, so they are
        # imported here rather than relied on at module level.
        import glob
        import os
        from optparse import OptionParser

        parser = OptionParser()
        parser.add_option(
            "-c", action="store_true", dest="chunks",
            help="Show basic block chunks"
            )
        parser.add_option(
            "-d", action="store_true", dest="dis",
            help="Disassemble"
            )
        parser.add_option(
            "-R", action="store_true", dest="recursive",
            help="Recurse to find source files"
            )
        parser.add_option(
            "-s", action="store_true", dest="source",
            help="Show analyzed source"
            )
        parser.add_option(
            "-t", action="store_true", dest="tokens",
            help="Show tokens"
            )

        # Parse the arguments we were given, not whatever is in sys.argv.
        options, args = parser.parse_args(args)
        if options.recursive:
            if args:
                root = args[0]
            else:
                root = "."
            for dirpath, _, _ in os.walk(root):
                for f in glob.glob(dirpath + "/*.py"):
                    self.adhoc_one_file(options, f)
        elif args:
            self.adhoc_one_file(options, args[0])
        else:
            # Report a usage error instead of failing with an IndexError.
            parser.error("a source file name is required unless -R is given")

    def adhoc_one_file(self, options, filename):
        """Process just one file.

        `options` is the parsed option object from `main`; `filename` is the
        path of the Python source file to analyze.  All output is printed.

        """
        if options.dis or options.chunks:
            try:
                bp = ByteParser(filename=filename)
            except CoverageException:
                # Spelled this way to stay compatible with old Pythons.
                _, err, _ = sys.exc_info()
                print("%s" % (err,))
                return

        if options.dis:
            print("Main code:")
            bp._disassemble()

        if options.chunks:
            chunks = bp._all_chunks()
            if options.recursive:
                print("%6d: %s" % (len(chunks), filename))
            else:
                print("Chunks: %r" % chunks)
            arcs = bp._all_arcs()
            print("Arcs: %r" % sorted(arcs))

        if options.source or options.tokens:
            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
            cp.show_tokens = options.tokens
            cp._raw_parse()

            if options.source:
                # Arcs are only computed when chunks were requested.
                if options.chunks:
                    arc_width, arc_chars = self.arc_ascii_art(arcs)
                else:
                    arc_width, arc_chars = 0, {}

                exit_counts = cp.exit_counts()

                for i, ltext in enumerate(cp.lines):
                    lineno = i+1
                    # One marker column each: statement start, exit count,
                    # docstring/class, excluded.
                    m0 = m1 = m2 = m3 = ' '
                    if lineno in cp.statement_starts:
                        m0 = '-'
                    exits = exit_counts.get(lineno, 0)
                    if exits > 1:
                        m1 = str(exits)
                    if lineno in cp.docstrings:
                        m2 = '"'
                    if lineno in cp.classdefs:
                        m2 = 'C'
                    if lineno in cp.excluded:
                        m3 = 'x'
                    a = arc_chars.get(lineno, '').ljust(arc_width)
                    print("%4d %s%s%s%s%s %s" %
                                (lineno, m0, m1, m2, m3, a, ltext)
                        )

    def arc_ascii_art(self, arcs):
        """Draw arcs as ascii art.

        `arcs` is an iterable of (from_line, to_line) pairs.  A negative
        line number on either side means entering or leaving the code.

        Returns a width of characters needed to draw all the arcs, and a
        dictionary mapping line numbers to ascii strings to draw for that line.

        """
        arc_chars = {}
        for lfrom, lto in sorted(arcs):
            if lfrom < 0:
                # Entry into the code: mark the destination line.
                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
            elif lto < 0:
                # Exit from the code: mark the source line.
                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
            else:
                if lfrom == lto-1:
                    # Don't show obvious arcs.
                    continue
                if lfrom < lto:
                    l1, l2 = lfrom, lto
                else:
                    l1, l2 = lto, lfrom
                # Draw this arc in the first column that is free on every
                # line it spans.
                w = max([
                    len(arc_chars.get(lineno, ''))
                    for lineno in range(l1, l2+1)
                    ])
                for lineno in range(l1, l2+1):
                    if lineno == lfrom:
                        ch = '<'
                    elif lineno == lto:
                        ch = '>'
                    else:
                        ch = '|'
                    arc_chars[lineno] = arc_chars.get(lineno, '').ljust(w) + ch

        if arc_chars:
            arc_width = max([len(a) for a in arc_chars.values()])
        else:
            arc_width = 0

        return arc_width, arc_chars

if __name__ == '__main__':
    AdHocMain().main(sys.argv[1:])
745
746 #
747 # eflag: FileType = Python2

eric ide

mercurial