DebugClients/Python3/coverage/parser.py

changeset 29
391dc0bc4ae5
parent 0
de9c2efb9d02
child 3495
fac17a82b431
equal deleted inserted replaced
28:dde24fc7f7ba 29:391dc0bc4ae5
1 """Code parsing for Coverage.""" 1 """Code parsing for Coverage."""
2 2
3 import re, token, tokenize, types 3 import glob, opcode, os, re, sys, token, tokenize
4 import io as StringIO 4
5 5 from .backward import set, sorted, StringIO # pylint: disable-msg=W0622
6 from .misc import nice_pair, CoverageException 6 from .bytecode import ByteCodes, CodeObjects
7 from .backward import set # pylint: disable-msg=W0622 7 from .misc import nice_pair, CoverageException, NoSource, expensive
8 8
9 9
10 class CodeParser: 10 class CodeParser(object):
11 """Parse code to find executable lines, excluded lines, etc.""" 11 """Parse code to find executable lines, excluded lines, etc."""
12 12
13 def __init__(self, show_tokens=False): 13 def __init__(self, text=None, filename=None, exclude=None):
14 self.show_tokens = show_tokens 14 """
15 Source can be provided as `text`, the text itself, or `filename`, from
16 which text will be read. Excluded lines are those that match
17 `exclude`, a regex.
18
19 """
20 assert text or filename, "CodeParser needs either text or filename"
21 self.filename = filename or "<code>"
22 self.text = text
23 if not self.text:
24 try:
25 sourcef = open(self.filename, 'rU')
26 self.text = sourcef.read()
27 sourcef.close()
28 except IOError:
29 _, err, _ = sys.exc_info()
30 raise NoSource(
31 "No source for code: %r: %s" % (self.filename, err)
32 )
33 self.text = self.text.replace('\r\n', '\n')
34
35 self.exclude = exclude
36
37 self.show_tokens = False
15 38
16 # The text lines of the parsed code. 39 # The text lines of the parsed code.
17 self.lines = None 40 self.lines = self.text.split('\n')
18 41
19 # The line numbers of excluded lines of code. 42 # The line numbers of excluded lines of code.
20 self.excluded = set() 43 self.excluded = set()
21 44
22 # The line numbers of docstring lines. 45 # The line numbers of docstring lines.
23 self.docstrings = set() 46 self.docstrings = set()
24 47
48 # The line numbers of class definitions.
49 self.classdefs = set()
50
25 # A dict mapping line numbers to (lo,hi) for multi-line statements. 51 # A dict mapping line numbers to (lo,hi) for multi-line statements.
26 self.multiline = {} 52 self.multiline = {}
27 53
28 # The line numbers that start statements. 54 # The line numbers that start statements.
29 self.statement_starts = set() 55 self.statement_starts = set()
30 56
31 def find_statement_starts(self, code): 57 # Lazily-created ByteParser
32 """Find the starts of statements in compiled code. 58 self._byte_parser = None
33 59
34 Uses co_lnotab described in Python/compile.c to find line numbers that 60 def _get_byte_parser(self):
35 start statements, adding them to `self.statement_starts`. 61 """Create a ByteParser on demand."""
36 62 if not self._byte_parser:
37 """ 63 self._byte_parser = \
38 # Adapted from dis.py in the standard library. 64 ByteParser(text=self.text, filename=self.filename)
39 byte_increments = [ord(c) for c in code.co_lnotab[0::2]] 65 return self._byte_parser
40 line_increments = [ord(c) for c in code.co_lnotab[1::2]] 66 byte_parser = property(_get_byte_parser)
41 67
42 last_line_num = None 68 def _raw_parse(self):
43 line_num = code.co_firstlineno 69 """Parse the source to find the interesting facts about its lines.
44 for byte_incr, line_incr in zip(byte_increments, line_increments): 70
45 if byte_incr:
46 if line_num != last_line_num:
47 self.statement_starts.add(line_num)
48 last_line_num = line_num
49 line_num += line_incr
50 if line_num != last_line_num:
51 self.statement_starts.add(line_num)
52
53 def find_statements(self, code):
54 """Find the statements in `code`.
55
56 Update `self.statement_starts`, a set of line numbers that start
57 statements. Recurses into all code objects reachable from `code`.
58
59 """
60 # Adapted from trace.py in the standard library.
61
62 # Get all of the lineno information from this code.
63 self.find_statement_starts(code)
64
65 # Check the constants for references to other code objects.
66 for c in code.co_consts:
67 if isinstance(c, types.CodeType):
68 # Found another code object, so recurse into it.
69 self.find_statements(c)
70
71 def raw_parse(self, text=None, filename=None, exclude=None):
72 """Parse `text` to find the interesting facts about its lines.
73
74 A handful of member fields are updated. 71 A handful of member fields are updated.
75 72
76 """ 73 """
77 if not text:
78 sourcef = open(filename, 'rU')
79 text = sourcef.read()
80 sourcef.close()
81 text = text.replace('\r\n', '\n')
82 self.lines = text.split('\n')
83
84 # Find lines which match an exclusion pattern. 74 # Find lines which match an exclusion pattern.
85 if exclude: 75 if self.exclude:
86 re_exclude = re.compile(exclude) 76 re_exclude = re.compile(self.exclude)
87 for i, ltext in enumerate(self.lines): 77 for i, ltext in enumerate(self.lines):
88 if re_exclude.search(ltext): 78 if re_exclude.search(ltext):
89 self.excluded.add(i+1) 79 self.excluded.add(i+1)
90 80
91 # Tokenize, to find excluded suites, to find docstrings, and to find 81 # Tokenize, to find excluded suites, to find docstrings, and to find
92 # multi-line statements. 82 # multi-line statements.
93 indent = 0 83 indent = 0
94 exclude_indent = 0 84 exclude_indent = 0
95 excluding = False 85 excluding = False
96 prev_toktype = token.INDENT 86 prev_toktype = token.INDENT
97 first_line = None 87 first_line = None
98 88
99 tokgen = tokenize.generate_tokens(io.StringIO(text).readline) 89 tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
100 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: 90 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
101 if self.show_tokens: 91 if self.show_tokens: # pragma: no cover
102 print(("%10s %5s %-20r %r" % ( 92 print("%10s %5s %-20r %r" % (
103 tokenize.tok_name.get(toktype, toktype), 93 tokenize.tok_name.get(toktype, toktype),
104 nice_pair((slineno, elineno)), ttext, ltext 94 nice_pair((slineno, elineno)), ttext, ltext
105 ))) 95 ))
106 if toktype == token.INDENT: 96 if toktype == token.INDENT:
107 indent += 1 97 indent += 1
108 elif toktype == token.DEDENT: 98 elif toktype == token.DEDENT:
109 indent -= 1 99 indent -= 1
100 elif toktype == token.NAME and ttext == 'class':
101 # Class definitions look like branches in the byte code, so
102 # we need to exclude them. The simplest way is to note the
103 # lines with the 'class' keyword.
104 self.classdefs.add(slineno)
110 elif toktype == token.OP and ttext == ':': 105 elif toktype == token.OP and ttext == ':':
111 if not excluding and elineno in self.excluded: 106 if not excluding and elineno in self.excluded:
112 # Start excluding a suite. We trigger off of the colon 107 # Start excluding a suite. We trigger off of the colon
113 # token so that the #pragma comment will be recognized on 108 # token so that the #pragma comment will be recognized on
114 # the same line as the colon. 109 # the same line as the colon.
115 exclude_indent = indent 110 exclude_indent = indent
116 excluding = True 111 excluding = True
117 elif toktype == token.STRING and prev_toktype == token.INDENT: 112 elif toktype == token.STRING and prev_toktype == token.INDENT:
118 # Strings that are first on an indented line are docstrings. 113 # Strings that are first on an indented line are docstrings.
119 # (a trick from trace.py in the stdlib.) 114 # (a trick from trace.py in the stdlib.) This works for
115 # 99.9999% of cases. For the rest (!) see:
116 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
120 for i in range(slineno, elineno+1): 117 for i in range(slineno, elineno+1):
121 self.docstrings.add(i) 118 self.docstrings.add(i)
122 elif toktype == token.NEWLINE: 119 elif toktype == token.NEWLINE:
123 if first_line is not None and elineno != first_line: 120 if first_line is not None and elineno != first_line:
124 # We're at the end of a line, and we've ended on a 121 # We're at the end of a line, and we've ended on a
126 # so record a multi-line range. 123 # so record a multi-line range.
127 rng = (first_line, elineno) 124 rng = (first_line, elineno)
128 for l in range(first_line, elineno+1): 125 for l in range(first_line, elineno+1):
129 self.multiline[l] = rng 126 self.multiline[l] = rng
130 first_line = None 127 first_line = None
131 128
132 if ttext.strip() and toktype != tokenize.COMMENT: 129 if ttext.strip() and toktype != tokenize.COMMENT:
133 # A non-whitespace token. 130 # A non-whitespace token.
134 if first_line is None: 131 if first_line is None:
135 # The token is not whitespace, and is the first in a 132 # The token is not whitespace, and is the first in a
136 # statement. 133 # statement.
138 # Check whether to end an excluded suite. 135 # Check whether to end an excluded suite.
139 if excluding and indent <= exclude_indent: 136 if excluding and indent <= exclude_indent:
140 excluding = False 137 excluding = False
141 if excluding: 138 if excluding:
142 self.excluded.add(elineno) 139 self.excluded.add(elineno)
143 140
144 prev_toktype = toktype 141 prev_toktype = toktype
145 142
146 # Find the starts of the executable statements. 143 # Find the starts of the executable statements.
147 filename = filename or "<code>" 144 self.statement_starts.update(self.byte_parser._find_statements())
148 try: 145
149 # Python 2.3 and 2.4 don't like partial last lines, so be sure the 146 def first_line(self, line):
150 # text ends nicely for them. 147 """Return the first line number of the statement including `line`."""
151 text += '\n' 148 rng = self.multiline.get(line)
152 code = compile(text, filename, "exec") 149 if rng:
153 except SyntaxError as synerr: 150 first_line = rng[0]
154 raise CoverageException( 151 else:
155 "Couldn't parse '%s' as Python source: '%s' at line %d" % 152 first_line = line
156 (filename, synerr.msg, synerr.lineno) 153 return first_line
157 ) 154
158 155 def first_lines(self, lines, ignore=None):
159 self.find_statements(code)
160
161 def map_to_first_line(self, lines, ignore=None):
162 """Map the line numbers in `lines` to the correct first line of the 156 """Map the line numbers in `lines` to the correct first line of the
163 statement. 157 statement.
164 158
165 Skip any line mentioned in `ignore`. 159 Skip any line mentioned in `ignore`.
166 160
167 Returns a sorted list of the first lines. 161 Returns a sorted list of the first lines.
168 162
169 """ 163 """
170 ignore = ignore or [] 164 ignore = ignore or []
171 lset = set() 165 lset = set()
172 for l in lines: 166 for l in lines:
173 if l in ignore: 167 if l in ignore:
174 continue 168 continue
175 rng = self.multiline.get(l) 169 new_l = self.first_line(l)
176 if rng:
177 new_l = rng[0]
178 else:
179 new_l = l
180 if new_l not in ignore: 170 if new_l not in ignore:
181 lset.add(new_l) 171 lset.add(new_l)
182 lines = list(lset) 172 return sorted(lset)
183 lines.sort() 173
184 return lines 174 def parse_source(self):
185
186 def parse_source(self, text=None, filename=None, exclude=None):
187 """Parse source text to find executable lines, excluded lines, etc. 175 """Parse source text to find executable lines, excluded lines, etc.
188 176
189 Source can be provided as `text`, the text itself, or `filename`, from 177 Return values are 1) a sorted list of executable line numbers, and
190 which text will be read. Excluded lines are those that match `exclude`, 178 2) a sorted list of excluded line numbers.
191 a regex. 179
192 180 Reported line numbers are normalized to the first line of multi-line
193 Return values are 1) a sorted list of executable line numbers, 181 statements.
194 2) a sorted list of excluded line numbers, and 3) a dict mapping line 182
195 numbers to pairs (lo,hi) for multi-line statements. 183 """
196 184 self._raw_parse()
197 """ 185
198 self.raw_parse(text, filename, exclude) 186 excluded_lines = self.first_lines(self.excluded)
199
200 excluded_lines = self.map_to_first_line(self.excluded)
201 ignore = excluded_lines + list(self.docstrings) 187 ignore = excluded_lines + list(self.docstrings)
202 lines = self.map_to_first_line(self.statement_starts, ignore) 188 lines = self.first_lines(self.statement_starts, ignore)
203 189
204 return lines, excluded_lines, self.multiline 190 return lines, excluded_lines
205 191
206 def print_parse_results(self): 192 def arcs(self):
207 """Print the results of the parsing.""" 193 """Get information about the arcs available in the code.
208 for i, ltext in enumerate(self.lines): 194
209 lineno = i+1 195 Returns a sorted list of line number pairs. Line numbers have been
210 m0 = m1 = m2 = ' ' 196 normalized to the first line of multiline statements.
211 if lineno in self.statement_starts: 197
212 m0 = '-' 198 """
213 if lineno in self.docstrings: 199 all_arcs = []
214 m1 = '"' 200 for l1, l2 in self.byte_parser._all_arcs():
215 if lineno in self.excluded: 201 fl1 = self.first_line(l1)
216 m2 = 'x' 202 fl2 = self.first_line(l2)
217 print(("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))) 203 if fl1 != fl2:
218 204 all_arcs.append((fl1, fl2))
205 return sorted(all_arcs)
206 arcs = expensive(arcs)
207
def exit_counts(self):
    """Count, for each source line, how many arcs leave that line.

    Arcs come from `self.arcs()`.  An arc is ignored when it starts at -1
    (the entrance to a code object), or when either end is an excluded
    line.  Lines recorded in `self.classdefs` have one exit subtracted.

    Returns a dict mapping line number to exit count.

    """
    # Membership is tested once or twice per arc, so use a set.
    skipped = set(self.first_lines(self.excluded))

    counts = {}
    for src, dst in self.arcs():
        # Never report -1 or an excluded line as a source, and never count
        # an arc that leads to an excluded line.
        if src == -1 or src in skipped or dst in skipped:
            continue
        counts[src] = counts.get(src, 0) + 1

    # Class definitions have one extra exit, so remove one for each.  The
    # key may be missing: classdefs can include excluded lines.
    for class_line in self.classdefs:
        if class_line in counts:
            counts[class_line] -= 1

    return counts
237 exit_counts = expensive(exit_counts)
238
239
240 ## Opcodes that guide the ByteParser.
241
242 def _opcode(name):
243 """Return the opcode by name from the opcode module."""
244 return opcode.opmap[name]
245
def _opcode_set(*names):
    """Build the set of numeric opcodes for the mnemonics in `names`.

    Each name is resolved with `_opcode`, so an unknown mnemonic raises
    KeyError.

    """
    return set(map(_opcode, names))
249
# These tables are built once, when the module is imported.  Every mnemonic
# named here must exist in `opcode.opmap` of the running interpreter, or
# `_opcode` raises KeyError.

# Opcodes that leave the code object.  ByteParser records an exit of -1 for
# a chunk containing one of these.
OPS_CODE_END = _opcode_set('RETURN_VALUE')

# Opcodes that unconditionally end the code chunk: the instruction that
# follows one of these starts a new chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
    )

# Opcodes that push a block on the block stack.  ByteParser pushes
# (opcode, jump target) for each one it sees.
OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY')

# Block types for exception handling.  Searched for on the block stack when
# an END_FINALLY is seen.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Opcodes that pop a block from the block stack.
OPS_POP_BLOCK = _opcode_set('POP_BLOCK')

# Opcodes that have a jump destination, but aren't really a jump.  Their
# jump target is not recorded as an exit of the chunk.
OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')

# Individual opcodes we need below.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
# Compared against the argument of COMPARE_OP to recognize an except clause.
# NOTE(review): presumably the index of 'exception match' in the
# interpreter's comparison table (`dis.cmp_op`) -- confirm for each
# supported Python version.
COMPARE_EXCEPTION = 10  # just have to get this const from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
278
279
class ByteParser(object):
    """Parse byte codes to understand the structure of code.

    A ByteParser wraps one code object (`self.code`).  From it, it derives
    the line numbers that start statements, the `Chunk` objects the byte
    code divides into, and the arcs (line-to-line transitions) between them.

    """

    def __init__(self, code=None, text=None, filename=None):
        """Wrap `code`, or compile `text` (or the contents of `filename`).

        `code` is used as-is when given.  Otherwise `text` is compiled; if
        `text` is empty, it is first read from `filename`.

        Raises CoverageException if the source can't be compiled.

        """
        if code:
            self.code = code
            return

        if not text:
            assert filename, "If no code or text, need a filename"
            # Python 3 text mode already does universal-newline translation,
            # and the 'U' flag is rejected by Python 3.11+.  Only Python 2
            # needs it spelled out.
            if sys.hexversion >= 0x03000000:
                mode = 'r'
            else:
                mode = 'rU'
            sourcef = open(filename, mode)
            try:
                text = sourcef.read()
            finally:
                # Close the file even if the read fails.
                sourcef.close()

        # compile() needs a real file name.  Use the same placeholder
        # CodeParser uses when only text was supplied.
        filename = filename or "<code>"

        try:
            # Python 2.3 and 2.4 don't like partial last lines, so be sure
            # the text ends nicely for them.
            self.code = compile(text + '\n', filename, "exec")
        except SyntaxError:
            _, synerr, _ = sys.exc_info()
            raise CoverageException(
                "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                )

    def child_parsers(self):
        """Return ByteParsers for all the code objects nested within this one.

        One new ByteParser is made for each code object produced by
        `CodeObjects(self.code)`.  The original note says the sequence
        starts with this parser's own code object; that depends on
        `CodeObjects`, which is defined elsewhere.

        """
        return [ByteParser(code=c) for c in CodeObjects(self.code)]

    # Getting numbers from the lnotab value changed in Py3.0.
    if sys.hexversion >= 0x03000000:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab bytes in 3.x"""
            return list(lnotab)
    else:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab string in 2.x"""
            return [ord(c) for c in lnotab]

    def _bytes_lines(self):
        """Map byte offsets to line numbers in `code`.

        Uses co_lnotab described in Python/compile.c to map byte offsets to
        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]

        """
        # Adapted from dis.py in the standard library.
        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])

        # Since Python 3.6 the line deltas in co_lnotab are signed 8-bit
        # values, so that line numbers can go backwards.
        signed_line_deltas = sys.hexversion >= 0x03060000

        bytes_lines = []
        last_line_num = None
        line_num = self.code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    bytes_lines.append((byte_num, line_num))
                    last_line_num = line_num
                byte_num += byte_incr
            if signed_line_deltas and line_incr >= 0x80:
                line_incr -= 0x100
            line_num += line_incr
        if line_num != last_line_num:
            bytes_lines.append((byte_num, line_num))
        return bytes_lines

    def _find_statements(self):
        """Find the statements in `self.code`.

        Return a set of line numbers that start statements.  Covers every
        code object yielded by `child_parsers`.

        """
        stmts = set()
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            for _, l in bp._bytes_lines():
                stmts.add(l)
        return stmts

    def _disassemble(self):     # pragma: no cover
        """Disassemble code, for ad-hoc experimenting.

        Prints a `dis` listing and the byte-offset/line table for each
        child parser's code object.

        """

        import dis

        for bp in self.child_parsers():
            print("\n%s: " % bp.code)
            dis.dis(bp.code)
            print("Bytes lines: %r" % bp._bytes_lines())

        print("")

    def _split_into_chunks(self):
        """Split the code object into a list of `Chunk` objects.

        Each chunk is only entered at its first instruction, though there can
        be many exits from a chunk.

        Returns a list of `Chunk` objects.

        """

        # The list of chunks so far, and the one we're working on.
        chunks = []
        chunk = None
        bytes_lines_map = dict(self._bytes_lines())

        # The block stack: loops and try blocks get pushed here for the
        # implicit jumps that can occur.
        # Each entry is a tuple: (block type, destination)
        block_stack = []

        # Some op codes are followed by branches that should be ignored.  This
        # is a count of how many ignores are left.
        ignore_branch = 0

        # We have to handle the last two bytecodes specially.
        ult = penult = None

        for bc in ByteCodes(self.code.co_code):
            # Maybe have to start a new block
            if bc.offset in bytes_lines_map:
                if chunk:
                    chunk.exits.add(bc.offset)
                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
                chunks.append(chunk)

            if not chunk:
                # The previous chunk was ended, and no line starts here:
                # begin a chunk with no line number.
                chunk = Chunk(bc.offset)
                chunks.append(chunk)

            # Look at the opcode
            if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
                if ignore_branch:
                    # Someone earlier wanted us to ignore this branch.
                    ignore_branch -= 1
                else:
                    # The opcode has a jump, it's an exit for this chunk.
                    chunk.exits.add(bc.jump_to)

            if bc.op in OPS_CODE_END:
                # The opcode can exit the code object.
                chunk.exits.add(-1)
            if bc.op in OPS_PUSH_BLOCK:
                # The opcode adds a block to the block_stack.
                block_stack.append((bc.op, bc.jump_to))
            if bc.op in OPS_POP_BLOCK:
                # The opcode pops a block from the block stack.
                block_stack.pop()
            if bc.op in OPS_CHUNK_END:
                # This opcode forces the end of the chunk.
                if bc.op == OP_BREAK_LOOP:
                    # A break is implicit: jump where the top of the
                    # block_stack points.
                    chunk.exits.add(block_stack[-1][1])
                chunk = None
            if bc.op == OP_END_FINALLY:
                if block_stack:
                    # A break that goes through a finally will jump to whatever
                    # block is on top of the stack.
                    chunk.exits.add(block_stack[-1][1])
                # For the finally clause we need to find the closest exception
                # block, and use its jump target as an exit.
                for iblock in range(len(block_stack)-1, -1, -1):
                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
                        chunk.exits.add(block_stack[iblock][1])
                        break
            if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                # This is an except clause.  We want to overlook the next
                # branch, so that except's don't count as branches.
                ignore_branch += 1

            penult = ult
            ult = bc

        if chunks:
            # The last two bytecodes could be a dummy "return None" that
            # shouldn't be counted as real code. Every Python code object seems
            # to end with a return, and a "return None" is inserted if there
            # isn't an explicit return in the source.
            if ult and penult:
                if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
                    if self.code.co_consts[penult.arg] is None:
                        # This is "return None", but is it dummy?  A real line
                        # would be a last chunk all by itself.
                        if chunks[-1].byte != penult.offset:
                            # Split the last chunk
                            last_chunk = chunks[-1]
                            last_chunk.exits.remove(-1)
                            last_chunk.exits.add(penult.offset)
                            chunk = Chunk(penult.offset)
                            chunk.exits.add(-1)
                            chunks.append(chunk)

            # Give all the chunks a length.  `ult` is the final bytecode
            # seen; it is set whenever `chunks` is non-empty.
            chunks[-1].length = ult.next_offset - chunks[-1].byte
            for i in range(len(chunks)-1):
                chunks[i].length = chunks[i+1].byte - chunks[i].byte

        return chunks

    def _arcs(self):
        """Find the executable arcs in the code.

        Returns a set of pairs, (from,to).  From and to are integer line
        numbers.  If from is -1, then the arc is an entrance into the code
        object.  If to is -1, the arc is an exit from the code object.

        """
        chunks = self._split_into_chunks()

        # A map from byte offsets to chunks jumped into.
        byte_chunks = dict([(c.byte, c) for c in chunks])

        # Build a map from byte offsets to actual lines reached.
        byte_lines = {-1:[-1]}
        bytes_to_add = set([c.byte for c in chunks])

        while bytes_to_add:
            byte_to_add = bytes_to_add.pop()
            if byte_to_add in byte_lines or byte_to_add == -1:
                continue

            # Which lines does this chunk lead to?
            bytes_considered = set()
            bytes_to_consider = [byte_to_add]
            lines = set()

            while bytes_to_consider:
                byte = bytes_to_consider.pop()
                bytes_considered.add(byte)

                # Find chunk for byte
                try:
                    ch = byte_chunks[byte]
                except KeyError:
                    # The offset is not the start of a chunk: find the
                    # chunk that contains it, and remember the answer.
                    for ch in chunks:
                        if ch.byte <= byte < ch.byte+ch.length:
                            break
                    else:
                        # No chunk for this byte!
                        raise Exception("Couldn't find chunk @ %d" % byte)
                    byte_chunks[byte] = ch

                if ch.line:
                    lines.add(ch.line)
                else:
                    # A chunk with no line of its own: follow its exits
                    # until chunks with lines (or the code exit) are found.
                    for ex in ch.exits:
                        if ex == -1:
                            lines.add(-1)
                        elif ex not in bytes_considered:
                            bytes_to_consider.append(ex)

                bytes_to_add.update(ch.exits)

            byte_lines[byte_to_add] = lines

        # Figure out for each chunk where the exits go.
        arcs = set()
        for chunk in chunks:
            if chunk.line:
                for ex in chunk.exits:
                    for exit_line in byte_lines[ex]:
                        if chunk.line != exit_line:
                            arcs.add((chunk.line, exit_line))
        # Byte offset 0 is where the code object is entered.
        for line in byte_lines[0]:
            arcs.add((-1, line))

        return arcs

    def _all_chunks(self):
        """Returns a list of `Chunk` objects for this code and its children.

        See `_split_into_chunks` for details.

        """
        chunks = []
        for bp in self.child_parsers():
            chunks.extend(bp._split_into_chunks())

        return chunks

    def _all_arcs(self):
        """Get the set of all arcs in this code object and its children.

        See `_arcs` for details.

        """
        arcs = set()
        for bp in self.child_parsers():
            arcs.update(bp._arcs())

        return arcs
576
577
class Chunk(object):
    """A run of bytecodes that is only ever entered at its first instruction.

    To analyze byte code, it is divided into chunks: sequences of byte codes
    with a single entrance, the first instruction of the chunk.

    This is almost the CS concept of `basic block`_, except that a chunk is
    allowed to have many exits, and "basic block" is a more cumbersome term.

    .. _basic block: http://en.wikipedia.org/wiki/Basic_block

    An exit of -1 means the chunk can leave the code (return).

    """
    def __init__(self, byte, line=0):
        # `byte` is the offset of the chunk's first instruction.  `line` is
        # the source line number for the chunk, or 0 when it has none.
        self.byte, self.line = byte, line
        # The length is filled in later, once all the chunks are known.
        self.length = 0
        # Byte offsets this chunk can transfer control to (-1: leaves code).
        self.exits = set()

    def __repr__(self):
        exit_list = list(self.exits)
        fields = (self.byte, self.length, self.line, exit_list)
        return "<%d+%d @%d %r>" % fields
604
605
class AdHocMain(object):        # pragma: no cover
    """An ad-hoc main for code parsing experiments."""

    def main(self, args):
        """A main function for trying the code from the command line.

        `args` is the list of command-line arguments, without the program
        name.  With -R, every "*.py" file under the first argument (default
        ".") is processed; otherwise only the file named by the first
        argument.

        """

        from optparse import OptionParser

        parser = OptionParser()
        parser.add_option(
            "-c", action="store_true", dest="chunks",
            help="Show basic block chunks"
            )
        parser.add_option(
            "-d", action="store_true", dest="dis",
            help="Disassemble"
            )
        parser.add_option(
            "-R", action="store_true", dest="recursive",
            help="Recurse to find source files"
            )
        parser.add_option(
            "-s", action="store_true", dest="source",
            help="Show analyzed source"
            )
        parser.add_option(
            "-t", action="store_true", dest="tokens",
            help="Show tokens"
            )

        # Parse the arguments we were handed, instead of silently re-reading
        # sys.argv.
        options, args = parser.parse_args(args)
        if options.recursive:
            if args:
                root = args[0]
            else:
                root = "."
            for dirpath, _, _ in os.walk(root):
                for f in glob.glob(dirpath + "/*.py"):
                    self.adhoc_one_file(options, f)
        else:
            if not args:
                # Give a usage error rather than an IndexError.
                parser.error("a source file is required unless -R is used")
            self.adhoc_one_file(options, args[0])

    def adhoc_one_file(self, options, filename):
        """Process just one file.

        What is printed depends on `options`: a disassembly (dis), chunk and
        arc information (chunks), and/or an annotated source listing
        (source, tokens).

        """

        if options.dis or options.chunks:
            try:
                bp = ByteParser(filename=filename)
            except CoverageException:
                _, err, _ = sys.exc_info()
                print("%s" % (err,))
                return

        if options.dis:
            print("Main code:")
            bp._disassemble()

        if options.chunks:
            chunks = bp._all_chunks()
            # Always compute the arcs: the source listing below needs them
            # even when recursive mode prints only the chunk count.
            arcs = bp._all_arcs()
            if options.recursive:
                print("%6d: %s" % (len(chunks), filename))
            else:
                print("Chunks: %r" % chunks)
                print("Arcs: %r" % sorted(arcs))

        if options.source or options.tokens:
            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
            cp.show_tokens = options.tokens
            cp._raw_parse()

            if options.source:
                if options.chunks:
                    arc_width, arc_chars = self.arc_ascii_art(arcs)
                else:
                    arc_width, arc_chars = 0, {}

                exit_counts = cp.exit_counts()

                # One output line per source line, with marker columns:
                # statement start, exit count, docstring/class, excluded.
                for i, ltext in enumerate(cp.lines):
                    lineno = i+1
                    m0 = m1 = m2 = m3 = a = ' '
                    if lineno in cp.statement_starts:
                        m0 = '-'
                    exits = exit_counts.get(lineno, 0)
                    if exits > 1:
                        m1 = str(exits)
                    if lineno in cp.docstrings:
                        m2 = '"'
                    if lineno in cp.classdefs:
                        m2 = 'C'
                    if lineno in cp.excluded:
                        m3 = 'x'
                    a = arc_chars.get(lineno, '').ljust(arc_width)
                    print("%4d %s%s%s%s%s %s" %
                        (lineno, m0, m1, m2, m3, a, ltext)
                        )

    def arc_ascii_art(self, arcs):
        """Draw arcs as ascii art.

        `arcs` is an iterable of (from, to) line number pairs, where -1
        stands for entering or leaving the code object.

        Returns a width of characters needed to draw all the arcs, and a
        dictionary mapping line numbers to ascii strings to draw for that line.

        """
        arc_chars = {}
        for lfrom, lto in sorted(arcs):
            if lfrom == -1:
                # Entrance into the code.
                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
            elif lto == -1:
                # Exit from the code.
                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
            else:
                if lfrom == lto-1:
                    # Don't show obvious arcs.
                    continue
                if lfrom < lto:
                    l1, l2 = lfrom, lto
                else:
                    l1, l2 = lto, lfrom
                # Draw the arc in a fresh column, to the right of anything
                # already drawn on the lines it spans.
                w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
                for l in range(l1, l2+1):
                    if l == lfrom:
                        ch = '<'
                    elif l == lto:
                        ch = '>'
                    else:
                        ch = '|'
                    arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch

        if arc_chars:
            arc_width = max([len(a) for a in arc_chars.values()])
        else:
            arc_width = 0

        return arc_width, arc_chars
219 742
220 if __name__ == '__main__': 743 if __name__ == '__main__':
221 import sys 744 AdHocMain().main(sys.argv[1:])
222
223 parser = CodeParser(show_tokens=True)
224 parser.raw_parse(filename=sys.argv[1], exclude=r"no\s*cover")
225 parser.print_parse_results()

eric ide

mercurial