eric6/DebugClients/Python/coverage/parser.py

changeset 6942
2602857055c5
parent 6649
f1b3a73831c9
child 7427
362cd1b6f81a
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4 """Code parsing for coverage.py."""
5
6 import ast
7 import collections
8 import os
9 import re
10 import token
11 import tokenize
12
13 from coverage import env
14 from coverage.backward import range # pylint: disable=redefined-builtin
15 from coverage.backward import bytes_to_ints, string_class
16 from coverage.bytecode import CodeObjects
17 from coverage.debug import short_stack
18 from coverage.misc import contract, join_regex, new_contract, nice_pair, one_of
19 from coverage.misc import NoSource, NotPython, StopEverything
20 from coverage.phystokens import compile_unicode, generate_tokens, neuter_encoding_declaration
21
22
23 class PythonParser(object):
24 """Parse code to find executable lines, excluded lines, etc.
25
26 This information is all based on static analysis: no code execution is
27 involved.
28
29 """
@contract(text='unicode|None')
def __init__(self, text=None, filename=None, exclude=None):
    """Prepare to analyze one piece of Python source.

    The source comes from `text` when that is given and non-empty;
    otherwise it is read from `filename` using `get_python_source`.
    `exclude` is a regex: lines matching it are treated as excluded.

    Raises `NoSource` if reading the file fails with an IOError.

    """
    assert text or filename, "PythonParser needs either text or filename"
    self.filename = filename or "<code>"
    self.text = text
    if not self.text:
        # Imported here, at the point of use, rather than at module level.
        from coverage.python import get_python_source
        try:
            self.text = get_python_source(self.filename)
        except IOError as err:
            message = "No source for code: '%s': %s" % (self.filename, err)
            raise NoSource(message)

    self.exclude = exclude

    # The source, split into its text lines.
    self.lines = self.text.split('\n')

    # Normalized results: statement line numbers (exclusions applied) and
    # excluded line numbers, both adjusted to the first line of
    # multi-line statements.
    self.statements = set()
    self.excluded = set()

    # The raw_* attributes are only used in this class, and in
    # lab/parser.py to show how this class is working.
    #   raw_statements: lines that start statements, per the bytecode's
    #       line number table.
    #   raw_excluded: lines excluded by pragmas.
    #   raw_classdefs: lines of class and function definitions.
    #   raw_docstrings: lines that belong to docstrings.
    self.raw_statements = set()
    self.raw_excluded = set()
    self.raw_classdefs = set()
    self.raw_docstrings = set()

    # Internal detail, used by lab/parser.py.
    self.show_tokens = False

    # Maps each line of a multi-line statement to the line where the
    # statement lexically starts.
    self._multiline = {}

    # Created on demand: the ByteParser, the arc data, and the
    # descriptions of missing arcs.
    self._byte_parser = None
    self._all_arcs = None
    self._missing_arc_fragments = None
91
@property
def byte_parser(self):
    """The ByteParser for this source, built on first use and then reused."""
    parser = self._byte_parser
    if not parser:
        parser = ByteParser(self.text, filename=self.filename)
        self._byte_parser = parser
    return parser
98
def lines_matching(self, *regexes):
    """Return the numbers of the lines matched by any of `regexes`.

    The regexes are combined with `join_regex` and searched for in each
    line of `self.lines`; a match anywhere in the line is enough. Line
    numbers start at 1. The result is a set.

    """
    pattern = join_regex(regexes)
    if env.PY2:
        pattern = pattern.decode("utf8")
    search = re.compile(pattern).search
    return set(
        lineno
        for lineno, line_text in enumerate(self.lines, start=1)
        if search(line_text)
    )
116
def _raw_parse(self):
    """Scan the source and record raw facts about its lines.

    Updates `raw_excluded`, `raw_classdefs`, `raw_docstrings`,
    `_multiline` and `raw_statements` in place.

    """
    # Lines that match the exclusion pattern.
    if self.exclude:
        self.raw_excluded = self.lines_matching(self.exclude)

    # State for the token scan, which finds excluded suites, docstrings
    # and multi-line statements.
    depth = 0                       # current INDENT nesting level
    suite_depth = 0                 # depth at which the excluded suite began
    in_excluded_suite = False
    in_excluded_decorators = False
    last_toktype = token.INDENT
    stmt_start = None               # first line of the statement in progress
    saw_code = False                # any non-blank, non-comment token so far?
    at_line_start = True

    for tok in generate_tokens(self.text):
        toktype, ttext, (slineno, _), (elineno, _), ltext = tok
        if self.show_tokens:                # pragma: debugging
            print("%10s %5s %-20r %r" % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
            ))

        if toktype == token.INDENT:
            depth += 1
        elif toktype == token.DEDENT:
            depth -= 1
        elif toktype == token.NAME:
            if ttext == 'class':
                # Class definitions look like branches in the bytecode, so
                # they need special treatment later. Noting the lines with
                # the 'class' keyword is the simplest way to find them.
                self.raw_classdefs.add(slineno)
        elif toktype == token.OP:
            if ttext == ':':
                if not in_excluded_suite:
                    if in_excluded_decorators or elineno in self.raw_excluded:
                        # Begin excluding a suite. The colon is the trigger
                        # so that a pragma comment on the colon's own line
                        # is recognized.
                        self.raw_excluded.add(elineno)
                        suite_depth = depth
                        in_excluded_suite = True
                        in_excluded_decorators = False
            elif ttext == '@' and at_line_start:
                # A decorator.
                if elineno in self.raw_excluded:
                    in_excluded_decorators = True
                if in_excluded_decorators:
                    self.raw_excluded.add(elineno)
        elif toktype == token.STRING and last_toktype == token.INDENT:
            # A string that is first on an indented line is a docstring
            # (a trick from trace.py in the stdlib). This works for
            # 99.9999% of cases. For the rest (!) see:
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            self.raw_docstrings.update(range(slineno, elineno+1))
        elif toktype == token.NEWLINE:
            if stmt_start is not None and elineno != stmt_start:
                # The statement ended on a different line than it started
                # on: map every line of it back to its first line.
                self._multiline.update(
                    dict.fromkeys(range(stmt_start, elineno+1), stmt_start)
                )
            stmt_start = None
            at_line_start = True

        if ttext.strip() and toktype != tokenize.COMMENT:
            # A token with real content.
            saw_code = True
            if stmt_start is None:
                # ... and it is the first one of a statement.
                stmt_start = slineno
                # Has the excluded suite ended?
                if in_excluded_suite and depth <= suite_depth:
                    in_excluded_suite = False
                if in_excluded_suite:
                    self.raw_excluded.add(elineno)
                at_line_start = False

        last_toktype = toktype

    # Ask the bytecode where the executable statements start.
    if saw_code:
        self.raw_statements.update(self.byte_parser._find_statements())
207
def first_line(self, line):
    """Return the first line number of the statement that contains `line`.

    Lines not recorded in the multi-line map are returned unchanged.

    """
    try:
        return self._multiline[line]
    except KeyError:
        return line
211
def first_lines(self, lines):
    """Map each line number in `lines` to the first line of its statement.

    Returns the set of those first lines.

    """
    return set(map(self.first_line, lines))
220
def translate_lines(self, lines):
    """Implement `FileReporter.translate_lines`.

    Delegates to `first_lines`.

    """
    translated = self.first_lines(lines)
    return translated
224
def translate_arcs(self, arcs):
    """Implement `FileReporter.translate_arcs`.

    Both ends of every arc are mapped through `first_line`; the result is
    a list in the same order as `arcs`.

    """
    translated = []
    for from_line, to_line in arcs:
        translated.append((self.first_line(from_line), self.first_line(to_line)))
    return translated
228
def parse_source(self):
    """Parse the source text and compute the normalized line sets.

    Sets `.excluded` and `.statements`, both normalized to the first line
    of multi-line statements.

    Raises `NotPython` if tokenizing fails with a TokenError or an
    IndentationError.

    """
    try:
        self._raw_parse()
    except (tokenize.TokenError, IndentationError) as err:
        # IndentationError has a `lineno`; for TokenError the position is
        # taken from the second exception argument.
        lineno = err.lineno if hasattr(err, "lineno") else err.args[1][0]
        details = (self.filename, err.args[0], lineno)
        raise NotPython(
            u"Couldn't parse '%s' as Python source: '%s' at line %d" % details
        )

    self.excluded = self.first_lines(self.raw_excluded)

    # Excluded lines and docstring lines never count as statements, neither
    # before nor after normalizing to first lines.
    ignore = self.excluded | self.raw_docstrings
    self.statements = self.first_lines(self.raw_statements - ignore) - ignore
254
def arcs(self):
    """Return the arcs available in the code.

    The result is a set of line number pairs, normalized to the first
    line of multi-line statements. The AST analysis runs on the first
    call and its result is reused afterwards.

    """
    found = self._all_arcs
    if found is None:
        self._analyze_ast()
        found = self._all_arcs
    return found
265
def _analyze_ast(self):
    """Run an AstArcAnalyzer over the source and keep its results.

    `_all_arcs` becomes the set of arcs, with both ends mapped to the
    first line of their statements; arcs whose two ends map to the same
    line are dropped. `_missing_arc_fragments` is taken from the analyzer
    unchanged.

    """
    analyzer = AstArcAnalyzer(self.text, self.raw_statements, self._multiline)
    analyzer.analyze()

    self._all_arcs = normalized = set()
    for from_line, to_line in analyzer.arcs:
        pair = (self.first_line(from_line), self.first_line(to_line))
        if pair[0] != pair[1]:
            normalized.add(pair)

    self._missing_arc_fragments = analyzer.missing_arc_fragments
283
def exit_counts(self):
    """Count the exits from each line.

    Returns a defaultdict mapping a line number to the number of arcs
    that leave it. Arcs starting at a negative line number, and arcs that
    start or end on an excluded line, are not counted.

    """
    all_arcs = self.arcs()
    excluded = self.excluded

    counts = collections.defaultdict(int)
    for from_line, to_line in all_arcs:
        # Skip: negative "from" lines are never reported as line numbers,
        # excluded lines are not reported, and arcs to excluded lines
        # shouldn't count.
        if from_line < 0 or from_line in excluded or to_line in excluded:
            continue
        counts[from_line] += 1

    # Class definitions have one extra exit, so remove one for each. Only
    # touch lines already counted: class definitions can include excluded
    # lines.
    for class_line in self.raw_classdefs:
        if class_line in counts:
            counts[class_line] -= 1

    return counts
310
def missing_arc_description(self, start, end, executed_arcs=None):
    """Provide an English sentence describing a missing arc.

    `start` and `end` are the line numbers of the arc. `executed_arcs`,
    if given, is the collection of arcs that were executed; it is only
    used to recognize a one-line callable that was never started.

    Returns the messages for all recorded fragment pairs of the arc,
    joined with " or ". An arc with no recorded fragments gets a generic
    "didn't jump to ..." message.

    """
    if self._missing_arc_fragments is None:
        self._analyze_ast()

    actual_start = start

    if (
        executed_arcs and
        end < 0 and end == -start and
        (end, start) not in executed_arcs and
        (end, start) in self._missing_arc_fragments
    ):
        # It's a one-line callable, and we never even started it,
        # and we have a message about not starting it.
        start, end = end, start

    fragment_pairs = self._missing_arc_fragments.get((start, end), [(None, None)])

    msgs = []
    for smsg, emsg in fragment_pairs:
        if emsg is None:
            if end < 0:
                # Hmm, maybe we have a one-line callable, let's check.
                # Only look it up when it is a different arc from the one
                # being described: if start == -end, the lookup would call
                # this method again with the very same arguments and never
                # terminate.
                if start != -end and (-end, end) in self._missing_arc_fragments:
                    return self.missing_arc_description(-end, end)
                emsg = "didn't jump to the function exit"
            else:
                emsg = "didn't jump to line {lineno}"
        emsg = emsg.format(lineno=end)

        msg = "line {start} {emsg}".format(start=actual_start, emsg=emsg)
        if smsg is not None:
            msg += ", because {smsg}".format(smsg=smsg.format(lineno=actual_start))

        msgs.append(msg)

    return " or ".join(msgs)
351
352
353 class ByteParser(object):
354 """Parse bytecode to understand the structure of code."""
355
@contract(text='unicode')
def __init__(self, text, code=None, filename=None):
    """Wrap a code object, compiling `text` if no `code` is supplied.

    Raises `NotPython` if compiling `text` fails with a SyntaxError, and
    `StopEverything` if the code object lacks the attributes the analysis
    needs.

    """
    self.text = text
    if not code:
        try:
            code = compile_unicode(text, filename, "exec")
        except SyntaxError as synerr:
            raise NotPython(
                u"Couldn't parse '%s' as Python source: '%s' at line %d" % (
                    filename, synerr.msg, synerr.lineno
                )
            )
    self.code = code

    # Alternative Python implementations don't always provide all the
    # attributes on code objects that we need to do the analysis.
    required = ('co_lnotab', 'co_firstlineno')
    if not all(hasattr(self.code, attr) for attr in required):
        raise StopEverything(                   # pragma: only jython
            "This implementation of Python doesn't support code analysis.\n"
            "Run coverage.py under another Python for this command."
        )
379
def child_parsers(self):
    """Iterate over ByteParsers for the code objects found by `CodeObjects`.

    One parser is produced, lazily, for each code object that
    `CodeObjects(self.code)` yields. The iteration includes `self` as its
    first value.

    """
    return (
        ByteParser(self.text, code=code_object)
        for code_object in CodeObjects(self.code)
    )
388
def _bytes_lines(self):
    """Map byte offsets to line numbers in `code`.

    Decodes `co_lnotab` (described in Python/compile.c), which alternates
    byte-offset increments and line-number increments, and yields
    (byte offset, line number) pairs: (b0, l0), (b1, l1), ...

    Only byte offsets that correspond to line numbers are included in the
    results.

    """
    # Adapted from dis.py in the standard library.
    lnotab = self.code.co_lnotab
    offset_steps = bytes_to_ints(lnotab[0::2])
    line_steps = bytes_to_ints(lnotab[1::2])

    reported_line = None
    current_line = self.code.co_firstlineno
    offset = 0
    for offset_step, line_step in zip(offset_steps, line_steps):
        if offset_step:
            # The offset is about to advance: report the line reached so
            # far, unless it was the last one reported.
            if current_line != reported_line:
                yield (offset, current_line)
                reported_line = current_line
            offset += offset_step
        if line_step >= 0x80 and env.PYVERSION >= (3, 6):
            # From 3.6 on, the line increment is a signed byte.
            line_step -= 0x100
        current_line += line_step
    if current_line != reported_line:
        yield (offset, current_line)
417
418 def _find_statements(self):
419 """Find the statements in `self.code`.
420
421 Produce a sequence of line numbers that start statements. Recurses
422 into all code objects reachable from `self.code`.
423
424 """
425 for bp in self.child_parsers():
426 # Get all of the lineno information from this code.
427 for _, l in bp._bytes_lines():
428 yield l
429
430
431 #
432 # AST analysis
433 #
434
class LoopBlock(object):
    """Block-stack entry for a `for` or `while` loop."""
    @contract(start=int)
    def __init__(self, start):
        # ArcStarts for the `break` statements that leave this loop.
        self.break_exits = set()
        # Line number where the loop starts.
        self.start = start
443
444
class FunctionBlock(object):
    """Block-stack entry for a function definition."""
    @contract(start=int, name=str)
    def __init__(self, start, name):
        # Name of the function.
        self.name = name
        # Line number where the function starts.
        self.start = start
453
454
class TryBlock(object):
    """Block-stack entry for a `try` statement."""
    @contract(handler_start='int|None', final_start='int|None')
    def __init__(self, handler_start, final_start):
        # Line number of the first "except" handler, or None.
        self.handler_start = handler_start
        # Line number of the "finally:" clause, or None.
        self.final_start = final_start

        # ArcStarts collected from inside the "try:" that must be routed
        # through the "finally:" clause, one set per kind of jump.
        self.raise_from = set()
        self.return_from = set()
        self.continue_from = set()
        self.break_from = set()
470
471
class ArcStart(collections.namedtuple("Arc", "lineno, cause")):
    """The information needed to start an arc.

    `lineno` is the line number the arc starts from.

    `cause` is an English text fragment used as the `startmsg` for
    AstArcAnalyzer.missing_arc_fragments. It will be used to describe why an
    arc wasn't executed, so should fit well into a sentence of the form,
    "Line 17 didn't run because {cause}." The fragment can include "{lineno}"
    to have `lineno` interpolated into it. `cause` defaults to None.

    """
    # Without empty __slots__, every instance of this namedtuple subclass
    # would also carry its own `__dict__`. Many ArcStarts are created, and
    # none of them needs extra attributes.
    __slots__ = ()

    def __new__(cls, lineno, cause=None):
        return super(ArcStart, cls).__new__(cls, lineno, cause)
486
487
# Define contract words that PyContract doesn't have.
# ArcStarts is for a list or set of ArcStart objects.
490 new_contract('ArcStarts', lambda seq: all(isinstance(x, ArcStart) for x in seq))
491
492
# Set the COVERAGE_AST_DUMP environment variable to a non-zero integer to
# turn on AST dumps.
AST_DUMP = int(os.environ.get("COVERAGE_AST_DUMP", 0)) != 0
495
class NodeList(object):
    """A synthetic node that stands for a sequence of nodes.

    This is used when collapsing optimized if-statements, to represent the
    unconditional execution of one of the clauses. Its line number is the
    line number of the first node in the sequence.

    """
    def __init__(self, body):
        self.body = body
        first_node = body[0]
        self.lineno = first_node.lineno
506
507
508 # TODO: some add_arcs methods here don't add arcs, they return them. Rename them.
509 # TODO: the cause messages have too many commas.
510 # TODO: Shouldn't the cause messages join with "and" instead of "or"?
511
512 class AstArcAnalyzer(object):
513 """Analyze source text with an AST to find executable code paths."""
514
@contract(text='unicode', statements=set)
def __init__(self, text, statements, multiline):
    """Parse `text` into an AST and set up empty analysis results.

    `statements` is a set of statement line numbers and `multiline` maps
    line numbers to the first line of their statement; the statements are
    stored already mapped through `multiline`.

    """
    self.root_node = ast.parse(neuter_encoding_declaration(text))
    # TODO: I think this is happening in too many places.
    self.statements = set(multiline.get(lineno, lineno) for lineno in statements)
    self.multiline = multiline

    if AST_DUMP:                                # pragma: debugging
        # Dump the AST so that failing tests have helpful output.
        print("Statements: {0}".format(self.statements))
        print("Multiline map: {0}".format(self.multiline))
        ast_dump(self.root_node)

    # The (from, to) line number pairs found so far.
    self.arcs = set()

    # A map from arc pairs to a list of pairs of sentence fragments:
    #   { (start, end): [(startmsg, endmsg), ...], }
    #
    # For an arc from line 17, they should be usable like:
    #    "Line 17 {endmsg}, because {startmsg}"
    self.missing_arc_fragments = collections.defaultdict(list)

    # The enclosing loop/function/try blocks, innermost last.
    self.block_stack = []

    # Arc tracing is switched on by a non-zero COVERAGE_TRACK_ARCS.
    self.debug = bool(int(os.environ.get("COVERAGE_TRACK_ARCS", 0)))
539
def analyze(self):
    """Examine the AST tree from `root_node` to determine possible arcs.

    Every node of the tree is visited; a node whose class name has a
    matching `_code_object__<ClassName>` method on this object is passed
    to that method. Other nodes are ignored here.

    This sets the `arcs` attribute to be a set of (from, to) line number
    pairs.

    """
    for node in ast.walk(self.root_node):
        handler_name = "_code_object__%s" % node.__class__.__name__
        handler = getattr(self, handler_name, None)
        if handler is None:
            continue
        handler(node)
552
@contract(start=int, end=int)
def add_arc(self, start, end, smsg=None, emsg=None):
    """Record the arc (start, end).

    If either message fragment is given, the (smsg, emsg) pair is also
    stored, to be used when the arc is reported as missing.

    """
    if self.debug:                      # pragma: debugging
        print("\nAdding arc: ({}, {}): {!r}, {!r}".format(start, end, smsg, emsg))
        print(short_stack(limit=6))

    arc = (start, end)
    self.arcs.add(arc)

    if not (smsg is None and emsg is None):
        self.missing_arc_fragments[arc].append((smsg, emsg))
563
def nearest_blocks(self):
    """Iterate over the block stack from the nearest block to the farthest."""
    stack = self.block_stack
    return reversed(stack)
567
@contract(returns=int)
def line_for_node(self, node):
    """Return the line number to use for `node`.

    A `_line__<ClassName>` method is used when one exists for the node's
    class; otherwise the node's own `lineno` is the answer.

    """
    handler = getattr(self, "_line__" + node.__class__.__name__, None)
    if handler is None:
        return node.lineno
    return handler(node)
581
def _line_decorated(self, node):
    """Compute first line number for things that can be decorated (classes and functions).

    This is the node's own line, unless `env.PYBEHAVIOR.trace_decorated_def`
    is set and the node has decorators, in which case it is the line of
    the first decorator.

    """
    own_line = node.lineno
    decorators = node.decorator_list if env.PYBEHAVIOR.trace_decorated_def else None
    if decorators:
        return decorators[0].lineno
    return own_line
589
590 def _line__Assign(self, node):
591 return self.line_for_node(node.value)
592
593 _line__ClassDef = _line_decorated
594
def _line__Dict(self, node):
    """Line for a dict literal.

    On Python 3.5 and later, a non-empty dict uses the line of its first
    key, or of its first value when that key is None. In all other cases
    the node's own line is used.

    """
    # Python 3.5 changed how dict literals are made.
    if not (env.PYVERSION >= (3, 5) and node.keys):
        return node.lineno
    first_key = node.keys[0]
    if first_key is None:
        # Unpacked dict literals `{**{'a':1}}` have None as the key,
        # use the value in that case.
        return node.values[0].lineno
    return first_key.lineno
606
607 _line__FunctionDef = _line_decorated
608
609 def _line__List(self, node):
610 if node.elts:
611 return self.line_for_node(node.elts[0])
612 else:
613 return node.lineno
614
615 def _line__Module(self, node):
616 if node.body:
617 return self.line_for_node(node.body[0])
618 else:
619 # Empty modules have no line number, they always start at 1.
620 return 1
621
622 # The node types that just flow to the next node with no complications.
623 OK_TO_DEFAULT = set([
624 "Assign", "Assert", "AugAssign", "Delete", "Exec", "Expr", "Global",
625 "Import", "ImportFrom", "Nonlocal", "Pass", "Print",
626 ])
627
@contract(returns='ArcStarts')
def add_arcs(self, node):
    """Add the arcs for `node`.

    Return a set of ArcStarts, exits from this node to the next. Because a
    node represents an entire sub-tree (including its children), the exits
    from a node can be arbitrarily complex::

        if something(1):
            if other(2):
                doit(3)
            else:
                doit(5)

    There are two exits from line 1: they start at line 3 and line 5.

    The work is done by the `_handle__<ClassName>` method for the node's
    class, when there is one.

    """
    kind = node.__class__.__name__
    handler = getattr(self, "_handle__" + kind, None)
    if handler is not None:
        return handler(node)

    # No handler: either it's something that's ok to default (a simple
    # statement), or it's something we overlooked. Change this 0 to 1
    # to see if it's overlooked.
    if 0:
        if kind not in self.OK_TO_DEFAULT:
            print("*** Unhandled: {0}".format(node))

    # Default for simple statements: one exit from this node.
    only_exit = ArcStart(self.line_for_node(node))
    return set([only_exit])
659
@one_of("from_start, prev_starts")
@contract(returns='ArcStarts')
def add_body_arcs(self, body, from_start=None, prev_starts=None):
    """Add arcs for the body of a compound statement.

    `body` is the body node. `from_start` is a single `ArcStart` that can
    be the previous line in flow before this body. `prev_starts` is a set
    of ArcStarts that can be the previous line. Only one of them should be
    given.

    Returns a set of ArcStarts, the exits from this body.

    """
    starts = set([from_start]) if prev_starts is None else prev_starts
    for stmt in body:
        stmt_line = self.line_for_node(stmt)
        if self.multiline.get(stmt_line, stmt_line) not in self.statements:
            # This statement isn't among the known statements: look for a
            # replacement inside it, and skip it if nothing remains.
            stmt = self.find_non_missing_node(stmt)
            if stmt is None:
                continue
            stmt_line = self.line_for_node(stmt)
        for start in starts:
            self.add_arc(start.lineno, stmt_line, start.cause)
        # The exits of this statement are the starts for the next one.
        starts = self.add_arcs(stmt)
    return starts
687
def find_non_missing_node(self, node):
    """Search `node` looking for a child that has not been optimized away.

    This might return the node you started with, or it will use the
    `_missing__<ClassName>` method for the node's class to find a
    replacement.

    Returns a node, or None if none of the node remains.

    """
    # This repeats work just done in add_body_arcs, but this duplication
    # means we can avoid a function call in the 99.9999% case of not
    # optimizing away statements.
    node_line = self.line_for_node(node)
    if self.multiline.get(node_line, node_line) in self.statements:
        return node

    missing_fn = getattr(self, "_missing__" + node.__class__.__name__, None)
    return missing_fn(node) if missing_fn else None
711
712 # Missing nodes: _missing__*
713 #
714 # Entire statements can be optimized away by Python. They will appear in
715 # the AST, but not the bytecode. These functions are called (by
716 # find_non_missing_node) to find a node to use instead of the missing
717 # node. They can return None if the node should truly be gone.
718
def _missing__If(self, node):
    """Find what remains of an `if` statement that is itself missing.

    If the if-node is missing, then one of its clauses might still be
    here, but not both. The clauses are tried in order, body first, each
    wrapped in a NodeList so it can be handled as a single node.

    """
    from_body = self.find_non_missing_node(NodeList(node.body))
    if from_body:
        return from_body
    if not node.orelse:
        return None
    return self.find_non_missing_node(NodeList(node.orelse))
729
730 def _missing__NodeList(self, node):
731 # A NodeList might be a mixture of missing and present nodes. Find the
732 # ones that are present.
733 non_missing_children = []
734 for child in node.body:
735 child = self.find_non_missing_node(child)
736 if child is not None:
737 non_missing_children.append(child)
738
739 # Return the simplest representation of the present children.
740 if not non_missing_children:
741 return None
742 if len(non_missing_children) == 1:
743 return non_missing_children[0]
744 return NodeList(non_missing_children)
745
def _missing__While(self, node):
    """Find what remains of a `while` statement that is itself missing.

    Returns None if nothing of the loop body remains. Otherwise returns a
    synthetic `while True:` node that holds the remaining body statements
    and is positioned on the line of the first of them.

    """
    body_nodes = self.find_non_missing_node(NodeList(node.body))
    if not body_nodes:
        return None

    # What comes back is either a NodeList or, when exactly one statement
    # of the body survives, that bare statement node. Only a NodeList's
    # `.body` is the list of surviving statements: a bare simple statement
    # has no `.body` at all, and a bare compound statement's `.body` is
    # its own suite, not the loop's. So wrap a lone statement in a list.
    if isinstance(body_nodes, NodeList):
        surviving = body_nodes.body
    else:
        surviving = [body_nodes]

    # Make a synthetic While-true node.
    new_while = ast.While()
    new_while.lineno = body_nodes.lineno
    new_while.test = ast.Name()
    new_while.test.lineno = body_nodes.lineno
    new_while.test.id = "True"
    new_while.body = surviving
    new_while.orelse = None
    return new_while
759
def is_constant_expr(self, node):
    """Is this a compile-time constant?

    Returns "Num" for Constant, NameConstant and Num nodes, "Name" for a
    Name node whose id is True, False, None or __debug__, and None for
    everything else.

    """
    kind = node.__class__.__name__
    if kind in ("Constant", "NameConstant", "Num"):
        return "Num"
    if kind == "Name" and node.id in ("True", "False", "None", "__debug__"):
        return "Name"
    return None
769
770 # In the fullness of time, these might be good tests to write:
771 # while EXPR:
772 # while False:
773 # listcomps hidden deep in other expressions
774 # listcomps hidden in lists: x = [[i for i in range(10)]]
775 # nested function definitions
776
777
778 # Exit processing: process_*_exits
779 #
780 # These functions process the four kinds of jump exits: break, continue,
781 # raise, and return. To figure out where an exit goes, we have to look at
782 # the block stack context. For example, a break will jump to the nearest
783 # enclosing loop block, or the nearest enclosing finally block, whichever
784 # is nearer.
785
@contract(exits='ArcStarts')
def process_break_exits(self, exits):
    """Add arcs due to jumps from `exits` being breaks.

    The exits are handed to the nearest block that is either a loop or a
    `try` with a `finally` clause; nothing happens if there is none.

    """
    for block in self.nearest_blocks():
        if isinstance(block, LoopBlock):
            block.break_exits.update(exits)
            return
        if isinstance(block, TryBlock) and block.final_start is not None:
            block.break_from.update(exits)
            return
796
@contract(exits='ArcStarts')
def process_continue_exits(self, exits):
    """Add arcs due to jumps from `exits` being continues.

    The nearest loop gets an arc from each exit back to its start line,
    unless a `try` with a `finally` clause is nearer, in which case that
    block collects the exits instead.

    """
    for block in self.nearest_blocks():
        if isinstance(block, LoopBlock):
            loop_start = block.start
            for xit in exits:
                self.add_arc(xit.lineno, loop_start, xit.cause)
            return
        if isinstance(block, TryBlock) and block.final_start is not None:
            block.continue_from.update(exits)
            return
808
@contract(exits='ArcStarts')
def process_raise_exits(self, exits):
    """Add arcs due to jumps from `exits` being raises.

    Walking outward, the first block that can receive the raise wins: a
    `try` with handlers gets arcs to its first handler, a `try` with only
    a `finally` clause collects the exits, and a function gets arcs to
    its exit (the negated start line). A `try` block with neither is
    passed over.

    """
    for block in self.nearest_blocks():
        if isinstance(block, TryBlock):
            if block.handler_start is not None:
                for xit in exits:
                    self.add_arc(xit.lineno, block.handler_start, xit.cause)
                return
            if block.final_start is not None:
                block.raise_from.update(exits)
                return
        elif isinstance(block, FunctionBlock):
            for xit in exits:
                emsg = "didn't except from function '{0}'".format(block.name)
                self.add_arc(xit.lineno, -block.start, xit.cause, emsg)
            return
828
@contract(exits='ArcStarts')
def process_return_exits(self, exits):
    """Add arcs due to jumps from `exits` being returns.

    A `try` with a `finally` clause, if nearer than the function,
    collects the exits; otherwise the enclosing function gets arcs from
    each exit to its exit (the negated start line).

    """
    for block in self.nearest_blocks():
        if isinstance(block, TryBlock) and block.final_start is not None:
            block.return_from.update(exits)
            return
        if isinstance(block, FunctionBlock):
            for xit in exits:
                emsg = "didn't return from function '{0}'".format(block.name)
                self.add_arc(xit.lineno, -block.start, xit.cause, emsg)
            return
843
844
845 # Handlers: _handle__*
846 #
847 # Each handler deals with a specific AST node type, dispatched from
848 # add_arcs. Handlers return the set of exits from that node, and can
849 # also call self.add_arc to record arcs they find. These functions mirror
850 # the Python semantics of each syntactic construct. See the docstring
851 # for add_arcs to understand the concept of exits from a node.
852
@contract(returns='ArcStarts')
def _handle__Break(self, node):
    """Handle `break`: route it via the block stack; no normal exits."""
    cause = "the break on line {lineno} wasn't executed"
    self.process_break_exits([ArcStart(self.line_for_node(node), cause=cause)])
    # `break` jumps away, so nothing flows on from here.
    return set()
859
@contract(returns='ArcStarts')
def _handle_decorated(self, node):
    """Add arcs for things that can be decorated (classes and functions).

    Returns a set with a single ArcStart, for the last line this method
    linked to.

    """
    def_line = node.lineno
    last = def_line
    decorators = node.decorator_list
    if decorators:
        # With trace_decorated_def, the chain runs from the decorators to
        # the definition line; without it, the chain starts at the
        # definition line.
        decorators_first = env.PYBEHAVIOR.trace_decorated_def
        if decorators_first:
            last = None
        for decorator in decorators:
            dec_line = self.line_for_node(decorator)
            if last is not None and dec_line != last:
                self.add_arc(last, dec_line)
            last = dec_line
        if decorators_first:
            self.add_arc(last, def_line)
            last = def_line
        # The definition line may have been missed, but we should have it
        # in `self.statements`. For some constructs, `line_for_node` is
        # not what we'd think of as the first line in the statement, so map
        # it to the first one.
        if node.body:
            body_line = self.line_for_node(node.body[0])
            body_line = self.multiline.get(body_line, body_line)
            for lineno in range(last+1, body_line):
                if lineno in self.statements:
                    self.add_arc(last, lineno)
                    last = lineno
    # The body itself is not walked here.
    return set([ArcStart(last)])
888
889 _handle__ClassDef = _handle_decorated
890
@contract(returns='ArcStarts')
def _handle__Continue(self, node):
    """Handle `continue`: route it via the block stack; no normal exits."""
    cause = "the continue on line {lineno} wasn't executed"
    self.process_continue_exits([ArcStart(self.line_for_node(node), cause=cause)])
    # `continue` jumps away, so nothing flows on from here.
    return set()
897
@contract(returns='ArcStarts')
def _handle__For(self, node):
    """Handle a `for` loop.

    Returns the loop's exits: its breaks, plus either the exits of the
    `else` clause or, without one, the loop line itself.

    """
    top = self.line_for_node(node.iter)
    self.block_stack.append(LoopBlock(start=top))
    entering = ArcStart(top, cause="the loop on line {lineno} never started")
    # Any exit from the body will go back to the top of the loop.
    for body_exit in self.add_body_arcs(node.body, from_start=entering):
        self.add_arc(body_exit.lineno, top, body_exit.cause)
    loop_block = self.block_stack.pop()

    exits = loop_block.break_exits
    finishing = ArcStart(top, cause="the loop on line {lineno} didn't complete")
    if node.orelse:
        exits.update(self.add_body_arcs(node.orelse, from_start=finishing))
    else:
        # No else clause: exit from the for line.
        exits.add(finishing)
    return exits
917
918 _handle__AsyncFor = _handle__For
919
920 _handle__FunctionDef = _handle_decorated
921 _handle__AsyncFunctionDef = _handle_decorated
922
@contract(returns='ArcStarts')
def _handle__If(self, node):
    """Handle an `if` statement: the exits of both of its clauses."""
    test_line = self.line_for_node(node.test)
    when_true = ArcStart(test_line, cause="the condition on line {lineno} was never true")
    when_false = ArcStart(test_line, cause="the condition on line {lineno} was never false")
    exits = self.add_body_arcs(node.body, from_start=when_true)
    exits.update(self.add_body_arcs(node.orelse, from_start=when_false))
    return exits
931
@contract(returns='ArcStarts')
def _handle__NodeList(self, node):
    """Handle a NodeList: its nodes are treated as a body entered from its line."""
    entry = ArcStart(self.line_for_node(node))
    return self.add_body_arcs(node.body, from_start=entry)
937
@contract(returns='ArcStarts')
def _handle__Raise(self, node):
    """Handle `raise`: route it via the block stack; no normal exits."""
    cause = "the raise on line {lineno} wasn't executed"
    self.process_raise_exits([ArcStart(self.line_for_node(node), cause=cause)])
    # `raise` statement jumps away, no exits from here.
    return set()
945
@contract(returns='ArcStarts')
def _handle__Return(self, node):
    """Record a `return` statement; it has no fall-through exits.

    The statement's line is handed to `process_return_exits` as the origin
    of the jump.
    """
    lineno = self.line_for_node(node)
    jump = ArcStart(lineno, cause="the return on line {lineno} wasn't executed")
    self.process_return_exits([jump])
    # `return` statement jumps away, no exits from here.
    return set()
953
@contract(returns='ArcStarts')
def _handle__Try(self, node):
    """Add the arcs for a `try` statement and return its exits.

    Covers the `try` body, each `except` clause, the `else` clause and the
    `finally` clause.  A `TryBlock` is pushed on the block stack while the
    relevant parts are analyzed so that `break`, `continue`, `raise` and
    `return` statements found inside them can be collected and later routed
    through the `finally` clause.

    Returns the set of ArcStarts from which execution continues after the
    whole statement.
    """
    if node.handlers:
        handler_start = self.line_for_node(node.handlers[0])
    else:
        handler_start = None

    if node.finalbody:
        final_start = self.line_for_node(node.finalbody[0])
    else:
        final_start = None

    try_block = TryBlock(handler_start, final_start)
    self.block_stack.append(try_block)

    start = self.line_for_node(node)
    exits = self.add_body_arcs(node.body, from_start=ArcStart(start))

    # We're done with the `try` body, so this block no longer handles
    # exceptions. We keep the block so the `finally` clause can pick up
    # flows from the handlers and `else` clause.
    if node.finalbody:
        try_block.handler_start = None
        if node.handlers:
            # If there are `except` clauses, then raises in the try body
            # will already jump to them.  Start this set over for raises in
            # `except` and `else`.
            try_block.raise_from = set()
    else:
        self.block_stack.pop()

    handler_exits = set()

    if node.handlers:
        last_handler_start = None
        for handler_node in node.handlers:
            handler_start = self.line_for_node(handler_node)
            # Chain each `except` line to the next one.
            if last_handler_start is not None:
                self.add_arc(last_handler_start, handler_start)
            last_handler_start = handler_start
            from_cause = "the exception caught by line {lineno} didn't happen"
            from_start = ArcStart(handler_start, cause=from_cause)
            handler_exits |= self.add_body_arcs(handler_node.body, from_start=from_start)

    if node.orelse:
        exits = self.add_body_arcs(node.orelse, prev_starts=exits)

    exits |= handler_exits

    if node.finalbody:
        self.block_stack.pop()
        final_from = (                  # You can get to the `finally` clause from:
            exits |                         # the exits of the body or `else` clause,
            try_block.break_from |          # or a `break`,
            try_block.continue_from |       # or a `continue`,
            try_block.raise_from |          # or a `raise`,
            try_block.return_from           # or a `return`.
        )

        final_exits = self.add_body_arcs(node.finalbody, prev_starts=final_from)

        if try_block.break_from:
            self.process_break_exits(
                self._finally_jump_exits(try_block.break_from, final_exits)
            )

        if try_block.continue_from:
            self.process_continue_exits(
                self._finally_jump_exits(try_block.continue_from, final_exits)
            )

        if try_block.raise_from:
            self.process_raise_exits(
                self._combine_finally_starts(try_block.raise_from, final_exits)
            )

        if try_block.return_from:
            self.process_return_exits(
                self._finally_jump_exits(try_block.return_from, final_exits)
            )

        if exits:
            # The finally clause's exits are only exits for the try block
            # as a whole if the try block had some exits to begin with.
            exits = final_exits

    return exits

def _finally_jump_exits(self, jump_starts, final_exits):
    """Route `break`/`continue`/`return` jumps through a `finally` clause.

    `jump_starts` are the ArcStarts of the jump statements collected by the
    try block; `final_exits` are the exits of the `finally` body.

    When `env.PYBEHAVIOR.finally_jumps_back` is true, an arc is added from
    every `finally` exit back to each jump line, and `jump_starts` is
    returned unchanged.  Otherwise the `finally` exits are returned,
    relabeled by `_combine_finally_starts` with the causes of the jumps.
    """
    if not env.PYBEHAVIOR.finally_jumps_back:
        return self._combine_finally_starts(jump_starts, final_exits)

    for jump in jump_starts:
        lineno = jump.lineno
        cause = jump.cause.format(lineno=lineno)
        for final_exit in final_exits:
            self.add_arc(final_exit.lineno, lineno, cause)
    return jump_starts
1062
@contract(starts='ArcStarts', exits='ArcStarts', returns='ArcStarts')
def _combine_finally_starts(self, starts, exits):
    """Helper for building the cause of `finally` branches.

    A `finally` clause might not reach its exits because any one of the
    jumps that lead into it was never executed.  The causes of `starts`
    (those that have one) are therefore formatted, joined with " or ", and
    attached to a fresh ArcStart for each line in `exits`.
    """
    described = [
        start.cause.format(lineno=start.lineno)
        for start in sorted(starts)
        if start.cause is not None
    ]
    combined_cause = " or ".join(described)
    return {ArcStart(xit.lineno, combined_cause) for xit in exits}
1078
@contract(returns='ArcStarts')
def _handle__TryExcept(self, node):
    """Handle a Python 2.7 `TryExcept` node as a general `Try` node."""
    # Python 2.7 uses separate TryExcept and TryFinally nodes.  A TryExcept
    # means there was no `finally`, so give the node an empty `finalbody`
    # and let the general handler do the work.
    node.finalbody = []
    try_exits = self._handle__Try(node)
    return try_exits
1086
@contract(returns='ArcStarts')
def _handle__TryFinally(self, node):
    """Handle a Python 2.7 `TryFinally` node as a general `Try` node."""
    # Python 2.7 uses separate TryExcept and TryFinally nodes.  Start with
    # the fields a TryFinally lacks, faked as empty.
    node.handlers = []
    node.orelse = []

    inner = node.body[0]
    same_statement = (
        inner.__class__.__name__ == "TryExcept" and node.lineno == inner.lineno
    )
    if same_statement:
        # A TryExcept nested on the same line is the except/else part of
        # this very statement: merge it into this node.
        assert len(node.body) == 1
        node.body, node.handlers, node.orelse = inner.body, inner.handlers, inner.orelse

    return self._handle__Try(node)
1103
@contract(returns='ArcStarts')
def _handle__While(self, node):
    """Add the arcs for a `while` loop and return the loop's exits.

    With a constant test (on Python 3, or a "Num" constant otherwise), the
    body loops back to its own first statement rather than to the test
    line.  The exits are the `break` exits plus the `else` exits, or, with
    no `else` and a non-constant test, the test line itself.
    """
    constant_test = self.is_constant_expr(node.test)
    test_line = self.line_for_node(node.test)
    loop_top = test_line
    if constant_test and (env.PY3 or constant_test == "Num"):
        loop_top = self.line_for_node(node.body[0])
    self.block_stack.append(LoopBlock(start=loop_top))
    never_true = ArcStart(test_line, cause="the condition on line {lineno} was never true")
    for body_exit in self.add_body_arcs(node.body, from_start=never_true):
        self.add_arc(body_exit.lineno, loop_top, body_exit.cause)
    loop_block = self.block_stack.pop()
    # Copy the break exits so the block's own set is left untouched.
    loop_exits = set(loop_block.break_exits)
    never_false = ArcStart(test_line, cause="the condition on line {lineno} was never false")
    if node.orelse:
        loop_exits |= self.add_body_arcs(node.orelse, from_start=never_false)
    elif not constant_test:
        # No `else` clause: you can exit from the start.
        loop_exits.add(never_false)
    return loop_exits
1127
@contract(returns='ArcStarts')
def _handle__With(self, node):
    """Add the arcs for a `with` statement and return the body's exits.

    The arcs into the body start from the line reported for `node`.
    """
    with_line = self.line_for_node(node)
    return self.add_body_arcs(node.body, from_start=ArcStart(with_line))

# `async with` statements share the `with` implementation.
_handle__AsyncWith = _handle__With
1135
1136 def _code_object__Module(self, node):
1137 start = self.line_for_node(node)
1138 if node.body:
1139 exits = self.add_body_arcs(node.body, from_start=ArcStart(-start))
1140 for xit in exits:
1141 self.add_arc(xit.lineno, -start, xit.cause, "didn't exit the module")
1142 else:
1143 # Empty module.
1144 self.add_arc(-start, start)
1145 self.add_arc(start, -start)
1146
def _code_object__FunctionDef(self, node):
    """Add the arcs for a function's code object.

    A `FunctionBlock` sits on the block stack while the body is analyzed;
    the exits of the body are then processed as return exits.
    """
    def_line = self.line_for_node(node)
    self.block_stack.append(FunctionBlock(start=def_line, name=node.name))
    body_exits = self.add_body_arcs(node.body, from_start=ArcStart(-def_line))
    # Falling off the end of the body is treated like a `return`.
    self.process_return_exits(body_exits)
    self.block_stack.pop()

# Async function definitions share the same implementation.
_code_object__AsyncFunctionDef = _code_object__FunctionDef
1155
def _code_object__ClassDef(self, node):
    """Add the entry and exit arcs for a class body's code object.

    Entry and exit are represented by the negated line of the class.
    """
    class_line = self.line_for_node(node)
    self.add_arc(-class_line, class_line)
    missing_exit = "didn't exit the body of class '{0}'".format(node.name)
    for body_exit in self.add_body_arcs(node.body, from_start=ArcStart(class_line)):
        self.add_arc(body_exit.lineno, -class_line, body_exit.cause, missing_exit)
1165
1166 def _make_oneline_code_method(noun): # pylint: disable=no-self-argument
1167 """A function to make methods for online callable _code_object__ methods."""
1168 def _code_object__oneline_callable(self, node):
1169 start = self.line_for_node(node)
1170 self.add_arc(-start, start, None, "didn't run the {0} on line {1}".format(noun, start))
1171 self.add_arc(
1172 start, -start, None,
1173 "didn't finish the {0} on line {1}".format(noun, start),
1174 )
1175 return _code_object__oneline_callable
1176
# One-line callables: each kind gets a generated method whose messages
# name the kind of callable.
_code_object__Lambda = _make_oneline_code_method("lambda")
_code_object__GeneratorExp = _make_oneline_code_method("generator expression")
_code_object__DictComp = _make_oneline_code_method("dictionary comprehension")
_code_object__SetComp = _make_oneline_code_method("set comprehension")
if env.PY3:
    # Only defined on Python 3 -- presumably because list comprehensions
    # have no code object of their own on Python 2 (TODO confirm).
    _code_object__ListComp = _make_oneline_code_method("list comprehension")
1183
1184
if AST_DUMP:         # pragma: debugging
    # Code only used when dumping the AST for debugging.

    # Node fields that are left out of the dump.
    SKIP_DUMP_FIELDS = ["ctx"]

    def _is_simple_value(value):
        """Tell whether `value` is simple enough to show on a single line.

        Simple values are None, empty containers, strings, ints and floats.
        """
        if value in [None, [], (), {}, set()]:
            return True
        return isinstance(value, (string_class, int, float))

    def ast_dump(node, depth=0):
        """Print a readable, recursive dump of the AST rooted at `node`.

        `depth` is the number of spaces to indent this node's output with;
        nested nodes are dumped at `depth + 8`.
        """
        pad = " " * depth
        node_name = node.__class__.__name__

        # Anything that isn't an AST node is shown with its repr.
        if not isinstance(node, ast.AST):
            print("{0}<{1} {2!r}>".format(pad, node_name, node))
            return

        if getattr(node, "lineno", None) is None:
            linemark = ""
        else:
            linemark = " @ {0}".format(node.lineno)
        head = "{0}<{1}{2}".format(pad, node_name, linemark)

        named_fields = [
            field for field in ast.iter_fields(node)
            if field[0] not in SKIP_DUMP_FIELDS
        ]

        if not named_fields:
            print("{0}>".format(head))
            return

        if len(named_fields) == 1 and _is_simple_value(named_fields[0][1]):
            # A lone simple field fits on the same line as the head.
            field_name, value = named_fields[0]
            print("{0} {1}: {2!r}>".format(head, field_name, value))
            return

        print(head)
        if 0:
            # Disabled: flip the condition to also show the node's MRO.
            print("{0}# mro: {1}".format(
                pad, ", ".join(c.__name__ for c in node.__class__.__mro__[1:]),
            ))
        field_pad = pad + " "
        for field_name, value in named_fields:
            prefix = "{0}{1}:".format(field_pad, field_name)
            if _is_simple_value(value):
                print("{0} {1!r}".format(prefix, value))
            elif isinstance(value, list):
                print("{0} [".format(prefix))
                for item in value:
                    ast_dump(item, depth + 8)
                print("{0}]".format(field_pad))
            else:
                print(prefix)
                ast_dump(value, depth + 8)

        print("{0}>".format(pad))

eric ide

mercurial