src/eric7/DebugClients/Python/coverage/parser.py

branch
eric7
changeset 9209
b99e7fd55fd3
parent 9099
0e511e0e94a3
child 9252
32dd11232e06
equal deleted inserted replaced
9208:3fc8dfeb6ebe 9209:b99e7fd55fd3
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4 """Code parsing for coverage.py."""
5
6 import ast
7 import collections
8 import os
9 import re
10 import token
11 import tokenize
12
13 from coverage import env
14 from coverage.bytecode import code_objects
15 from coverage.debug import short_stack
16 from coverage.exceptions import NoSource, NotPython, _StopEverything
17 from coverage.misc import contract, join_regex, new_contract, nice_pair, one_of
18 from coverage.phystokens import compile_unicode, generate_tokens, neuter_encoding_declaration
19
20
class PythonParser:
    """Static analyzer that finds executable lines, excluded lines, etc.

    Every result is derived from the source text, its token stream and its
    compiled bytecode; the analyzed code is never run.

    """
    @contract(text='unicode|None')
    def __init__(self, text=None, filename=None, exclude=None):
        """
        The source is taken from `text` when it is given, otherwise it is read
        from `filename`.  `exclude` is a regex; lines matching it are treated
        as excluded.

        Raises `NoSource` when the source must be read and reading fails with
        an OSError.

        """
        assert text or filename, "PythonParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            from coverage.python import get_python_source
            try:
                self.text = get_python_source(self.filename)
            except OSError as err:
                raise NoSource(f"No source for code: '{self.filename}': {err}") from err

        self.exclude = exclude

        # The source split into physical lines.
        self.lines = self.text.split('\n')

        # Normalized results: line numbers mapped to the first line of their
        # statement, with exclusions applied.
        self.statements = set()
        self.excluded = set()

        # Raw results, used only by this class and by lab/parser.py to show
        # how this class is working:
        #   raw_statements: statement starts reported by the bytecode.
        #   raw_excluded: excluded lines, as marked by pragmas.
        #   raw_docstrings: lines that belong to docstrings.
        self.raw_statements = set()
        self.raw_excluded = set()
        self.raw_docstrings = set()

        # Internal detail, used by lab/parser.py.
        self.show_tokens = False

        # Maps each line of a multi-line statement to that statement's first
        # line.
        self._multiline = {}

        # Arc data and missing-arc descriptions, both created lazily.
        self._all_arcs = None
        self._missing_arc_fragments = None

    def lines_matching(self, *regexes):
        """Return the set of line numbers that match any of `regexes`.

        Lines are numbered from 1.  A line qualifies if any part of it
        matches; the whole line does not have to match.

        """
        pattern = re.compile(join_regex(regexes))
        return {
            lineno
            for lineno, line_text in enumerate(self.lines, start=1)
            if pattern.search(line_text)
        }

    def _raw_parse(self):
        """Tokenize and compile the source to collect the raw line facts.

        Updates `raw_excluded`, `raw_docstrings`, `raw_statements` and
        `_multiline`.

        """
        # Lines that directly match the exclusion pattern.
        if self.exclude:
            self.raw_excluded = self.lines_matching(self.exclude)

        # State for the token walk, which finds excluded suites, docstrings
        # and multi-line statements.
        depth = 0                       # current INDENT/DEDENT depth
        excluded_depth = 0              # depth at which the excluded suite began
        in_excluded_suite = False
        in_excluded_decorators = False
        last_type = token.INDENT
        stmt_start = None               # first line of the current statement
        saw_code = False
        at_line_start = True
        bracket_depth = 0

        for tok_type, tok_text, (start_line, _), (end_line, _), line_text in (
            generate_tokens(self.text)
        ):
            if self.show_tokens:                # pragma: debugging
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(tok_type, tok_type),
                    nice_pair((start_line, end_line)), tok_text, line_text
                ))
            if tok_type == token.INDENT:
                depth += 1
            elif tok_type == token.DEDENT:
                depth -= 1
            elif tok_type == token.OP:
                if tok_text == ':' and bracket_depth == 0:
                    wants_exclusion = (
                        in_excluded_decorators or end_line in self.raw_excluded
                    )
                    if wants_exclusion and not in_excluded_suite:
                        # Begin excluding a suite.  The colon is the trigger
                        # so that a pragma comment on the colon's own line is
                        # recognized.
                        self.raw_excluded.add(end_line)
                        excluded_depth = depth
                        in_excluded_suite = True
                        in_excluded_decorators = False
                elif tok_text == '@' and at_line_start:
                    # A decorator line.
                    if end_line in self.raw_excluded:
                        in_excluded_decorators = True
                    if in_excluded_decorators:
                        self.raw_excluded.add(end_line)
                elif tok_text in "([{":
                    bracket_depth += 1
                elif tok_text in ")]}":
                    bracket_depth -= 1
            elif tok_type == token.STRING and last_type == token.INDENT:
                # A string that comes first on an indented line is taken to
                # be a docstring (a trick from trace.py in the stdlib).  This
                # works for 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.raw_docstrings.update(range(start_line, end_line + 1))
            elif tok_type == token.NEWLINE:
                if stmt_start is not None and end_line != stmt_start:
                    # The statement ended on a later line than it started:
                    # record every line of it as belonging to the first line.
                    self._multiline.update(
                        dict.fromkeys(range(stmt_start, end_line + 1), stmt_start)
                    )
                stmt_start = None
                at_line_start = True

            if tok_text.strip() and tok_type != tokenize.COMMENT:
                # A token that is neither whitespace nor a comment.
                saw_code = True
                if stmt_start is None:
                    # This token opens a new statement.
                    stmt_start = start_line
                    # An excluded suite ends once we are back at or above
                    # the depth where it started.
                    if in_excluded_suite and depth <= excluded_depth:
                        in_excluded_suite = False
                    if in_excluded_suite:
                        self.raw_excluded.add(end_line)
                    at_line_start = False

            last_type = tok_type

        # Ask the bytecode where the executable statements start.
        if saw_code:
            byte_parser = ByteParser(self.text, filename=self.filename)
            self.raw_statements.update(byte_parser._find_statements())

        # A module's first line can be reported as 1 even when the first code
        # is on a later line.  In that case map 1 to the real first line.
        if env.PYBEHAVIOR.module_firstline_1 and self._multiline:
            self._multiline[1] = min(self.raw_statements)

    def first_line(self, line):
        """Return the first line number of the statement including `line`.

        A negative `line` is looked up by its absolute value and the result
        is returned negated.

        """
        if line < 0:
            return -self._multiline.get(-line, -line)
        return self._multiline.get(line, line)

    def first_lines(self, lines):
        """Map each number in `lines` to the first line of its statement.

        Returns a set of the first lines.

        """
        return set(map(self.first_line, lines))

    def translate_lines(self, lines):
        """Implement `FileReporter.translate_lines`."""
        return self.first_lines(lines)

    def translate_arcs(self, arcs):
        """Implement `FileReporter.translate_arcs`."""
        return [
            (self.first_line(src), self.first_line(dst))
            for (src, dst) in arcs
        ]

    def parse_source(self):
        """Parse the source and fill in `.excluded` and `.statements`.

        Both attributes are normalized to the first line of multi-line
        statements.  Raises `NotPython` if tokenizing fails with a
        `tokenize.TokenError` or an `IndentationError`.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError) as err:
            # IndentationError has a .lineno; TokenError keeps the position
            # in its second argument.
            lineno = err.lineno if hasattr(err, "lineno") else err.args[1][0]
            message = (
                f"Couldn't parse '{self.filename}' as Python source: "
                f"{err.args[0]!r} at line {lineno}"
            )
            raise NotPython(message) from err

        self.excluded = self.first_lines(self.raw_excluded)

        skipped = self.excluded | self.raw_docstrings
        candidate_starts = self.raw_statements - skipped
        self.statements = self.first_lines(candidate_starts) - skipped

    def arcs(self):
        """Return the set of possible arcs in the code.

        Each arc is a pair of line numbers, normalized to the first line of
        multi-line statements.  The analysis runs on first use and is cached.

        """
        if self._all_arcs is None:
            self._analyze_ast()
        return self._all_arcs

    def _analyze_ast(self):
        """Run the AstArcAnalyzer and keep its results.

        Sets `_all_arcs` to the normalized arcs, leaving out arcs that start
        and end on the same statement, and sets `_missing_arc_fragments`.

        """
        analyzer = AstArcAnalyzer(self.text, self.raw_statements, self._multiline)
        analyzer.analyze()

        self._all_arcs = set()
        for raw_from, raw_to in analyzer.arcs:
            arc = (self.first_line(raw_from), self.first_line(raw_to))
            if arc[0] != arc[1]:
                self._all_arcs.add(arc)

        self._missing_arc_fragments = analyzer.missing_arc_fragments

    def exit_counts(self):
        """Return a mapping from line number to its number of exits.

        Arcs with a negative start, arcs that start on an excluded line and
        arcs that end on an excluded line are not counted.

        """
        counts = collections.defaultdict(int)
        for src, dst in self.arcs():
            # Never report -1 as a line number, never report excluded lines,
            # and don't count arcs that lead to excluded lines.
            if src < 0 or src in self.excluded or dst in self.excluded:
                continue
            counts[src] += 1
        return counts

    def missing_arc_description(self, start, end, executed_arcs=None):
        """Provide an English sentence describing a missing arc."""
        if self._missing_arc_fragments is None:
            self._analyze_ast()

        fragments = self._missing_arc_fragments
        reported_start = start

        never_entered = (
            executed_arcs and
            end < 0 and end == -start and
            (end, start) not in executed_arcs and
            (end, start) in fragments
        )
        if never_entered:
            # A one-line callable that was never even started, and we have a
            # message about not starting it: describe the entry arc instead.
            start, end = end, start

        sentences = []
        for smsg, emsg in fragments.get((start, end), [(None, None)]):
            if emsg is None:
                if end < 0:
                    # Perhaps this is a one-line callable; check for that.
                    if (-end, end) in fragments:
                        return self.missing_arc_description(-end, end)
                    emsg = "didn't jump to the function exit"
                else:
                    emsg = "didn't jump to line {lineno}"
            emsg = emsg.format(lineno=end)

            sentence = f"line {reported_start} {emsg}"
            if smsg is not None:
                sentence += f", because {smsg.format(lineno=reported_start)}"

            sentences.append(sentence)

        return " or ".join(sentences)
336
class ByteParser:
    """Parse bytecode to understand the structure of code."""

    @contract(text='unicode')
    def __init__(self, text, code=None, filename=None):
        """
        `text` is the source.  If `code` is given it is used as the code
        object; otherwise `text` is compiled, with `filename` passed to the
        compiler.

        Raises `NotPython` if compiling raises a SyntaxError, and
        `_StopEverything` if the code object lacks `co_lnotab` or
        `co_firstlineno`.

        """
        self.text = text
        if code:
            self.code = code
        else:
            try:
                self.code = compile_unicode(text, filename, "exec")
            except SyntaxError as synerr:
                # The line is formatted with %s rather than %d: a SyntaxError
                # can have lineno set to None, and %d would then raise a
                # TypeError that hides the NotPython error.  For integer line
                # numbers the message is unchanged.
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %s" % (
                        filename, synerr.msg, synerr.lineno
                    )
                ) from synerr

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno']:
            if not hasattr(self.code, attr):
                raise _StopEverything(                  # pragma: only jython
                    "This implementation of Python doesn't support code analysis.\n" +
                    "Run coverage.py under another Python for this command."
                )

    def child_parsers(self):
        """Iterate over all the code objects nested within this one.

        Returns a generator of ByteParser objects, one for each code object
        produced by `code_objects(self.code)`.

        """
        return (ByteParser(self.text, code=c) for c in code_objects(self.code))

    def _line_numbers(self):
        """Yield the line numbers possible in this code object.

        Uses `co_lines()` when the code object has it, skipping entries whose
        line is None.  Otherwise decodes co_lnotab (described in
        Python/compile.c).  Produces a sequence: l0, l1, ...
        """
        if hasattr(self.code, "co_lines"):
            for _, _, line in self.code.co_lines():
                if line is not None:
                    yield line
        else:
            # Adapted from dis.py in the standard library.  co_lnotab is a
            # flat sequence of (byte increment, line increment) pairs.
            byte_increments = self.code.co_lnotab[0::2]
            line_increments = self.code.co_lnotab[1::2]

            last_line_num = None
            line_num = self.code.co_firstlineno
            for byte_incr, line_incr in zip(byte_increments, line_increments):
                if byte_incr:
                    # Bytecode was emitted for line_num: report it once.
                    if line_num != last_line_num:
                        yield line_num
                        last_line_num = line_num
                if env.PYBEHAVIOR.negative_lnotab and line_incr >= 0x80:
                    # Treat the increment as a signed byte.
                    line_incr -= 0x100
                line_num += line_incr
            if line_num != last_line_num:
                yield line_num

    def _find_statements(self):
        """Find the statements in `self.code`.

        Produce a sequence of line numbers that start statements.  Covers
        every parser returned by `child_parsers()`.

        """
        for bp in self.child_parsers():
            # Get all of the lineno information from this code.
            yield from bp._line_numbers()
413
414 #
415 # AST analysis
416 #
417
class BlockBase:
    """
    Base class for entries on the block stack.

    Each kind of block deals with exiting statements in its own way.  Every
    method receives a collection of exits and an `add_arc` callable it may
    use to record arcs.  The return value is True when the exits have been
    dealt with, and False when the search should move on up the block stack.
    """
    # pylint: disable=unused-argument
    def process_break_exits(self, exits, add_arc):
        """Process break exits."""
        # A break can only appear inside a loop, and most subclasses
        # implement process_break_exits, so this is never reached.
        raise AssertionError

    def process_continue_exits(self, exits, add_arc):
        """Process continue exits."""
        # A continue can only appear inside a loop, and most subclasses
        # implement process_continue_exits, so this is never reached.
        raise AssertionError

    def process_raise_exits(self, exits, add_arc):
        """Process raise exits: not handled here."""
        return False

    def process_return_exits(self, exits, add_arc):
        """Process return exits: not handled here."""
        return False
448
class LoopBlock(BlockBase):
    """A block on the block stack representing a `for` or `while` loop."""
    @contract(start=int)
    def __init__(self, start):
        # ArcStarts from break statements that exit this loop.
        self.break_exits = set()
        # Line number where the loop starts.
        self.start = start

    def process_break_exits(self, exits, add_arc):
        """Remember the break exits; no arc is added here."""
        self.break_exits.update(exits)
        return True

    def process_continue_exits(self, exits, add_arc):
        """Add an arc from each continue back to the loop start."""
        loop_top = self.start
        for continue_start in exits:
            add_arc(continue_start.lineno, loop_top, continue_start.cause)
        return True
467
class FunctionBlock(BlockBase):
    """A block on the block stack representing a function definition."""
    @contract(start=int, name=str)
    def __init__(self, start, name):
        # Line number where the function starts, and the function's name.
        self.start = start
        self.name = name

    def _exit_function(self, exits, add_arc, emsg):
        """Add an arc from each exit to -start, using `emsg` as end message."""
        for xit in exits:
            add_arc(xit.lineno, -self.start, xit.cause, emsg)
        return True

    def process_raise_exits(self, exits, add_arc):
        """Raises leave the function: add arcs to the function exit."""
        return self._exit_function(
            exits, add_arc, f"didn't except from function {self.name!r}",
        )

    def process_return_exits(self, exits, add_arc):
        """Returns leave the function: add arcs to the function exit."""
        return self._exit_function(
            exits, add_arc, f"didn't return from function {self.name!r}",
        )
493
class TryBlock(BlockBase):
    """A block on the block stack representing a `try` block."""
    @contract(handler_start='int|None', final_start='int|None')
    def __init__(self, handler_start, final_start):
        # Line number of the first "except" handler, or None.
        self.handler_start = handler_start
        # Line number of the "finally:" clause, or None.
        self.final_start = final_start

        # ArcStarts for breaks/continues/raises/returns inside the "try:"
        # that have to be routed through the "finally:" clause.
        self.break_from = set()
        self.continue_from = set()
        self.raise_from = set()
        self.return_from = set()

    def process_break_exits(self, exits, add_arc):
        """Collect breaks if there is a finally clause, else pass them on."""
        if self.final_start is None:
            return False
        self.break_from.update(exits)
        return True

    def process_continue_exits(self, exits, add_arc):
        """Collect continues if there is a finally clause, else pass them on."""
        if self.final_start is None:
            return False
        self.continue_from.update(exits)
        return True

    def process_raise_exits(self, exits, add_arc):
        """Send raises to the first handler, or collect them for the finally."""
        if self.handler_start is None:
            assert self.final_start is not None
            self.raise_from.update(exits)
            return True
        for xit in exits:
            add_arc(xit.lineno, self.handler_start, xit.cause)
        return True

    def process_return_exits(self, exits, add_arc):
        """Collect returns if there is a finally clause, else pass them on."""
        if self.final_start is None:
            return False
        self.return_from.update(exits)
        return True
537
class WithBlock(BlockBase):
    """A block on the block stack representing a `with` block."""
    @contract(start=int)
    def __init__(self, start):
        # This block is only created when it is needed, which saves checking
        # the setting in every method.
        assert env.PYBEHAVIOR.exit_through_with

        # Line number of the with statement.
        self.start = start

        # ArcStarts for breaks/continues/returns inside the "with:" that
        # have to pass through the with-statement on their way out.
        self.break_from = set()
        self.continue_from = set()
        self.return_from = set()

    def _process_exits(self, exits, add_arc, from_set=None):
        """Helper to process the four kinds of exits.

        Adds an arc from every exit to the with-statement line.  If
        `from_set` is given, the exits are also added to it.

        """
        with_line = self.start
        for xit in exits:
            add_arc(xit.lineno, with_line, xit.cause)
        if from_set is not None:
            from_set.update(exits)
        return True

    def process_break_exits(self, exits, add_arc):
        """Route breaks through the with-statement and remember them."""
        return self._process_exits(exits, add_arc, from_set=self.break_from)

    def process_continue_exits(self, exits, add_arc):
        """Route continues through the with-statement and remember them."""
        return self._process_exits(exits, add_arc, from_set=self.continue_from)

    def process_raise_exits(self, exits, add_arc):
        """Route raises through the with-statement; they are not remembered."""
        return self._process_exits(exits, add_arc)

    def process_return_exits(self, exits, add_arc):
        """Route returns through the with-statement and remember them."""
        return self._process_exits(exits, add_arc, from_set=self.return_from)
575
class ArcStart(collections.namedtuple("Arc", ["lineno", "cause"])):
    """The starting point of an arc.

    `lineno` is the line the arc leaves from.

    `cause` is an English text fragment used as the `startmsg` for
    AstArcAnalyzer.missing_arc_fragments.  It explains why an arc wasn't
    executed, so it should read well in a sentence of the form,
    "Line 17 didn't run because {cause}."  The fragment may contain
    "{lineno}" to have `lineno` interpolated into it.  `cause` defaults to
    None.

    """
    def __new__(cls, lineno, cause=None):
        # Overridden only to give `cause` a default value.
        return super().__new__(cls, lineno, cause)
591
# Register a contract word that PyContract does not provide:
# ArcStarts accepts a list or set whose items are all ArcStart objects.
new_contract(
    'ArcStarts',
    lambda seq: all(isinstance(item, ArcStart) for item in seq),
)
595
596
class NodeList:
    """A made-up node that simply wraps a sequence of real nodes.

    It is used when collapsing optimized if-statements, to stand for the
    unconditional execution of one of the clauses.  Its line number is the
    line number of the first wrapped node.

    """
    def __init__(self, body):
        first_node = body[0]
        self.lineno = first_node.lineno
        self.body = body
607
608 # TODO: some add_arcs methods here don't add arcs, they return them. Rename them.
609 # TODO: the cause messages have too many commas.
610 # TODO: Shouldn't the cause messages join with "and" instead of "or"?
611
def ast_parse(text):
    """Build the AST for `text`.

    The text is first passed through `neuter_encoding_declaration`, and the
    result is handed to `ast.parse`.

    """
    prepared = neuter_encoding_declaration(text)
    return ast.parse(prepared)
615
616
617 class AstArcAnalyzer:
618 """Analyze source text with an AST to find executable code paths."""
619
@contract(text='unicode', statements=set)
def __init__(self, text, statements, multiline):
    """Parse `text` and prepare the state used to collect arcs.

    `statements` is a set of statement line numbers, and `multiline` maps
    line numbers to the first line of their statement.

    """
    self.root_node = ast_parse(text)
    self.multiline = multiline
    # TODO: I think this is happening in too many places.
    self.statements = {multiline.get(lineno, lineno) for lineno in statements}

    # Turn on AST dumps with an environment variable.
    # $set_env.py: COVERAGE_AST_DUMP - Dump the AST nodes when parsing code.
    dump_setting = os.environ.get("COVERAGE_AST_DUMP", 0)

    if bool(int(dump_setting)):                 # pragma: debugging
        # Dump the AST so that failing tests have helpful output.
        print(f"Statements: {self.statements}")
        print(f"Multiline map: {self.multiline}")
        ast_dump(self.root_node)

    # The (from, to) line number pairs found so far.
    self.arcs = set()

    # A map from arc pairs to a list of pairs of sentence fragments:
    #   { (start, end): [(startmsg, endmsg), ...], }
    #
    # For an arc from line 17, they should be usable like:
    #    "Line 17 {endmsg}, because {startmsg}"
    self.missing_arc_fragments = collections.defaultdict(list)

    # The blocks enclosing the node being handled, outermost first.
    self.block_stack = []

    # $set_env.py: COVERAGE_TRACK_ARCS - Trace possible arcs added while parsing code.
    track_setting = os.environ.get("COVERAGE_TRACK_ARCS", 0)
    self.debug = bool(int(track_setting))
649
def analyze(self):
    """Walk the AST from `root_node` to determine the possible arcs.

    For each node, a method named `_code_object__<NodeClass>` is called if
    this object has one.  Afterwards the `arcs` attribute holds the set of
    (from, to) line number pairs.

    """
    for node in ast.walk(self.root_node):
        handler_name = "_code_object__" + node.__class__.__name__
        handler = getattr(self, handler_name, None)
        if handler is None:
            continue
        handler(node)
662
@contract(start=int, end=int)
def add_arc(self, start, end, smsg=None, emsg=None):
    """Record the arc (start, end), with message fragments for when it is missing.

    The fragment pair (smsg, emsg) is stored only if at least one of the
    two is not None.

    """
    arc = (start, end)
    if self.debug:                      # pragma: debugging
        print(f"\nAdding possible arc: ({start}, {end}): {smsg!r}, {emsg!r}")
        print(short_stack(limit=10))
    self.arcs.add(arc)

    if not (smsg is None and emsg is None):
        self.missing_arc_fragments[arc].append((smsg, emsg))
673
def nearest_blocks(self):
    """Return an iterator over the block stack, nearest block first."""
    stack = self.block_stack
    return reversed(stack)
677
@contract(returns=int)
def line_for_node(self, node):
    """Return the line number to use for `node`.

    If this object has a `_line__<NodeClass>` method, that method decides;
    otherwise the node's own `lineno` is used.

    """
    handler = getattr(self, "_line__" + node.__class__.__name__, None)
    if handler is None:
        return node.lineno
    return handler(node)
691
def _line_decorated(self, node):
    """Compute first line number for things that can be decorated (classes and functions)."""
    starts_at_decorator = (
        env.PYBEHAVIOR.trace_decorated_def or env.PYBEHAVIOR.def_ast_no_decorator
    )
    if starts_at_decorator and node.decorator_list:
        # Use the line of the first decorator.
        return node.decorator_list[0].lineno
    return node.lineno
699
def _line__Assign(self, node):
    """An assignment uses the line of its value expression."""
    assigned_value = node.value
    return self.line_for_node(assigned_value)
702
703 _line__ClassDef = _line_decorated
704
def _line__Dict(self, node):
    """A dict literal uses the line of its first key, when it has one."""
    if not node.keys:
        return node.lineno
    first_key = node.keys[0]
    if first_key is None:
        # Unpacked dict literals `{**{'a':1}}` have None as the key,
        # use the value in that case.
        return node.values[0].lineno
    return first_key.lineno
715
716 _line__FunctionDef = _line_decorated
717 _line__AsyncFunctionDef = _line_decorated
718
def _line__List(self, node):
    """A list literal uses the line of its first element, when it has one."""
    if not node.elts:
        return node.lineno
    return self.line_for_node(node.elts[0])
724
def _line__Module(self, node):
    """A module starts at line 1, or at the line of its first statement."""
    # Empty modules have no line number, they always start at 1.
    if env.PYBEHAVIOR.module_firstline_1 or not node.body:
        return 1
    return self.line_for_node(node.body[0])
733
# Names of the node types that need no handler: they simply flow on to the
# next node with no complications.
OK_TO_DEFAULT = {
    "AnnAssign",
    "Assert",
    "Assign",
    "AugAssign",
    "Delete",
    "Expr",
    "Global",
    "Import",
    "ImportFrom",
    "Nonlocal",
    "Pass",
}
739
@contract(returns='ArcStarts')
def add_arcs(self, node):
    """Add the arcs for `node`.

    Returns a set of ArcStarts: the exits from this node to whatever comes
    next.  A node stands for a whole sub-tree, children included, so its
    exits can be arbitrarily complex::

        if something(1):
            if other(2):
                doit(3)
            else:
                doit(5)

    There are two exits from line 1: they start at line 3 and line 5.

    The work is done by a `_handle__<NodeClass>` method when there is one.

    """
    node_name = node.__class__.__name__
    handler = getattr(self, "_handle__" + node_name, None)
    if handler is not None:
        return handler(node)

    # No handler: either this is a simple statement that is fine to
    # default, or it is something we overlooked.
    if env.TESTING and node_name not in self.OK_TO_DEFAULT:
        raise Exception(f"*** Unhandled: {node}")       # pragma: only failure

    # Default for simple statements: one exit from this node.
    only_exit = ArcStart(self.line_for_node(node))
    return {only_exit}
770
@one_of("from_start, prev_starts")
@contract(returns='ArcStarts')
def add_body_arcs(self, body, from_start=None, prev_starts=None):
    """Add arcs for the body of a compound statement.

    `body` is the sequence of body nodes.  The flow into the body comes
    either from `from_start`, a single `ArcStart`, or from `prev_starts`,
    a set of ArcStarts.  Only one of the two should be given.

    Returns a set of ArcStarts, the exits from this body.

    """
    starts = {from_start} if prev_starts is None else prev_starts
    for stmt in body:
        stmt_line = self.line_for_node(stmt)
        if self.multiline.get(stmt_line, stmt_line) not in self.statements:
            # This statement is not among the known statements: look for a
            # part of it that is, or skip it entirely.
            stmt = self.find_non_missing_node(stmt)
            if stmt is None:
                continue
            stmt_line = self.line_for_node(stmt)
        for start in starts:
            self.add_arc(start.lineno, stmt_line, start.cause)
        # The exits of this statement lead into the next one.
        starts = self.add_arcs(stmt)
    return starts
798
def find_non_missing_node(self, node):
    """Search `node` looking for a child that has not been optimized away.

    The result may be the node passed in, or a node found by the matching
    `_missing__<NodeClass>` method.

    Returns a node, or None if none of the node remains.

    """
    # This repeats work just done in add_body_arcs, but this duplication
    # means we can avoid a function call in the 99.9999% case of not
    # optimizing away statements.
    line = self.line_for_node(node)
    if self.multiline.get(line, line) in self.statements:
        return node

    finder = getattr(self, "_missing__" + node.__class__.__name__, None)
    return finder(node) if finder else None
822
823 # Missing nodes: _missing__*
824 #
825 # Entire statements can be optimized away by Python. They will appear in
826 # the AST, but not the bytecode. These functions are called (by
827 # find_non_missing_node) to find a node to use instead of the missing
828 # node. They can return None if the node should truly be gone.
829
def _missing__If(self, node):
    """Find what remains of an if-statement that is itself missing."""
    # When the if-node is missing, one of its clauses might still be
    # here, but not both: use the first one that isn't missing.  Each
    # clause is wrapped in a NodeList so it can be treated as one node.
    body_part = self.find_non_missing_node(NodeList(node.body))
    if body_part:
        return body_part
    if not node.orelse:
        return None
    return self.find_non_missing_node(NodeList(node.orelse))
840
def _missing__NodeList(self, node):
    """Find what remains of a NodeList that is itself missing."""
    # A NodeList can mix missing and present nodes: keep the present ones.
    candidates = (self.find_non_missing_node(child) for child in node.body)
    present = [child for child in candidates if child is not None]

    # Return the simplest representation of the present children.
    if len(present) == 1:
        return present[0]
    if present:
        return NodeList(present)
    return None
856
def _missing__While(self, node):
    """Find what remains of a while-statement that is itself missing."""
    remaining = self.find_non_missing_node(NodeList(node.body))
    if not remaining:
        return None
    # Build a synthetic while-True node positioned at the remaining body.
    always_true = ast.Name()
    always_true.lineno = remaining.lineno
    always_true.id = "True"

    synthetic = ast.While()
    synthetic.lineno = remaining.lineno
    synthetic.test = always_true
    synthetic.body = remaining.body
    synthetic.orelse = None
    return synthetic
870
def is_constant_expr(self, node):
    """Is this a compile-time constant?

    Returns "Num" for Constant, NameConstant and Num nodes, "Name" for a
    Name node whose id is True, False, None or __debug__, and None for
    anything else.

    """
    kind = node.__class__.__name__
    if kind in ["Constant", "NameConstant", "Num"]:
        return "Num"
    if kind == "Name" and node.id in ["True", "False", "None", "__debug__"]:
        return "Name"
    return None
880
881 # In the fullness of time, these might be good tests to write:
882 # while EXPR:
883 # while False:
884 # listcomps hidden deep in other expressions
885 # listcomps hidden in lists: x = [[i for i in range(10)]]
886 # nested function definitions
887
888
889 # Exit processing: process_*_exits
890 #
891 # These functions process the four kinds of jump exits: break, continue,
892 # raise, and return. To figure out where an exit goes, we have to look at
893 # the block stack context. For example, a break will jump to the nearest
894 # enclosing loop block, or the nearest enclosing finally block, whichever
895 # is nearer.
896
@contract(exits='ArcStarts')
def process_break_exits(self, exits):
    """Add arcs due to jumps from `exits` being breaks."""
    # Offer the exits to each block, nearest first, and stop at the first
    # block that reports having handled them.
    any(
        block.process_break_exits(exits, self.add_arc)
        for block in self.nearest_blocks()
    )
903
@contract(exits='ArcStarts')
def process_continue_exits(self, exits):
    """Add arcs due to jumps from `exits` being continues."""
    # Offer the exits to each block, nearest first, and stop at the first
    # block that reports having handled them.
    any(
        block.process_continue_exits(exits, self.add_arc)
        for block in self.nearest_blocks()
    )
910
@contract(exits='ArcStarts')
def process_raise_exits(self, exits):
    """Add arcs due to jumps from `exits` being raises."""
    # Offer the exits to each block, nearest first, and stop at the first
    # block that reports having handled them.
    any(
        block.process_raise_exits(exits, self.add_arc)
        for block in self.nearest_blocks()
    )
917
@contract(exits='ArcStarts')
def process_return_exits(self, exits):
    """Add arcs due to jumps from `exits` being returns."""
    # Offer the exits to each block, nearest first, and stop at the first
    # block that reports having handled them.
    any(
        block.process_return_exits(exits, self.add_arc)
        for block in self.nearest_blocks()
    )
924
925 # Handlers: _handle__*
926 #
927 # Each handler deals with a specific AST node type, dispatched from
928 # add_arcs. Handlers return the set of exits from that node, and can
929 # also call self.add_arc to record arcs they find. These functions mirror
930 # the Python semantics of each syntactic construct. See the docstring
931 # for add_arcs to understand the concept of exits from a node.
932 #
933 # Every node type that represents a statement should have a handler, or it
934 # should be listed in OK_TO_DEFAULT.
935
@contract(returns='ArcStarts')
def _handle__Break(self, node):
    """Hand a break to the enclosing blocks; nothing flows to the next node."""
    break_start = ArcStart(
        self.line_for_node(node),
        cause="the break on line {lineno} wasn't executed",
    )
    self.process_break_exits([break_start])
    return set()
942
@contract(returns='ArcStarts')
def _handle_decorated(self, node):
    """Add arcs for things that can be decorated (classes and functions)."""
    def_line = node.lineno
    current = def_line
    decorators = node.decorator_list
    if not decorators:
        # The body is handled in collect_arcs.
        return {ArcStart(current)}

    if env.PYBEHAVIOR.trace_decorated_def or env.PYBEHAVIOR.def_ast_no_decorator:
        current = None
    # Chain the decorator lines together, top to bottom.
    for decorator in decorators:
        dec_line = self.line_for_node(decorator)
        if current is not None and dec_line != current:
            self.add_arc(current, dec_line)
        current = dec_line
    if env.PYBEHAVIOR.trace_decorated_def:
        self.add_arc(current, def_line)
        current = def_line
    if env.PYBEHAVIOR.trace_decorator_line_again:
        # Arcs going back up through the decorators, bottom to top.
        for upper, lower in zip(decorators, decorators[1:]):
            self.add_arc(self.line_for_node(lower), self.line_for_node(upper))
        self.add_arc(self.line_for_node(decorators[0]), def_line)
        self.add_arc(def_line, self.line_for_node(decorators[-1]))
    # The definition line may have been missed, but we should have it
    # in `self.statements`.  For some constructs, `line_for_node` is
    # not what we'd think of as the first line in the statement, so map
    # it to the first one.
    if node.body:
        body_line = self.line_for_node(node.body[0])
        body_line = self.multiline.get(body_line, body_line)
        for lineno in range(current + 1, body_line):
            if lineno in self.statements:
                self.add_arc(current, lineno)
                current = lineno
    # The body is handled in collect_arcs.
    return {ArcStart(current)}
977
978 _handle__ClassDef = _handle_decorated
979
@contract(returns='ArcStarts')
def _handle__Continue(self, node):
    """Handle a `continue` statement.

    The jump is passed to `process_continue_exits`; the returned set is
    empty because no exit from this node continues to a following statement.
    """
    lineno = self.line_for_node(node)
    self.process_continue_exits(
        [ArcStart(lineno, cause="the continue on line {lineno} wasn't executed")]
    )
    return set()
986
@contract(returns='ArcStarts')
def _handle__For(self, node):
    """Add arcs for a `for` loop and return the loop's exits.

    The loop line is the line of the iterable expression. Exits of the loop
    are the collected `break` exits plus either the exits of the `else`
    clause or, when there is no `else`, the loop line itself.
    """
    loop_top = self.line_for_node(node.iter)
    self.block_stack.append(LoopBlock(start=loop_top))
    never_started = ArcStart(loop_top, cause="the loop on line {lineno} never started")
    # Any exit from the body will go back to the top of the loop.
    for body_exit in self.add_body_arcs(node.body, from_start=never_started):
        self.add_arc(body_exit.lineno, loop_top, body_exit.cause)
    # The popped loop block carries the exits recorded for `break`.
    loop_exits = self.block_stack.pop().break_exits
    not_completed = ArcStart(loop_top, cause="the loop on line {lineno} didn't complete")
    if not node.orelse:
        # No else clause: exit from the for line.
        loop_exits.add(not_completed)
    else:
        loop_exits.update(self.add_body_arcs(node.orelse, from_start=not_completed))
    return loop_exits

# `async for` is analyzed exactly like `for`.
_handle__AsyncFor = _handle__For
1008
# Function definitions, sync and async, use the decorated-definition handler.
_handle__FunctionDef = _handle_decorated
_handle__AsyncFunctionDef = _handle_decorated
1011
@contract(returns='ArcStarts')
def _handle__If(self, node):
    """Add arcs for an `if` statement and return its exits.

    Both branches start from the line of the test expression; the exits are
    those of the body together with those of the `else` clause.
    """
    test_line = self.line_for_node(node.test)
    never_true = ArcStart(test_line, cause="the condition on line {lineno} was never true")
    exits = self.add_body_arcs(node.body, from_start=never_true)
    never_false = ArcStart(test_line, cause="the condition on line {lineno} was never false")
    exits.update(self.add_body_arcs(node.orelse, from_start=never_false))
    return exits
1020
@contract(returns='ArcStarts')
def _handle__Match(self, node):
    """Add arcs for a `match` statement and return its exits.

    Each case's pattern line is chained from the previous pattern line (the
    first one from the `match` line), and each case body starts from its own
    pattern line. The exits are the exits of all case bodies, plus the last
    pattern line when no case is guaranteed to match.

    A case is guaranteed to match only when it has no guard and its pattern
    is irrefutable: a capture or wildcard (`ast.MatchAs` with no
    sub-pattern), possibly wrapped in `... as name` or appearing as the last
    alternative of an or-pattern.
    """
    start = self.line_for_node(node)
    last_start = start
    exits = set()
    had_wildcard = False
    # Python's grammar requires at least one case, so `from_start` is always
    # bound by the time the loop finishes.
    for case in node.cases:
        case_start = self.line_for_node(case.pattern)

        # Peel off wrappers that don't affect whether the pattern can fail:
        # only the last alternative of an or-pattern may be irrefutable, and
        # `<pattern> as name` is irrefutable exactly when `<pattern>` is.
        pattern = case.pattern
        while True:
            if isinstance(pattern, ast.MatchOr):
                pattern = pattern.patterns[-1]
            elif isinstance(pattern, ast.MatchAs) and pattern.pattern is not None:
                pattern = pattern.pattern
            else:
                break
        # What remains is a bare capture/wildcard only if it is a MatchAs
        # with no sub-pattern. A guard can still reject the case.
        if isinstance(pattern, ast.MatchAs) and case.guard is None:
            had_wildcard = True

        self.add_arc(last_start, case_start, "the pattern on line {lineno} always matched")
        from_start = ArcStart(case_start, cause="the pattern on line {lineno} never matched")
        exits |= self.add_body_arcs(case.body, from_start=from_start)
        last_start = case_start
    if not had_wildcard:
        # Nothing is guaranteed to match, so control can fall past the last
        # pattern without running any case body.
        exits.add(from_start)
    return exits
1038
@contract(returns='ArcStarts')
def _handle__NodeList(self, node):
    """Add arcs for the statements in `node.body`, starting from the node's line.

    Returns the exits reported by `add_body_arcs`.
    """
    first_line = self.line_for_node(node)
    return self.add_body_arcs(node.body, from_start=ArcStart(first_line))
1044
@contract(returns='ArcStarts')
def _handle__Raise(self, node):
    """Handle a `raise` statement.

    The jump is passed to `process_raise_exits`.
    """
    lineno = self.line_for_node(node)
    self.process_raise_exits(
        [ArcStart(lineno, cause="the raise on line {lineno} wasn't executed")]
    )
    # `raise` statement jumps away, no exits from here.
    return set()
1052
@contract(returns='ArcStarts')
def _handle__Return(self, node):
    """Handle a `return` statement.

    The jump is passed to `process_return_exits`.
    """
    lineno = self.line_for_node(node)
    self.process_return_exits(
        [ArcStart(lineno, cause="the return on line {lineno} wasn't executed")]
    )
    # `return` statement jumps away, no exits from here.
    return set()
1060
@contract(returns='ArcStarts')
def _handle__Try(self, node):
    """Add arcs for a `try` statement and return its exits.

    A `TryBlock` is pushed while the `try` body is analyzed. When there is a
    `finally` clause the block stays on the stack through the handlers and
    `else` clause, so that the jumps it collects (`break_from`,
    `continue_from`, `raise_from`, `return_from`) can be routed through the
    `finally` body and then passed on to the enclosing blocks.
    """
    # Line of the first `except` clause, if any.
    if node.handlers:
        handler_start = self.line_for_node(node.handlers[0])
    else:
        handler_start = None

    # Line of the first statement in the `finally` clause, if any.
    if node.finalbody:
        final_start = self.line_for_node(node.finalbody[0])
    else:
        final_start = None

    # This is true by virtue of Python syntax: have to have either except
    # or finally, or both.
    assert handler_start is not None or final_start is not None
    try_block = TryBlock(handler_start, final_start)
    self.block_stack.append(try_block)

    start = self.line_for_node(node)
    exits = self.add_body_arcs(node.body, from_start=ArcStart(start))

    # We're done with the `try` body, so this block no longer handles
    # exceptions. We keep the block so the `finally` clause can pick up
    # flows from the handlers and `else` clause.
    if node.finalbody:
        try_block.handler_start = None
        if node.handlers:
            # If there are `except` clauses, then raises in the try body
            # will already jump to them. Start this set over for raises in
            # `except` and `else`.
            try_block.raise_from = set()
    else:
        self.block_stack.pop()

    handler_exits = set()

    if node.handlers:
        # Chain each `except` line to the next, and collect the exits of
        # every handler body.
        last_handler_start = None
        for handler_node in node.handlers:
            handler_start = self.line_for_node(handler_node)
            if last_handler_start is not None:
                self.add_arc(last_handler_start, handler_start)
            last_handler_start = handler_start
            from_cause = "the exception caught by line {lineno} didn't happen"
            from_start = ArcStart(handler_start, cause=from_cause)
            handler_exits |= self.add_body_arcs(handler_node.body, from_start=from_start)

    if node.orelse:
        # The `else` clause continues from the exits of the `try` body.
        exits = self.add_body_arcs(node.orelse, prev_starts=exits)

    exits |= handler_exits

    if node.finalbody:
        self.block_stack.pop()
        final_from = (                  # You can get to the `finally` clause from:
            exits |                         # the exits of the body or `else` clause,
            try_block.break_from |          # or a `break`,
            try_block.continue_from |       # or a `continue`,
            try_block.raise_from |          # or a `raise`,
            try_block.return_from           # or a `return`.
        )

        final_exits = self.add_body_arcs(node.finalbody, prev_starts=final_from)

        # For each kind of jump collected by the block: either record arcs
        # from the `finally` exits back to the jump line and pass the
        # original jump starts on, or pass on the `finally` exits relabeled
        # with the jumps' causes.
        if try_block.break_from:
            if env.PYBEHAVIOR.finally_jumps_back:
                for break_line in try_block.break_from:
                    lineno = break_line.lineno
                    cause = break_line.cause.format(lineno=lineno)
                    for final_exit in final_exits:
                        self.add_arc(final_exit.lineno, lineno, cause)
                breaks = try_block.break_from
            else:
                breaks = self._combine_finally_starts(try_block.break_from, final_exits)
            self.process_break_exits(breaks)

        if try_block.continue_from:
            if env.PYBEHAVIOR.finally_jumps_back:
                for continue_line in try_block.continue_from:
                    lineno = continue_line.lineno
                    cause = continue_line.cause.format(lineno=lineno)
                    for final_exit in final_exits:
                        self.add_arc(final_exit.lineno, lineno, cause)
                continues = try_block.continue_from
            else:
                continues = self._combine_finally_starts(try_block.continue_from, final_exits)
            self.process_continue_exits(continues)

        # Raises always continue from the `finally` exits; there is no
        # jump-back variant for them here.
        if try_block.raise_from:
            self.process_raise_exits(
                self._combine_finally_starts(try_block.raise_from, final_exits)
            )

        if try_block.return_from:
            if env.PYBEHAVIOR.finally_jumps_back:
                for return_line in try_block.return_from:
                    lineno = return_line.lineno
                    cause = return_line.cause.format(lineno=lineno)
                    for final_exit in final_exits:
                        self.add_arc(final_exit.lineno, lineno, cause)
                returns = try_block.return_from
            else:
                returns = self._combine_finally_starts(try_block.return_from, final_exits)
            self.process_return_exits(returns)

        if exits:
            # The finally clause's exits are only exits for the try block
            # as a whole if the try block had some exits to begin with.
            exits = final_exits

    return exits
1172
@contract(starts='ArcStarts', exits='ArcStarts', returns='ArcStarts')
def _combine_finally_starts(self, starts, exits):
    """Relabel `exits` with the combined causes of `starts`.

    A `finally` clause's exits may go unexecuted because any of the jumps
    leading into it never happened. The formatted causes of `starts` (taken
    in sorted order, skipping starts without a cause) are joined with
    " or ", and a new set of `ArcStart`s is returned: one per line in
    `exits`, each carrying that joined cause.
    """
    described = [
        origin.cause.format(lineno=origin.lineno)
        for origin in sorted(starts)
        if origin.cause is not None
    ]
    combined_cause = " or ".join(described)
    return {ArcStart(xit.lineno, combined_cause) for xit in exits}
1188
@contract(returns='ArcStarts')
def _handle__While(self, node):
    """Add arcs for a `while` loop and return the loop's exits.

    Body exits loop back to the line of the test, or to the first body line
    when the test is a constant expression and
    `env.PYBEHAVIOR.keep_constant_test` is false. Exits of the loop are the
    collected `break` exits plus the `else` clause's exits; without an
    `else`, the test line is an exit unless the test is constant.
    """
    start = self.line_for_node(node.test)
    constant_test = self.is_constant_expr(node.test)
    if constant_test and not env.PYBEHAVIOR.keep_constant_test:
        # The constant test line isn't jumped back to; use the first body line.
        to_top = self.line_for_node(node.body[0])
    else:
        to_top = start
    self.block_stack.append(LoopBlock(start=to_top))
    never_true = ArcStart(start, cause="the condition on line {lineno} was never true")
    for body_exit in self.add_body_arcs(node.body, from_start=never_true):
        self.add_arc(body_exit.lineno, to_top, body_exit.cause)
    # Copy the break exits recorded on the loop block into a fresh set.
    exits = set(self.block_stack.pop().break_exits)
    never_false = ArcStart(start, cause="the condition on line {lineno} was never false")
    if node.orelse:
        exits |= self.add_body_arcs(node.orelse, from_start=never_false)
    elif not constant_test:
        # No `else` clause: you can exit from the start.
        exits.add(never_false)
    return exits
1217
@contract(returns='ArcStarts')
def _handle__With(self, node):
    """Add arcs for a `with` statement and return its exits.

    When `env.PYBEHAVIOR.exit_through_with` is false the exits are simply
    those of the body. Otherwise a `WithBlock` is pushed around the body,
    body exits are linked back to the `with` line, which becomes the only
    exit, and collected `break`/`continue`/`return` jumps are passed on as
    leaving from the `with` line.
    """
    start = self.line_for_node(node)
    through_with = env.PYBEHAVIOR.exit_through_with
    if through_with:
        self.block_stack.append(WithBlock(start=start))
    exits = self.add_body_arcs(node.body, from_start=ArcStart(start))
    if not through_with:
        return exits

    with_block = self.block_stack.pop()
    with_exit = {ArcStart(start)}
    if exits:
        # Leaving the body goes back through the `with` line.
        for body_exit in exits:
            self.add_arc(body_exit.lineno, start)
        exits = with_exit
    # Pass each kind of collected jump on, relabeled as exits of the `with` line.
    jump_routes = (
        (with_block.break_from, self.process_break_exits),
        (with_block.continue_from, self.process_continue_exits),
        (with_block.return_from, self.process_return_exits),
    )
    for jump_starts, process_exits in jump_routes:
        if jump_starts:
            process_exits(self._combine_finally_starts(jump_starts, with_exit))
    return exits

# `async with` is analyzed exactly like `with`.
_handle__AsyncWith = _handle__With
1246
1247 # Code object dispatchers: _code_object__*
1248 #
1249 # These methods are used by analyze() as the start of the analysis.
1250 # There is one for each construct with a code object.
1251
def _code_object__Module(self, node):
    """Add the entry and exit arcs for a module's code object.

    The negated start line stands for entering/leaving the code object.
    """
    start = self.line_for_node(node)
    if not node.body:
        # Empty module.
        self.add_arc(-start, start)
        self.add_arc(start, -start)
        return
    for xit in self.add_body_arcs(node.body, from_start=ArcStart(-start)):
        self.add_arc(xit.lineno, -start, xit.cause, "didn't exit the module")
1262
def _code_object__FunctionDef(self, node):
    """Add the arcs for a function's code object.

    A `FunctionBlock` is pushed while the body is analyzed, and the exits of
    the body are processed as return exits.
    """
    entry_line = self.line_for_node(node)
    self.block_stack.append(FunctionBlock(start=entry_line, name=node.name))
    self.process_return_exits(
        self.add_body_arcs(node.body, from_start=ArcStart(-entry_line))
    )
    self.block_stack.pop()

# Async functions are analyzed exactly like regular functions.
_code_object__AsyncFunctionDef = _code_object__FunctionDef
1271
def _code_object__ClassDef(self, node):
    """Add the entry and exit arcs for a class body's code object.

    The negated start line stands for entering/leaving the code object.
    """
    class_line = self.line_for_node(node)
    self.add_arc(-class_line, class_line)
    body_exits = self.add_body_arcs(node.body, from_start=ArcStart(class_line))
    not_exited = f"didn't exit the body of class {node.name!r}"
    for xit in body_exits:
        self.add_arc(xit.lineno, -class_line, xit.cause, not_exited)
1281
def _make_expression_code_method(noun):     # pylint: disable=no-self-argument
    """Build a `_code_object__*` method for an expression with its own code object.

    `noun` names the construct in the messages passed to `add_arc`. The
    generated method records one arc into the expression's line and one arc
    back out of it.
    """
    def _code_object__expression_callable(self, node):
        start = self.line_for_node(node)
        entry_msg = f"didn't run the {noun} on line {start}"
        exit_msg = f"didn't finish the {noun} on line {start}"
        self.add_arc(-start, start, None, entry_msg)
        self.add_arc(start, -start, None, exit_msg)
    return _code_object__expression_callable

# One generated method per expression node type that has a code object.
# pylint: disable=too-many-function-args
_code_object__Lambda = _make_expression_code_method("lambda")
_code_object__GeneratorExp = _make_expression_code_method("generator expression")
_code_object__DictComp = _make_expression_code_method("dictionary comprehension")
_code_object__SetComp = _make_expression_code_method("set comprehension")
_code_object__ListComp = _make_expression_code_method("list comprehension")
1296
1297
# Code only used when dumping the AST for debugging.

# Names of AST node fields that `ast_dump` leaves out of its output.
SKIP_DUMP_FIELDS = ["ctx"]
1301
def _is_simple_value(value):
    """Is `value` simple enough to be displayed on a single line?

    True for `None`, for values equal to an empty list, tuple, dict or set,
    and for any bytes, int, float or str.
    """
    empties = [None, [], (), {}, set()]
    if value in empties:
        return True
    return isinstance(value, (bytes, int, float, str))
1308
def ast_dump(node, depth=0, print=print):       # pylint: disable=redefined-builtin
    """Dump the AST for `node`.

    Recursively walks the AST and emits a readable rendering, one call to
    `print` per output line. `depth` is the number of spaces of indentation
    for this node; `print` is the callable that receives each line.
    Fields named in `SKIP_DUMP_FIELDS` are omitted.
    """
    indent = " " * depth

    # Position marker: " @ line,col", plus ":[end_line,]end_col" when the
    # node carries end positions.
    linemark = ""
    if getattr(node, "lineno", None) is not None:
        linemark = f" @ {node.lineno},{node.col_offset}"
        if hasattr(node, "end_lineno"):
            linemark += ":"
            if node.end_lineno != node.lineno:
                linemark += f"{node.end_lineno},"
            linemark += f"{node.end_col_offset}"
    head = f"{indent}<{node.__class__.__name__}{linemark}"

    named_fields = [
        pair for pair in ast.iter_fields(node) if pair[0] not in SKIP_DUMP_FIELDS
    ]

    # Nodes with no fields, or with one simple field, fit on a single line.
    if not named_fields:
        print(f"{head}>")
        return
    if len(named_fields) == 1 and _is_simple_value(named_fields[0][1]):
        field_name, value = named_fields[0]
        print(f"{head} {field_name}: {value!r}>")
        return

    print(head)
    if 0:
        # Disabled debugging aid: show the node class's base classes.
        print("{}# mro: {}".format(
            indent, ", ".join(c.__name__ for c in node.__class__.__mro__[1:]),
        ))
    # NOTE(review): nested nodes are dumped at depth + 8 while field lines
    # are indented by a single extra space here -- confirm the intended
    # width of these indent strings.
    next_indent = indent + " "
    for field_name, value in named_fields:
        prefix = f"{next_indent}{field_name}:"
        if _is_simple_value(value):
            print(f"{prefix} {value!r}")
        elif isinstance(value, list):
            print(f"{prefix} [")
            for item in value:
                if _is_simple_value(item):
                    print(f"{next_indent} {item!r}")
                else:
                    ast_dump(item, depth + 8, print=print)
            print(f"{next_indent}]")
        else:
            print(prefix)
            ast_dump(value, depth + 8, print=print)

    print(f"{indent}>")

eric ide

mercurial