--- a/eric7/DebugClients/Python/coverage/parser.py Fri Nov 19 19:28:47 2021 +0100 +++ b/eric7/DebugClients/Python/coverage/parser.py Sat Nov 20 16:47:38 2021 +0100 @@ -11,16 +11,14 @@ import tokenize from coverage import env -from coverage.backward import range # pylint: disable=redefined-builtin -from coverage.backward import bytes_to_ints, string_class from coverage.bytecode import code_objects from coverage.debug import short_stack +from coverage.exceptions import NoSource, NotPython, StopEverything from coverage.misc import contract, join_regex, new_contract, nice_pair, one_of -from coverage.misc import NoSource, NotPython, StopEverything from coverage.phystokens import compile_unicode, generate_tokens, neuter_encoding_declaration -class PythonParser(object): +class PythonParser: """Parse code to find executable lines, excluded lines, etc. This information is all based on static analysis: no code execution is @@ -42,10 +40,8 @@ from coverage.python import get_python_source try: self.text = get_python_source(self.filename) - except IOError as err: - raise NoSource( - "No source for code: '%s': %s" % (self.filename, err) - ) + except OSError as err: + raise NoSource(f"No source for code: '{self.filename}': {err}") from err self.exclude = exclude @@ -84,18 +80,10 @@ # multi-line statements. self._multiline = {} - # Lazily-created ByteParser, arc data, and missing arc descriptions. - self._byte_parser = None + # Lazily-created arc data, and missing arc descriptions. self._all_arcs = None self._missing_arc_fragments = None - @property - def byte_parser(self): - """Create a ByteParser on demand.""" - if not self._byte_parser: - self._byte_parser = ByteParser(self.text, filename=self.filename) - return self._byte_parser - def lines_matching(self, *regexes): """Find the lines matching one of a list of regexes. @@ -105,8 +93,6 @@ """ combined = join_regex(regexes) - if env.PY2: - combined = combined.decode("utf8") regex_c = re.compile(combined) matches = set() for i, ltext in enumerate(self.lines, start=1): @@ -203,7 +189,8 @@ # Find the starts of the executable statements. if not empty: - self.raw_statements.update(self.byte_parser._find_statements()) + byte_parser = ByteParser(self.text, filename=self.filename) + self.raw_statements.update(byte_parser._find_statements()) # The first line of modules can lie and say 1 always, even if the first # line of code is later. If so, map 1 to the actual first line of the @@ -251,10 +238,9 @@ else: lineno = err.args[1][0] # TokenError raise NotPython( - u"Couldn't parse '%s' as Python source: '%s' at line %d" % ( - self.filename, err.args[0], lineno - ) - ) + f"Couldn't parse '{self.filename}' as Python source: " + + f"{err.args[0]!r} at line {lineno}" + ) from err self.excluded = self.first_lines(self.raw_excluded) @@ -349,16 +335,16 @@ emsg = "didn't jump to line {lineno}" emsg = emsg.format(lineno=end) - msg = "line {start} {emsg}".format(start=actual_start, emsg=emsg) + msg = f"line {actual_start} {emsg}" if smsg is not None: - msg += ", because {smsg}".format(smsg=smsg.format(lineno=actual_start)) + msg += f", because {smsg.format(lineno=actual_start)}" msgs.append(msg) return " or ".join(msgs) -class ByteParser(object): +class ByteParser: """Parse bytecode to understand the structure of code.""" @contract(text='unicode') @@ -371,17 +357,17 @@ self.code = compile_unicode(text, filename, "exec") except SyntaxError as synerr: raise NotPython( - u"Couldn't parse '%s' as Python source: '%s' at line %d" % ( + "Couldn't parse '%s' as Python source: '%s' at line %d" % ( filename, synerr.msg, synerr.lineno ) - ) + ) from synerr # Alternative Python implementations don't always provide all the # attributes on code objects that we need to do the analysis. for attr in ['co_lnotab', 'co_firstlineno']: if not hasattr(self.code, attr): raise StopEverything( # pragma: only jython - "This implementation of Python doesn't support code analysis.\n" + "This implementation of Python doesn't support code analysis.\n" + "Run coverage.py under another Python for this command." ) @@ -405,8 +391,8 @@ yield line else: # Adapted from dis.py in the standard library. - byte_increments = bytes_to_ints(self.code.co_lnotab[0::2]) - line_increments = bytes_to_ints(self.code.co_lnotab[1::2]) + byte_increments = self.code.co_lnotab[0::2] + line_increments = self.code.co_lnotab[1::2] last_line_num = None line_num = self.code.co_firstlineno @@ -432,15 +418,45 @@ """ for bp in self.child_parsers(): # Get all of the lineno information from this code. - for l in bp._line_numbers(): - yield l + yield from bp._line_numbers() # # AST analysis # -class LoopBlock(object): +class BlockBase: + """ + Blocks need to handle various exiting statements in their own ways. + + All of these methods take a list of exits, and a callable `add_arc` + function that they can use to add arcs if needed. They return True if the + exits are handled, or False if the search should continue up the block + stack. + """ + # pylint: disable=unused-argument + def process_break_exits(self, exits, add_arc): + """Process break exits.""" + # Because break can only appear in loops, and most subclasses + # implement process_break_exits, this function is never reached. + raise AssertionError + + def process_continue_exits(self, exits, add_arc): + """Process continue exits.""" + # Because continue can only appear in loops, and most subclasses + # implement process_continue_exits, this function is never reached. + raise AssertionError + + def process_raise_exits(self, exits, add_arc): + """Process raise exits.""" + return False + + def process_return_exits(self, exits, add_arc): + """Process return exits.""" + return False + + +class LoopBlock(BlockBase): """A block on the block stack representing a `for` or `while` loop.""" @contract(start=int) def __init__(self, start): @@ -449,8 +465,17 @@ # A set of ArcStarts, the arcs from break statements exiting this loop. self.break_exits = set() + def process_break_exits(self, exits, add_arc): + self.break_exits.update(exits) + return True -class FunctionBlock(object): + def process_continue_exits(self, exits, add_arc): + for xit in exits: + add_arc(xit.lineno, self.start, xit.cause) + return True + + +class FunctionBlock(BlockBase): """A block on the block stack representing a function definition.""" @contract(start=int, name=str) def __init__(self, start, name): @@ -459,8 +484,24 @@ # The name of the function. self.name = name + def process_raise_exits(self, exits, add_arc): + for xit in exits: + add_arc( + xit.lineno, -self.start, xit.cause, + f"didn't except from function {self.name!r}", + ) + return True -class TryBlock(object): + def process_return_exits(self, exits, add_arc): + for xit in exits: + add_arc( + xit.lineno, -self.start, xit.cause, + f"didn't return from function {self.name!r}", + ) + return True + + +class TryBlock(BlockBase): """A block on the block stack representing a `try` block.""" @contract(handler_start='int|None', final_start='int|None') def __init__(self, handler_start, final_start): @@ -473,8 +514,73 @@ # that need to route through the "finally:" clause. self.break_from = set() self.continue_from = set() + self.raise_from = set() self.return_from = set() - self.raise_from = set() + + def process_break_exits(self, exits, add_arc): + if self.final_start is not None: + self.break_from.update(exits) + return True + return False + + def process_continue_exits(self, exits, add_arc): + if self.final_start is not None: + self.continue_from.update(exits) + return True + return False + + def process_raise_exits(self, exits, add_arc): + if self.handler_start is not None: + for xit in exits: + add_arc(xit.lineno, self.handler_start, xit.cause) + else: + assert self.final_start is not None + self.raise_from.update(exits) + return True + + def process_return_exits(self, exits, add_arc): + if self.final_start is not None: + self.return_from.update(exits) + return True + return False + + +class WithBlock(BlockBase): + """A block on the block stack representing a `with` block.""" + @contract(start=int) + def __init__(self, start): + # We only ever use this block if it is needed, so that we don't have to + # check this setting in all the methods. + assert env.PYBEHAVIOR.exit_through_with + + # The line number of the with statement. + self.start = start + + # The ArcStarts for breaks/continues/returns/raises inside the "with:" + # that need to go through the with-statement while exiting. + self.break_from = set() + self.continue_from = set() + self.return_from = set() + + def _process_exits(self, exits, add_arc, from_set=None): + """Helper to process the four kinds of exits.""" + for xit in exits: + add_arc(xit.lineno, self.start, xit.cause) + if from_set is not None: + from_set.update(exits) + return True + + def process_break_exits(self, exits, add_arc): + return self._process_exits(exits, add_arc, self.break_from) + + def process_continue_exits(self, exits, add_arc): + return self._process_exits(exits, add_arc, self.continue_from) + + def process_raise_exits(self, exits, add_arc): + return self._process_exits(exits, add_arc) + + def process_return_exits(self, exits, add_arc): + return self._process_exits(exits, add_arc, self.return_from) class ArcStart(collections.namedtuple("Arc", "lineno, cause")): @@ -490,7 +596,7 @@ """ def __new__(cls, lineno, cause=None): - return super(ArcStart, cls).__new__(cls, lineno, cause) + return super().__new__(cls, lineno, cause) # Define contract words that PyContract doesn't have. @@ -498,11 +604,7 @@ new_contract('ArcStarts', lambda seq: all(isinstance(x, ArcStart) for x in seq)) -# Turn on AST dumps with an environment variable. -# $set_env.py: COVERAGE_AST_DUMP - Dump the AST nodes when parsing code. -AST_DUMP = bool(int(os.environ.get("COVERAGE_AST_DUMP", 0))) - -class NodeList(object): +class NodeList: """A synthetic fictitious node, containing a sequence of nodes. This is used when collapsing optimized if-statements, to represent the @@ -513,25 +615,33 @@ self.body = body self.lineno = body[0].lineno - # TODO: some add_arcs methods here don't add arcs, they return them. Rename them. # TODO: the cause messages have too many commas. # TODO: Shouldn't the cause messages join with "and" instead of "or"? -class AstArcAnalyzer(object): +def ast_parse(text): + """How we create an AST parse.""" + return ast.parse(neuter_encoding_declaration(text)) + + +class AstArcAnalyzer: """Analyze source text with an AST to find executable code paths.""" @contract(text='unicode', statements=set) def __init__(self, text, statements, multiline): - self.root_node = ast.parse(neuter_encoding_declaration(text)) + self.root_node = ast_parse(text) # TODO: I think this is happening in too many places. self.statements = {multiline.get(l, l) for l in statements} self.multiline = multiline - if AST_DUMP: # pragma: debugging + # Turn on AST dumps with an environment variable. + # $set_env.py: COVERAGE_AST_DUMP - Dump the AST nodes when parsing code. + dump_ast = bool(int(os.environ.get("COVERAGE_AST_DUMP", 0))) + + if dump_ast: # pragma: debugging # Dump the AST so that failing tests have helpful output. - print("Statements: {}".format(self.statements)) - print("Multiline map: {}".format(self.multiline)) + print(f"Statements: {self.statements}") + print(f"Multiline map: {self.multiline}") ast_dump(self.root_node) self.arcs = set() @@ -564,7 +674,7 @@ def add_arc(self, start, end, smsg=None, emsg=None): """Add an arc, including message fragments to use if it is missing.""" if self.debug: # pragma: debugging - print("\nAdding arc: ({}, {}): {!r}, {!r}".format(start, end, smsg, emsg)) + print(f"\nAdding arc: ({start}, {end}): {smsg!r}, {emsg!r}") print(short_stack(limit=6)) self.arcs.add((start, end)) @@ -603,8 +713,7 @@ _line__ClassDef = _line_decorated def _line__Dict(self, node): - # Python 3.5 changed how dict literals are made. - if env.PYVERSION >= (3, 5) and node.keys: + if node.keys: if node.keys[0] is not None: return node.keys[0].lineno else: @@ -634,8 +743,8 @@ # The node types that just flow to the next node with no complications. OK_TO_DEFAULT = { - "Assign", "Assert", "AugAssign", "Delete", "Exec", "Expr", "Global", - "Import", "ImportFrom", "Nonlocal", "Pass", "Print", + "AnnAssign", "Assign", "Assert", "AugAssign", "Delete", "Expr", "Global", + "Import", "ImportFrom", "Nonlocal", "Pass", } @contract(returns='ArcStarts') @@ -661,11 +770,10 @@ return handler(node) else: # No handler: either it's something that's ok to default (a simple - # statement), or it's something we overlooked. Change this 0 to 1 - # to see if it's overlooked. - if 0: + # statement), or it's something we overlooked. + if env.TESTING: if node_name not in self.OK_TO_DEFAULT: - print("*** Unhandled: {}".format(node)) + raise Exception(f"*** Unhandled: {node}") # pragma: only failure # Default for simple statements: one exit from this node. return {ArcStart(self.line_for_node(node))} @@ -799,61 +907,30 @@ @contract(exits='ArcStarts') def process_break_exits(self, exits): """Add arcs due to jumps from `exits` being breaks.""" - for block in self.nearest_blocks(): - if isinstance(block, LoopBlock): - block.break_exits.update(exits) - break - elif isinstance(block, TryBlock) and block.final_start is not None: - block.break_from.update(exits) + for block in self.nearest_blocks(): # pragma: always breaks + if block.process_break_exits(exits, self.add_arc): break @contract(exits='ArcStarts') def process_continue_exits(self, exits): """Add arcs due to jumps from `exits` being continues.""" - for block in self.nearest_blocks(): - if isinstance(block, LoopBlock): - for xit in exits: - self.add_arc(xit.lineno, block.start, xit.cause) - break - elif isinstance(block, TryBlock) and block.final_start is not None: - block.continue_from.update(exits) + for block in self.nearest_blocks(): # pragma: always breaks + if block.process_continue_exits(exits, self.add_arc): break @contract(exits='ArcStarts') def process_raise_exits(self, exits): """Add arcs due to jumps from `exits` being raises.""" for block in self.nearest_blocks(): - if isinstance(block, TryBlock): - if block.handler_start is not None: - for xit in exits: - self.add_arc(xit.lineno, block.handler_start, xit.cause) - break - elif block.final_start is not None: - block.raise_from.update(exits) - break - elif isinstance(block, FunctionBlock): - for xit in exits: - self.add_arc( - xit.lineno, -block.start, xit.cause, - "didn't except from function {!r}".format(block.name), - ) + if block.process_raise_exits(exits, self.add_arc): break @contract(exits='ArcStarts') def process_return_exits(self, exits): """Add arcs due to jumps from `exits` being returns.""" - for block in self.nearest_blocks(): - if isinstance(block, TryBlock) and block.final_start is not None: - block.return_from.update(exits) + for block in self.nearest_blocks(): # pragma: always breaks + if block.process_return_exits(exits, self.add_arc): break - elif isinstance(block, FunctionBlock): - for xit in exits: - self.add_arc( - xit.lineno, -block.start, xit.cause, - "didn't return from function {!r}".format(block.name), - ) - break - # Handlers: _handle__* # @@ -862,6 +939,9 @@ # also call self.add_arc to record arcs they find. These functions mirror # the Python semantics of each syntactic construct. See the docstring # for add_arcs to understand the concept of exits from a node. + # + # Every node type that represents a statement should have a handler, or it + # should be listed in OK_TO_DEFAULT. @contract(returns='ArcStarts') def _handle__Break(self, node): @@ -943,6 +1023,24 @@ return exits @contract(returns='ArcStarts') + def _handle__Match(self, node): + start = self.line_for_node(node) + last_start = start + exits = set() + had_wildcard = False + for case in node.cases: + case_start = self.line_for_node(case.pattern) + if isinstance(case.pattern, ast.MatchAs): + had_wildcard = True + self.add_arc(last_start, case_start, "the pattern on line {lineno} always matched") + from_start = ArcStart(case_start, cause="the pattern on line {lineno} never matched") + exits |= self.add_body_arcs(case.body, from_start=from_start) + last_start = case_start + if not had_wildcard: + exits.add(from_start) + return exits + + @contract(returns='ArcStarts') def _handle__NodeList(self, node): start = self.line_for_node(node) exits = self.add_body_arcs(node.body, from_start=ArcStart(start)) @@ -976,6 +1074,9 @@ else: final_start = None + # This is true by virtue of Python syntax: have to have either except + # or finally, or both. + assert handler_start is not None or final_start is not None try_block = TryBlock(handler_start, final_start) self.block_stack.append(try_block) @@ -1090,36 +1191,11 @@ return exits @contract(returns='ArcStarts') - def _handle__TryExcept(self, node): - # Python 2.7 uses separate TryExcept and TryFinally nodes. If we get - # TryExcept, it means there was no finally, so fake it, and treat as - # a general Try node. - node.finalbody = [] - return self._handle__Try(node) - - @contract(returns='ArcStarts') - def _handle__TryFinally(self, node): - # Python 2.7 uses separate TryExcept and TryFinally nodes. If we get - # TryFinally, see if there's a TryExcept nested inside. If so, merge - # them. Otherwise, fake fields to complete a Try node. - node.handlers = [] - node.orelse = [] - - first = node.body[0] - if first.__class__.__name__ == "TryExcept" and node.lineno == first.lineno: - assert len(node.body) == 1 - node.body = first.body - node.handlers = first.handlers - node.orelse = first.orelse - - return self._handle__Try(node) - - @contract(returns='ArcStarts') def _handle__While(self, node): start = to_top = self.line_for_node(node.test) constant_test = self.is_constant_expr(node.test) top_is_body0 = False - if constant_test and (env.PY3 or constant_test == "Num"): + if constant_test: top_is_body0 = True if env.PYBEHAVIOR.keep_constant_test: top_is_body0 = False @@ -1146,11 +1222,37 @@ @contract(returns='ArcStarts') def _handle__With(self, node): start = self.line_for_node(node) + if env.PYBEHAVIOR.exit_through_with: + self.block_stack.append(WithBlock(start=start)) exits = self.add_body_arcs(node.body, from_start=ArcStart(start)) + if env.PYBEHAVIOR.exit_through_with: + with_block = self.block_stack.pop() + with_exit = {ArcStart(start)} + if exits: + for xit in exits: + self.add_arc(xit.lineno, start) + exits = with_exit + if with_block.break_from: + self.process_break_exits( + self._combine_finally_starts(with_block.break_from, with_exit) + ) + if with_block.continue_from: + self.process_continue_exits( + self._combine_finally_starts(with_block.continue_from, with_exit) + ) + if with_block.return_from: + self.process_return_exits( + self._combine_finally_starts(with_block.return_from, with_exit) + ) return exits _handle__AsyncWith = _handle__With + # Code object dispatchers: _code_object__* + # + # These methods are used by analyze() as the start of the analysis. + # There is one for each construct with a code object. + def _code_object__Module(self, node): start = self.line_for_node(node) if node.body: @@ -1178,86 +1280,85 @@ for xit in exits: self.add_arc( xit.lineno, -start, xit.cause, - "didn't exit the body of class {!r}".format(node.name), + f"didn't exit the body of class {node.name!r}", ) - def _make_oneline_code_method(noun): # pylint: disable=no-self-argument - """A function to make methods for online callable _code_object__ methods.""" - def _code_object__oneline_callable(self, node): + def _make_expression_code_method(noun): # pylint: disable=no-self-argument + """A function to make methods for expression-based callable _code_object__ methods.""" + def _code_object__expression_callable(self, node): start = self.line_for_node(node) - self.add_arc(-start, start, None, "didn't run the {} on line {}".format(noun, start)) - self.add_arc( - start, -start, None, - "didn't finish the {} on line {}".format(noun, start), - ) - return _code_object__oneline_callable + self.add_arc(-start, start, None, f"didn't run the {noun} on line {start}") + self.add_arc(start, -start, None, f"didn't finish the {noun} on line {start}") + return _code_object__expression_callable - _code_object__Lambda = _make_oneline_code_method("lambda") - _code_object__GeneratorExp = _make_oneline_code_method("generator expression") - _code_object__DictComp = _make_oneline_code_method("dictionary comprehension") - _code_object__SetComp = _make_oneline_code_method("set comprehension") - if env.PY3: - _code_object__ListComp = _make_oneline_code_method("list comprehension") + _code_object__Lambda = _make_expression_code_method("lambda") + _code_object__GeneratorExp = _make_expression_code_method("generator expression") + _code_object__DictComp = _make_expression_code_method("dictionary comprehension") + _code_object__SetComp = _make_expression_code_method("set comprehension") + _code_object__ListComp = _make_expression_code_method("list comprehension") -if AST_DUMP: # pragma: debugging - # Code only used when dumping the AST for debugging. +# Code only used when dumping the AST for debugging. - SKIP_DUMP_FIELDS = ["ctx"] +SKIP_DUMP_FIELDS = ["ctx"] - def _is_simple_value(value): - """Is `value` simple enough to be displayed on a single line?""" - return ( - value in [None, [], (), {}, set()] or - isinstance(value, (string_class, int, float)) - ) +def _is_simple_value(value): + """Is `value` simple enough to be displayed on a single line?""" + return ( + value in [None, [], (), {}, set()] or + isinstance(value, (bytes, int, float, str)) + ) - def ast_dump(node, depth=0): - """Dump the AST for `node`. +def ast_dump(node, depth=0, print=print): # pylint: disable=redefined-builtin + """Dump the AST for `node`. - This recursively walks the AST, printing a readable version. + This recursively walks the AST, printing a readable version. - """ - indent = " " * depth - if not isinstance(node, ast.AST): - print("{}<{} {!r}>".format(indent, node.__class__.__name__, node)) - return - - lineno = getattr(node, "lineno", None) - if lineno is not None: - linemark = " @ {}".format(node.lineno) - else: - linemark = "" - head = "{}<{}{}".format(indent, node.__class__.__name__, linemark) + """ + indent = " " * depth + lineno = getattr(node, "lineno", None) + if lineno is not None: + linemark = f" @ {node.lineno},{node.col_offset}" + if hasattr(node, "end_lineno"): + linemark += ":" + if node.end_lineno != node.lineno: + linemark += f"{node.end_lineno}," + linemark += f"{node.end_col_offset}" + else: + linemark = "" + head = f"{indent}<{node.__class__.__name__}{linemark}" - named_fields = [ - (name, value) - for name, value in ast.iter_fields(node) - if name not in SKIP_DUMP_FIELDS - ] - if not named_fields: - print("{}>".format(head)) - elif len(named_fields) == 1 and _is_simple_value(named_fields[0][1]): - field_name, value = named_fields[0] - print("{} {}: {!r}>".format(head, field_name, value)) - else: - print(head) - if 0: - print("{}# mro: {}".format( - indent, ", ".join(c.__name__ for c in node.__class__.__mro__[1:]), - )) - next_indent = indent + " " - for field_name, value in named_fields: - prefix = "{}{}:".format(next_indent, field_name) - if _is_simple_value(value): - print("{} {!r}".format(prefix, value)) - elif isinstance(value, list): - print("{} [".format(prefix)) - for n in value: - ast_dump(n, depth + 8) - print("{}]".format(next_indent)) - else: - print(prefix) - ast_dump(value, depth + 8) + named_fields = [ + (name, value) + for name, value in ast.iter_fields(node) + if name not in SKIP_DUMP_FIELDS + ] + if not named_fields: + print(f"{head}>") + elif len(named_fields) == 1 and _is_simple_value(named_fields[0][1]): + field_name, value = named_fields[0] + print(f"{head} {field_name}: {value!r}>") + else: + print(head) + if 0: + print("{}# mro: {}".format( + indent, ", ".join(c.__name__ for c in node.__class__.__mro__[1:]), + )) + next_indent = indent + " " + for field_name, value in named_fields: + prefix = f"{next_indent}{field_name}:" + if _is_simple_value(value): + print(f"{prefix} {value!r}") + elif isinstance(value, list): + print(f"{prefix} [") + for n in value: + if _is_simple_value(n): + print(f"{next_indent} {n!r}") + else: + ast_dump(n, depth + 8, print=print) + print(f"{next_indent}]") + else: + print(prefix) + ast_dump(value, depth + 8, print=print) - print("{}>".format(indent)) + print(f"{indent}>")