DebugClients/Python3/coverage/parser.py

branch: Py2 comp.
changeset 3495: fac17a82b431
parent 29: 391dc0bc4ae5
child 4489: d0d6e4ad31bd
diff -r f1cbc18f88b2 -r fac17a82b431 DebugClients/Python3/coverage/parser.py
--- a/DebugClients/Python3/coverage/parser.py	Fri Apr 04 22:57:07 2014 +0200
+++ b/DebugClients/Python3/coverage/parser.py	Thu Apr 10 23:02:20 2014 +0200
@@ -1,10 +1,14 @@
 """Code parsing for Coverage."""
 
-import glob, opcode, os, re, sys, token, tokenize
+import dis, re, sys, token, tokenize
 
-from .backward import set, sorted, StringIO # pylint: disable-msg=W0622
+from .backward import set, sorted, StringIO # pylint: disable=W0622
+from .backward import open_source, range    # pylint: disable=W0622
+from .backward import reversed              # pylint: disable=W0622
+from .backward import bytes_to_ints
 from .bytecode import ByteCodes, CodeObjects
-from .misc import nice_pair, CoverageException, NoSource, expensive
+from .misc import nice_pair, expensive, join_regex
+from .misc import CoverageException, NoSource, NotPython
 
 
 class CodeParser(object):
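
A side note on the import swap in the hunk above: on CPython, the dis module re-exports the opcode tables, so moving from opcode.opmap to dis.opmap is behavior-preserving. A one-line check (assuming CPython):

import dis, opcode

# dis does "from opcode import *", so the two names refer to the same dict.
assert dis.opmap is opcode.opmap
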
@@ -13,7 +17,7 @@
     def __init__(self, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
-        which text will be read.  Excluded lines are those that match
+        which the text will be read.  Excluded lines are those that match
         `exclude`, a regex.
 
         """
@@ -22,15 +26,20 @@
         self.text = text
         if not self.text:
             try:
-                sourcef = open(self.filename, 'rU')
-                self.text = sourcef.read()
-                sourcef.close()
+                sourcef = open_source(self.filename)
+                try:
+                    self.text = sourcef.read()
+                finally:
+                    sourcef.close()
             except IOError:
                 _, err, _ = sys.exc_info()
                 raise NoSource(
-                    "No source for code: %r: %s" % (self.filename, err)
+                    "No source for code: '%s': %s" % (self.filename, err)
                     )
-        self.text = self.text.replace('\r\n', '\n')
+
+        # Scrap the BOM if it exists.
+        if self.text and ord(self.text[0]) == 0xfeff:
+            self.text = self.text[1:]
 
         self.exclude = exclude
 
@@ -65,6 +74,21 @@
         return self._byte_parser
     byte_parser = property(_get_byte_parser)
 
+    def lines_matching(self, *regexes):
+        """Find the lines matching one of a list of regexes.
+
+        Returns a set of line numbers, the lines that contain a match for one
+        of the regexes in `regexes`.  The entire line needn't match, just a
+        part of it.
+
+        """
+        regex_c = re.compile(join_regex(regexes))
+        matches = set()
+        for i, ltext in enumerate(self.lines):
+            if regex_c.search(ltext):
+                matches.add(i+1)
+        return matches
+
     def _raw_parse(self):
         """Parse the source to find the interesting facts about its lines.
 
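The new lines_matching helper combines its patterns with join_regex from the misc module. A minimal sketch of the idea, with a hypothetical stand-in for join_regex (the real helper lives in coverage's misc module and may differ):

import re

def join_regex(regexes):
    # Hypothetical stand-in: OR the patterns together, each wrapped in a
    # non-capturing group so their internal alternations don't interfere.
    return "|".join("(?:%s)" % r for r in regexes)

lines = ["x = 1", "y = 2  # pragma: no cover", "raise AssertionError('boom')"]
regex_c = re.compile(
    join_regex([r"#\s*pragma:\s*no\s*cover", r"raise AssertionError"])
)
matches = set(i + 1 for i, ltext in enumerate(lines) if regex_c.search(ltext))
print(matches)  # {2, 3} -- line numbers are 1-based, as in lines_matching
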
@@ -73,10 +97,7 @@
         """
         # Find lines which match an exclusion pattern.
         if self.exclude:
-            re_exclude = re.compile(self.exclude)
-            for i, ltext in enumerate(self.lines):
-                if re_exclude.search(ltext):
-                    self.excluded.add(i+1)
+            self.excluded = self.lines_matching(self.exclude)
 
         # Tokenize, to find excluded suites, to find docstrings, and to find
         # multi-line statements.
@@ -85,10 +106,11 @@
         excluding = False
         prev_toktype = token.INDENT
         first_line = None
+        empty = True
 
-        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
+        tokgen = generate_tokens(self.text)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
-            if self.show_tokens:                # pragma: no cover
+            if self.show_tokens:                # pragma: not covered
                 print("%10s %5s %-20r %r" % (
                     tokenize.tok_name.get(toktype, toktype),
                     nice_pair((slineno, elineno)), ttext, ltext
@@ -114,8 +136,7 @@
                 # (a trick from trace.py in the stdlib.) This works for
                 # 99.9999% of cases.  For the rest (!) see:
                 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
-                for i in range(slineno, elineno+1):
-                    self.docstrings.add(i)
+                self.docstrings.update(range(slineno, elineno+1))
             elif toktype == token.NEWLINE:
                 if first_line is not None and elineno != first_line:
                     # We're at the end of a line, and we've ended on a
@@ -128,6 +149,7 @@
 
             if ttext.strip() and toktype != tokenize.COMMENT:
                 # A non-whitespace token.
+                empty = False
                 if first_line is None:
                     # The token is not whitespace, and is the first in a
                     # statement.
@@ -141,7 +163,8 @@
             prev_toktype = toktype
 
         # Find the starts of the executable statements.
-        self.statement_starts.update(self.byte_parser._find_statements())
+        if not empty:
+            self.statement_starts.update(self.byte_parser._find_statements())
 
     def first_line(self, line):
         """Return the first line number of the statement including `line`."""
@@ -152,16 +175,18 @@
             first_line = line
         return first_line
 
-    def first_lines(self, lines, ignore=None):
+    def first_lines(self, lines, *ignores):
         """Map the line numbers in `lines` to the correct first line of the
         statement.
 
-        Skip any line mentioned in `ignore`.
+        Skip any line mentioned in any of the sequences in `ignores`.
 
-        Returns a sorted list of the first lines.
+        Returns a set of the first lines.
 
         """
-        ignore = ignore or []
+        ignore = set()
+        for ign in ignores:
+            ignore.update(ign)
         lset = set()
         for l in lines:
             if l in ignore:
@@ -169,23 +194,34 @@
             new_l = self.first_line(l)
             if new_l not in ignore:
                 lset.add(new_l)
-        return sorted(lset)
+        return lset
 
     def parse_source(self):
         """Parse source text to find executable lines, excluded lines, etc.
 
-        Return values are 1) a sorted list of executable line numbers, and
-        2) a sorted list of excluded line numbers.
+        Return values are 1) a set of executable line numbers, and 2) a set of
+        excluded line numbers.
 
         Reported line numbers are normalized to the first line of multi-line
         statements.
 
         """
-        self._raw_parse()
+        try:
+            self._raw_parse()
+        except (tokenize.TokenError, IndentationError):
+            _, tokerr, _ = sys.exc_info()
+            msg, lineno = tokerr.args
+            raise NotPython(
+                "Couldn't parse '%s' as Python source: '%s' at %s" %
+                    (self.filename, msg, lineno)
+                )
 
         excluded_lines = self.first_lines(self.excluded)
-        ignore = excluded_lines + list(self.docstrings)
-        lines = self.first_lines(self.statement_starts, ignore)
+        lines = self.first_lines(
+            self.statement_starts,
+            excluded_lines,
+            self.docstrings
+        )
 
         return lines, excluded_lines
 
@@ -214,7 +250,7 @@
         excluded_lines = self.first_lines(self.excluded)
         exit_counts = {}
         for l1, l2 in self.arcs():
-            if l1 == -1:
+            if l1 < 0:
                 # Don't ever report -1 as a line number
                 continue
             if l1 in excluded_lines:
@@ -240,12 +276,18 @@
 ## Opcodes that guide the ByteParser.
 
 def _opcode(name):
-    """Return the opcode by name from the opcode module."""
-    return opcode.opmap[name]
+    """Return the opcode by name from the dis module."""
+    return dis.opmap[name]
 
 def _opcode_set(*names):
     """Return a set of opcodes by the names in `names`."""
-    return set([_opcode(name) for name in names])
+    s = set()
+    for name in names:
+        try:
+            s.add(_opcode(name))
+        except KeyError:
+            pass
+    return s
 
 # Opcodes that leave the code object.
 OPS_CODE_END = _opcode_set('RETURN_VALUE')
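
The try/except KeyError in the rewritten _opcode_set is what lets one opcode table serve several interpreter versions: a name the running Python doesn't define (SETUP_WITH on older releases, for instance) is skipped instead of crashing at import time. A quick self-contained demonstration:

import dis

def _opcode_set(*names):
    # Collect the opcodes this interpreter defines; silently skip the rest.
    s = set()
    for name in names:
        try:
            s.add(dis.opmap[name])
        except KeyError:
            pass
    return s

# 'NO_SUCH_OPCODE' is never defined, so it is simply ignored.
print(_opcode_set('RETURN_VALUE', 'NO_SUCH_OPCODE'))
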
@@ -256,8 +298,15 @@
     'BREAK_LOOP', 'CONTINUE_LOOP',
     )
 
+# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
+# with unconditional jump instructions, we neatly deal with jumps to jumps
+# properly.
+OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
+
 # Opcodes that push a block on the block stack.
-OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_PUSH_BLOCK = _opcode_set(
+    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
+    )
 
 # Block types for exception handling.
 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
@@ -266,7 +315,7 @@
 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
 
 # Opcodes that have a jump destination, but aren't really a jump.
-OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_NO_JUMP = OPS_PUSH_BLOCK
 
 # Individual opcodes we need below.
 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
@@ -283,12 +332,16 @@
     def __init__(self, code=None, text=None, filename=None):
         if code:
             self.code = code
+            self.text = text
         else:
             if not text:
                 assert filename, "If no code or text, need a filename"
-                sourcef = open(filename, 'rU')
-                text = sourcef.read()
-                sourcef.close()
+                sourcef = open_source(filename)
+                try:
+                    text = sourcef.read()
+                finally:
+                    sourcef.close()
+            self.text = text
 
             try:
                 # Python 2.3 and 2.4 don't like partial last lines, so be sure
@@ -296,80 +349,75 @@
                 self.code = compile(text + '\n', filename, "exec")
             except SyntaxError:
                 _, synerr, _ = sys.exc_info()
-                raise CoverageException(
+                raise NotPython(
                     "Couldn't parse '%s' as Python source: '%s' at line %d" %
                         (filename, synerr.msg, synerr.lineno)
                     )
 
+        # Alternative Python implementations don't always provide all the
+        # attributes on code objects that we need to do the analysis.
+        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
+            if not hasattr(self.code, attr):
+                raise CoverageException(
+                    "This implementation of Python doesn't support code "
+                    "analysis.\n"
+                    "Run coverage.py under CPython for this command."
+                    )
+
     def child_parsers(self):
         """Iterate over all the code objects nested within this one.
 
         The iteration includes `self` as its first value.
 
         """
-        return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
-
-    # Getting numbers from the lnotab value changed in Py3.0.
-    if sys.hexversion >= 0x03000000:
-        def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab bytes in 3.x"""
-            return list(lnotab)
-    else:
-        def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab string in 2.x"""
-            return [ord(c) for c in lnotab]
+        children = CodeObjects(self.code)
+        return [ByteParser(code=c, text=self.text) for c in children]
 
     def _bytes_lines(self):
         """Map byte offsets to line numbers in `code`.
 
         Uses co_lnotab described in Python/compile.c to map byte offsets to
-        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
+        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...
+
+        Only byte offsets that correspond to line numbers are included in the
+        results.
 
         """
         # Adapted from dis.py in the standard library.
-        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
-        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
+        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
+        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])
 
-        bytes_lines = []
         last_line_num = None
         line_num = self.code.co_firstlineno
         byte_num = 0
         for byte_incr, line_incr in zip(byte_increments, line_increments):
             if byte_incr:
                 if line_num != last_line_num:
-                    bytes_lines.append((byte_num, line_num))
+                    yield (byte_num, line_num)
                     last_line_num = line_num
                 byte_num += byte_incr
             line_num += line_incr
         if line_num != last_line_num:
-            bytes_lines.append((byte_num, line_num))
-        return bytes_lines
+            yield (byte_num, line_num)
 
     def _find_statements(self):
         """Find the statements in `self.code`.
 
-        Return a set of line numbers that start statements.  Recurses into all
-        code objects reachable from `self.code`.
+        Produce a sequence of line numbers that start statements.  Recurses
+        into all code objects reachable from `self.code`.
 
         """
-        stmts = set()
         for bp in self.child_parsers():
             # Get all of the lineno information from this code.
             for _, l in bp._bytes_lines():
-                stmts.add(l)
-        return stmts
-
-    def _disassemble(self):     # pragma: no cover
-        """Disassemble code, for ad-hoc experimenting."""
+                yield l
 
-        import dis
-
-        for bp in self.child_parsers():
-            print("\n%s: " % bp.code)
-            dis.dis(bp.code)
-            print("Bytes lines: %r" % bp._bytes_lines())
-
-        print("")
+    def _block_stack_repr(self, block_stack):
+        """Get a string version of `block_stack`, for debugging."""
+        blocks = ", ".join(
+            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+        )
+        return "[" + blocks + "]"
 
     def _split_into_chunks(self):
         """Split the code object into a list of `Chunk` objects.
@@ -380,10 +428,11 @@
         Returns a list of `Chunk` objects.
 
         """
-
         # The list of chunks so far, and the one we're working on.
         chunks = []
         chunk = None
+
+        # A dict mapping byte offsets of line starts to the line numbers.
         bytes_lines_map = dict(self._bytes_lines())
 
         # The block stack: loops and try blocks get pushed here for the
@@ -398,16 +447,38 @@
         # We have to handle the last two bytecodes specially.
         ult = penult = None
 
-        for bc in ByteCodes(self.code.co_code):
-            # Maybe have to start a new block
+        # Get a set of all of the jump-to points.
+        jump_to = set()
+        bytecodes = list(ByteCodes(self.code.co_code))
+        for bc in bytecodes:
+            if bc.jump_to >= 0:
+                jump_to.add(bc.jump_to)
+
+        chunk_lineno = 0
+
+        # Walk the byte codes building chunks.
+        for bc in bytecodes:
+            # Maybe have to start a new chunk
+            start_new_chunk = False
+            first_chunk = False
             if bc.offset in bytes_lines_map:
+                # Start a new chunk for each source line number.
+                start_new_chunk = True
+                chunk_lineno = bytes_lines_map[bc.offset]
+                first_chunk = True
+            elif bc.offset in jump_to:
+                # To make chunks have a single entrance, we have to make a new
+                # chunk when we get to a place some bytecode jumps to.
+                start_new_chunk = True
+            elif bc.op in OPS_CHUNK_BEGIN:
+                # Jumps deserve their own unnumbered chunk.  This fixes
+                # problems with jumps to jumps getting confused.
+                start_new_chunk = True
+
+            if not chunk or start_new_chunk:
                 if chunk:
                     chunk.exits.add(bc.offset)
-                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
-                chunks.append(chunk)
-
-            if not chunk:
-                chunk = Chunk(bc.offset)
+                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                 chunks.append(chunk)
 
             # Look at the opcode
@@ -421,7 +492,7 @@
 
             if bc.op in OPS_CODE_END:
                 # The opcode can exit the code object.
-                chunk.exits.add(-1)
+                chunk.exits.add(-self.code.co_firstlineno)
             if bc.op in OPS_PUSH_BLOCK:
                 # The opcode adds a block to the block_stack.
                 block_stack.append((bc.op, bc.jump_to))
@@ -436,15 +507,11 @@
                     chunk.exits.add(block_stack[-1][1])
                 chunk = None
             if bc.op == OP_END_FINALLY:
-                if block_stack:
-                    # A break that goes through a finally will jump to whatever
-                    # block is on top of the stack.
-                    chunk.exits.add(block_stack[-1][1])
                 # For the finally clause we need to find the closest exception
                 # block, and use its jump target as an exit.
-                for iblock in range(len(block_stack)-1, -1, -1):
-                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
-                        chunk.exits.add(block_stack[iblock][1])
+                for block in reversed(block_stack):
+                    if block[0] in OPS_EXCEPT_BLOCKS:
+                        chunk.exits.add(block[1])
                         break
             if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                 # This is an except clause.  We want to overlook the next
@@ -454,7 +521,6 @@
             penult = ult
             ult = bc
 
-
         if chunks:
             # The last two bytecodes could be a dummy "return None" that
             # shouldn't be counted as real code. Every Python code object seems
@@ -466,27 +532,38 @@
                         # This is "return None", but is it dummy?  A real line
                         # would be a last chunk all by itself.
                         if chunks[-1].byte != penult.offset:
+                            ex = -self.code.co_firstlineno
                             # Split the last chunk
                             last_chunk = chunks[-1]
-                            last_chunk.exits.remove(-1)
+                            last_chunk.exits.remove(ex)
                             last_chunk.exits.add(penult.offset)
-                            chunk = Chunk(penult.offset)
-                            chunk.exits.add(-1)
+                            chunk = Chunk(
+                                penult.offset, last_chunk.line, False
+                            )
+                            chunk.exits.add(ex)
                             chunks.append(chunk)
 
             # Give all the chunks a length.
-            chunks[-1].length = bc.next_offset - chunks[-1].byte
+            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
             for i in range(len(chunks)-1):
                 chunks[i].length = chunks[i+1].byte - chunks[i].byte
 
+        #self.validate_chunks(chunks)
         return chunks
 
+    def validate_chunks(self, chunks):
+        """Validate the rule that chunks have a single entrance."""
+        # starts is the entrances to the chunks
+        starts = set([ch.byte for ch in chunks])
+        for ch in chunks:
+            assert all([(ex in starts or ex < 0) for ex in ch.exits])
+
     def _arcs(self):
         """Find the executable arcs in the code.
 
-        Returns a set of pairs, (from,to).  From and to are integer line
-        numbers.  If from is -1, then the arc is an entrance into the code
-        object.  If to is -1, the arc is an exit from the code object.
+        Yields pairs: (from,to).  From and to are integer line numbers.  If
+        from is < 0, then the arc is an entrance into the code object.  If to
+        is < 0, the arc is an exit from the code object.
 
         """
         chunks = self._split_into_chunks()
@@ -494,61 +571,43 @@
         # A map from byte offsets to chunks jumped into.
         byte_chunks = dict([(c.byte, c) for c in chunks])
 
-        # Build a map from byte offsets to actual lines reached.
-        byte_lines = {-1:[-1]}
-        bytes_to_add = set([c.byte for c in chunks])
+        # There's always an entrance at the first chunk.
+        yield (-1, byte_chunks[0].line)
 
-        while bytes_to_add:
-            byte_to_add = bytes_to_add.pop()
-            if byte_to_add in byte_lines or byte_to_add == -1:
+        # Traverse from the first chunk in each line, and yield arcs where
+        # the trace function will be invoked.
+        for chunk in chunks:
+            if not chunk.first:
                 continue
 
-            # Which lines does this chunk lead to?
-            bytes_considered = set()
-            bytes_to_consider = [byte_to_add]
-            lines = set()
-
-            while bytes_to_consider:
-                byte = bytes_to_consider.pop()
-                bytes_considered.add(byte)
-
-                # Find chunk for byte
-                try:
-                    ch = byte_chunks[byte]
-                except KeyError:
-                    for ch in chunks:
-                        if ch.byte <= byte < ch.byte+ch.length:
-                            break
-                    else:
-                        # No chunk for this byte!
-                        raise Exception("Couldn't find chunk @ %d" % byte)
-                    byte_chunks[byte] = ch
+            chunks_considered = set()
+            chunks_to_consider = [chunk]
+            while chunks_to_consider:
+                # Get the chunk we're considering, and make sure we don't
+                # consider it again
+                this_chunk = chunks_to_consider.pop()
+                chunks_considered.add(this_chunk)
 
-                if ch.line:
-                    lines.add(ch.line)
-                else:
-                    for ex in ch.exits:
-                        if ex == -1:
-                            lines.add(-1)
-                        elif ex not in bytes_considered:
-                            bytes_to_consider.append(ex)
-
-                bytes_to_add.update(ch.exits)
-
-            byte_lines[byte_to_add] = lines
+                # For each exit, add the line number if the trace function
+                # would be triggered, or add the chunk to those being
+                # considered if not.
+                for ex in this_chunk.exits:
+                    if ex < 0:
+                        yield (chunk.line, ex)
+                    else:
+                        next_chunk = byte_chunks[ex]
+                        if next_chunk in chunks_considered:
+                            continue
 
-        # Figure out for each chunk where the exits go.
-        arcs = set()
-        for chunk in chunks:
-            if chunk.line:
-                for ex in chunk.exits:
-                    for exit_line in byte_lines[ex]:
-                        if chunk.line != exit_line:
-                            arcs.add((chunk.line, exit_line))
-        for line in byte_lines[0]:
-            arcs.add((-1, line))
-
-        return arcs
+                        # The trace function is invoked if visiting the first
+                        # bytecode in a line, or if the transition is a
+                        # backward jump.
+                        backward_jump = next_chunk.byte < this_chunk.byte
+                        if next_chunk.first or backward_jump:
+                            if next_chunk.line != chunk.line:
+                                yield (chunk.line, next_chunk.line)
+                        else:
+                            chunks_to_consider.append(next_chunk)
 
     def _all_chunks(self):
         """Returns a list of `Chunk` objects for this code and its children.
@@ -576,11 +635,11 @@
 
 
 class Chunk(object):
-    """A sequence of bytecodes with a single entrance.
+    """A sequence of byte codes with a single entrance.
 
     To analyze byte code, we have to divide it into chunks, sequences of byte
-    codes such that each basic block has only one entrance, the first
-    instruction in the block.
+    codes such that each chunk has only one entrance, the first instruction in
+    the block.
 
     This is almost the CS concept of `basic block`_, except that we're willing
     to have many exits from a chunk, and "basic block" is a more cumbersome
@@ -588,157 +647,54 @@
 
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
 
-    An exit of -1 means the chunk can leave the code (return).
+    `line` is the source line number containing this chunk.
+
+    `first` is true if this is the first chunk in the source line.
+
+    An exit < 0 means the chunk can leave the code (return).  The exit is
+    the negative of the starting line number of the code block.
 
     """
-    def __init__(self, byte, line=0):
+    def __init__(self, byte, line, first):
         self.byte = byte
         self.line = line
+        self.first = first
         self.length = 0
         self.exits = set()
 
     def __repr__(self):
-        return "<%d+%d @%d %r>" % (
-            self.byte, self.length, self.line, list(self.exits)
+        if self.first:
+            bang = "!"
+        else:
+            bang = ""
+        return "<%d+%d @%d%s %r>" % (
+            self.byte, self.length, self.line, bang, list(self.exits)
             )
 
 
-class AdHocMain(object):        # pragma: no cover
-    """An ad-hoc main for code parsing experiments."""
-
-    def main(self, args):
-        """A main function for trying the code from the command line."""
-
-        from optparse import OptionParser
-
-        parser = OptionParser()
-        parser.add_option(
-            "-c", action="store_true", dest="chunks",
-            help="Show basic block chunks"
-            )
-        parser.add_option(
-            "-d", action="store_true", dest="dis",
-            help="Disassemble"
-            )
-        parser.add_option(
-            "-R", action="store_true", dest="recursive",
-            help="Recurse to find source files"
-            )
-        parser.add_option(
-            "-s", action="store_true", dest="source",
-            help="Show analyzed source"
-            )
-        parser.add_option(
-            "-t", action="store_true", dest="tokens",
-            help="Show tokens"
-            )
+class CachedTokenizer(object):
+    """A one-element cache around tokenize.generate_tokens.
 
-        options, args = parser.parse_args()
-        if options.recursive:
-            if args:
-                root = args[0]
-            else:
-                root = "."
-            for root, _, _ in os.walk(root):
-                for f in glob.glob(root + "/*.py"):
-                    self.adhoc_one_file(options, f)
-        else:
-            self.adhoc_one_file(options, args[0])
-
-    def adhoc_one_file(self, options, filename):
-        """Process just one file."""
+    When reporting, coverage.py tokenizes files twice, once to find the
+    structure of the file, and once to syntax-color it.  Tokenizing is
+    expensive, and easily cached.
 
-        if options.dis or options.chunks:
-            try:
-                bp = ByteParser(filename=filename)
-            except CoverageException:
-                _, err, _ = sys.exc_info()
-                print("%s" % (err,))
-                return
-
-        if options.dis:
-            print("Main code:")
-            bp._disassemble()
-
-        if options.chunks:
-            chunks = bp._all_chunks()
-            if options.recursive:
-                print("%6d: %s" % (len(chunks), filename))
-            else:
-                print("Chunks: %r" % chunks)
-                arcs = bp._all_arcs()
-                print("Arcs: %r" % sorted(arcs))
+    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+    actually tokenize twice.
 
-        if options.source or options.tokens:
-            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
-            cp.show_tokens = options.tokens
-            cp._raw_parse()
-
-            if options.source:
-                if options.chunks:
-                    arc_width, arc_chars = self.arc_ascii_art(arcs)
-                else:
-                    arc_width, arc_chars = 0, {}
-
-                exit_counts = cp.exit_counts()
-
-                for i, ltext in enumerate(cp.lines):
-                    lineno = i+1
-                    m0 = m1 = m2 = m3 = a = ' '
-                    if lineno in cp.statement_starts:
-                        m0 = '-'
-                    exits = exit_counts.get(lineno, 0)
-                    if exits > 1:
-                        m1 = str(exits)
-                    if lineno in cp.docstrings:
-                        m2 = '"'
-                    if lineno in cp.classdefs:
-                        m2 = 'C'
-                    if lineno in cp.excluded:
-                        m3 = 'x'
-                    a = arc_chars.get(lineno, '').ljust(arc_width)
-                    print("%4d %s%s%s%s%s %s" %
-                                (lineno, m0, m1, m2, m3, a, ltext)
-                        )
-
-    def arc_ascii_art(self, arcs):
-        """Draw arcs as ascii art.
+    """
+    def __init__(self):
+        self.last_text = None
+        self.last_tokens = None
 
-        Returns a width of characters needed to draw all the arcs, and a
-        dictionary mapping line numbers to ascii strings to draw for that line.
+    def generate_tokens(self, text):
+        """A stand-in for `tokenize.generate_tokens`."""
+        if text != self.last_text:
+            self.last_text = text
+            self.last_tokens = list(
+                tokenize.generate_tokens(StringIO(text).readline)
+            )
+        return self.last_tokens
 
-        """
-        arc_chars = {}
-        for lfrom, lto in sorted(arcs):
-            if lfrom == -1:
-                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
-            elif lto == -1:
-                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
-            else:
-                if lfrom == lto-1:
-                    # Don't show obvious arcs.
-                    continue
-                if lfrom < lto:
-                    l1, l2 = lfrom, lto
-                else:
-                    l1, l2 = lto, lfrom
-                w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
-                for l in range(l1, l2+1):
-                    if l == lfrom:
-                        ch = '<'
-                    elif l == lto:
-                        ch = '>'
-                    else:
-                        ch = '|'
-                    arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch
-                arc_width = 0
-
-        if arc_chars:
-            arc_width = max([len(a) for a in arc_chars.values()])
-        else:
-            arc_width = 0
-
-        return arc_width, arc_chars
-
-if __name__ == '__main__':
-    AdHocMain().main(sys.argv[1:])
\ No newline at end of file
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
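
To see the one-element cache at work, call the resulting generate_tokens twice with the same text: the second call returns the very same list instead of re-tokenizing. A small Python 3 usage sketch (using io.StringIO where the patch imports StringIO from .backward):

import tokenize
from io import StringIO

class CachedTokenizer:
    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        # Re-tokenize only when the text actually changes.
        if text != self.last_text:
            self.last_text = text
            self.last_tokens = list(
                tokenize.generate_tokens(StringIO(text).readline)
            )
        return self.last_tokens

generate_tokens = CachedTokenizer().generate_tokens

source = "x = 1\n"
first = generate_tokens(source)
second = generate_tokens(source)
assert first is second  # cache hit: same list object, no second tokenization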
