diff -r 71f15675e89f -r 7f51ab29a1a2 DebugClients/Python/coverage/parser.py
--- a/DebugClients/Python/coverage/parser.py	Fri Apr 11 18:37:22 2014 +0200
+++ b/DebugClients/Python/coverage/parser.py	Thu Apr 10 23:02:20 2014 +0200
@@ -1,10 +1,14 @@
 """Code parsing for Coverage."""
 
-import glob, opcode, os, re, sys, token, tokenize
+import dis, re, sys, token, tokenize
 
-from .backward import set, sorted, StringIO # pylint: disable-msg=W0622
+from .backward import set, sorted, StringIO # pylint: disable=W0622
+from .backward import open_source, range    # pylint: disable=W0622
+from .backward import reversed              # pylint: disable=W0622
+from .backward import bytes_to_ints
 from .bytecode import ByteCodes, CodeObjects
-from .misc import nice_pair, CoverageException, NoSource, expensive
+from .misc import nice_pair, expensive, join_regex
+from .misc import CoverageException, NoSource, NotPython
 
 
 class CodeParser(object):
@@ -13,7 +17,7 @@
     def __init__(self, text=None, filename=None, exclude=None):
         """
         Source can be provided as `text`, the text itself, or `filename`, from
-        which text will be read.  Excluded lines are those that match
+        which the text will be read.  Excluded lines are those that match
         `exclude`, a regex.
 
         """
@@ -22,15 +26,20 @@
         self.text = text
         if not self.text:
             try:
-                sourcef = open(self.filename, 'rU')
-                self.text = sourcef.read()
-                sourcef.close()
+                sourcef = open_source(self.filename)
+                try:
+                    self.text = sourcef.read()
+                finally:
+                    sourcef.close()
             except IOError:
                 _, err, _ = sys.exc_info()
                 raise NoSource(
-                    "No source for code: %r: %s" % (self.filename, err)
+                    "No source for code: '%s': %s" % (self.filename, err)
                     )
-        self.text = self.text.replace('\r\n', '\n')
+
+        # Scrap the BOM if it exists.
+        if self.text and ord(self.text[0]) == 0xfeff:
+            self.text = self.text[1:]
 
         self.exclude = exclude
@@ -65,6 +74,21 @@
         return self._byte_parser
     byte_parser = property(_get_byte_parser)
 
+    def lines_matching(self, *regexes):
+        """Find the lines matching one of a list of regexes.
+
+        Returns a set of line numbers, the lines that contain a match for one
+        of the regexes in `regexes`.  The entire line needn't match, just a
+        part of it.
+
+        """
+        regex_c = re.compile(join_regex(regexes))
+        matches = set()
+        for i, ltext in enumerate(self.lines):
+            if regex_c.search(ltext):
+                matches.add(i+1)
+        return matches
+
     def _raw_parse(self):
         """Parse the source to find the interesting facts about its lines.
 
@@ -73,10 +97,7 @@
         """
         # Find lines which match an exclusion pattern.
         if self.exclude:
-            re_exclude = re.compile(self.exclude)
-            for i, ltext in enumerate(self.lines):
-                if re_exclude.search(ltext):
-                    self.excluded.add(i+1)
+            self.excluded = self.lines_matching(self.exclude)
 
         # Tokenize, to find excluded suites, to find docstrings, and to find
         # multi-line statements.
@@ -85,10 +106,11 @@
         excluding = False
         prev_toktype = token.INDENT
         first_line = None
+        empty = True
 
-        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
+        tokgen = generate_tokens(self.text)
         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
-            if self.show_tokens:                # pragma: no cover
+            if self.show_tokens:                # pragma: not covered
                 print("%10s %5s %-20r %r" % (
                     tokenize.tok_name.get(toktype, toktype),
                     nice_pair((slineno, elineno)), ttext, ltext
@@ -114,8 +136,7 @@
                 # (a trick from trace.py in the stdlib.)  This works for
                 # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
-                for i in range(slineno, elineno+1):
-                    self.docstrings.add(i)
+                self.docstrings.update(range(slineno, elineno+1))
             elif toktype == token.NEWLINE:
                 if first_line is not None and elineno != first_line:
                     # We're at the end of a line, and we've ended on a
@@ -128,6 +149,7 @@
 
             if ttext.strip() and toktype != tokenize.COMMENT:
                 # A non-whitespace token.
+                empty = False
                 if first_line is None:
                     # The token is not whitespace, and is the first in a
                     # statement.
@@ -141,7 +163,8 @@
             prev_toktype = toktype
 
         # Find the starts of the executable statements.
-        self.statement_starts.update(self.byte_parser._find_statements())
+        if not empty:
+            self.statement_starts.update(self.byte_parser._find_statements())
 
     def first_line(self, line):
         """Return the first line number of the statement including `line`."""
@@ -152,16 +175,18 @@
             first_line = line
         return first_line
 
-    def first_lines(self, lines, ignore=None):
+    def first_lines(self, lines, *ignores):
         """Map the line numbers in `lines` to the correct first line of
         the statement.
 
-        Skip any line mentioned in `ignore`.
+        Skip any line mentioned in any of the sequences in `ignores`.
 
-        Returns a sorted list of the first lines.
+        Returns a set of the first lines.
 
         """
-        ignore = ignore or []
+        ignore = set()
+        for ign in ignores:
+            ignore.update(ign)
         lset = set()
         for l in lines:
             if l in ignore:
@@ -169,23 +194,34 @@
                 continue
             new_l = self.first_line(l)
             if new_l not in ignore:
                 lset.add(new_l)
-        return sorted(lset)
+        return lset
 
     def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.
 
-        Return values are 1) a sorted list of executable line numbers, and
-        2) a sorted list of excluded line numbers.
+        Return values are 1) a set of executable line numbers, and 2) a set of
+        excluded line numbers.
 
         Reported line numbers are normalized to the first line of multi-line
         statements.
 
         """
-        self._raw_parse()
+        try:
+            self._raw_parse()
+        except (tokenize.TokenError, IndentationError):
+            _, tokerr, _ = sys.exc_info()
+            msg, lineno = tokerr.args
+            raise NotPython(
+                "Couldn't parse '%s' as Python source: '%s' at %s" %
+                (self.filename, msg, lineno)
+                )
 
         excluded_lines = self.first_lines(self.excluded)
-        ignore = excluded_lines + list(self.docstrings)
-        lines = self.first_lines(self.statement_starts, ignore)
+        lines = self.first_lines(
+            self.statement_starts,
+            excluded_lines,
+            self.docstrings
+            )
 
         return lines, excluded_lines
@@ -214,7 +250,7 @@
         excluded_lines = self.first_lines(self.excluded)
         exit_counts = {}
         for l1, l2 in self.arcs():
-            if l1 == -1:
+            if l1 < 0:
                 # Don't ever report -1 as a line number
                 continue
             if l1 in excluded_lines:
@@ -240,12 +276,18 @@
 ## Opcodes that guide the ByteParser.
 
 def _opcode(name):
-    """Return the opcode by name from the opcode module."""
-    return opcode.opmap[name]
+    """Return the opcode by name from the dis module."""
+    return dis.opmap[name]
 
 def _opcode_set(*names):
     """Return a set of opcodes by the names in `names`."""
-    return set([_opcode(name) for name in names])
+    s = set()
+    for name in names:
+        try:
+            s.add(_opcode(name))
+        except KeyError:
+            pass
+    return s
 
 # Opcodes that leave the code object.
 OPS_CODE_END = _opcode_set('RETURN_VALUE')
@@ -256,8 +298,15 @@
     'BREAK_LOOP', 'CONTINUE_LOOP',
     )
 
+# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
+# with unconditional jump instructions, we neatly deal with jumps to jumps
+# properly.
+OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
+
 # Opcodes that push a block on the block stack.
-OPS_PUSH_BLOCK = _opcode_set('SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_PUSH_BLOCK = _opcode_set(
+    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
+    )
 
 # Block types for exception handling.
 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
@@ -266,7 +315,7 @@
 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
 
 # Opcodes that have a jump destination, but aren't really a jump.
-OPS_NO_JUMP = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
+OPS_NO_JUMP = OPS_PUSH_BLOCK
 
 # Individual opcodes we need below.
 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
@@ -283,12 +332,16 @@
     def __init__(self, code=None, text=None, filename=None):
         if code:
             self.code = code
+            self.text = text
         else:
             if not text:
                 assert filename, "If no code or text, need a filename"
-                sourcef = open(filename, 'rU')
-                text = sourcef.read()
-                sourcef.close()
+                sourcef = open_source(filename)
+                try:
+                    text = sourcef.read()
+                finally:
+                    sourcef.close()
+            self.text = text
 
         try:
             # Python 2.3 and 2.4 don't like partial last lines, so be sure
@@ -296,80 +349,75 @@
             self.code = compile(text + '\n', filename, "exec")
         except SyntaxError:
             _, synerr, _ = sys.exc_info()
-            raise CoverageException(
+            raise NotPython(
                 "Couldn't parse '%s' as Python source: '%s' at line %d" %
                     (filename, synerr.msg, synerr.lineno)
                 )
 
+        # Alternative Python implementations don't always provide all the
+        # attributes on code objects that we need to do the analysis.
+        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
+            if not hasattr(self.code, attr):
+                raise CoverageException(
+                    "This implementation of Python doesn't support code "
+                    "analysis.\n"
+                    "Run coverage.py under CPython for this command."
+                    )
+
     def child_parsers(self):
         """Iterate over all the code objects nested within this one.
 
         The iteration includes `self` as its first value.
 
         """
-        return map(lambda c: ByteParser(code=c), CodeObjects(self.code))
-
-    # Getting numbers from the lnotab value changed in Py3.0.
-    if sys.hexversion >= 0x03000000:
-        def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab bytes in 3.x"""
-            return list(lnotab)
-    else:
-        def _lnotab_increments(self, lnotab):
-            """Return a list of ints from the lnotab string in 2.x"""
-            return [ord(c) for c in lnotab]
+        children = CodeObjects(self.code)
+        return [ByteParser(code=c, text=self.text) for c in children]
 
     def _bytes_lines(self):
         """Map byte offsets to line numbers in `code`.
 
         Uses co_lnotab described in Python/compile.c to map byte offsets to
-        line numbers.  Returns a list: [(b0, l0), (b1, l1), ...]
+        line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...
+
+        Only byte offsets that correspond to line numbers are included in the
+        results.
 
         """
         # Adapted from dis.py in the standard library.
-        byte_increments = self._lnotab_increments(self.code.co_lnotab[0::2])
-        line_increments = self._lnotab_increments(self.code.co_lnotab[1::2])
+        byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
+        line_increments = bytes_to_ints(self.code.co_lnotab[1::2])
 
-        bytes_lines = []
         last_line_num = None
         line_num = self.code.co_firstlineno
         byte_num = 0
         for byte_incr, line_incr in zip(byte_increments, line_increments):
             if byte_incr:
                 if line_num != last_line_num:
-                    bytes_lines.append((byte_num, line_num))
+                    yield (byte_num, line_num)
                     last_line_num = line_num
                 byte_num += byte_incr
             line_num += line_incr
         if line_num != last_line_num:
-            bytes_lines.append((byte_num, line_num))
-        return bytes_lines
+            yield (byte_num, line_num)
 
     def _find_statements(self):
         """Find the statements in `self.code`.
 
-        Return a set of line numbers that start statements.  Recurses into all
-        code objects reachable from `self.code`.
+        Produce a sequence of line numbers that start statements.  Recurses
+        into all code objects reachable from `self.code`.
 
         """
-        stmts = set()
         for bp in self.child_parsers():
             # Get all of the lineno information from this code.
             for _, l in bp._bytes_lines():
-                stmts.add(l)
-        return stmts
-
-    def _disassemble(self):     # pragma: no cover
-        """Disassemble code, for ad-hoc experimenting."""
+                yield l
 
-        import dis
-
-        for bp in self.child_parsers():
-            print("\n%s: " % bp.code)
-            dis.dis(bp.code)
-            print("Bytes lines: %r" % bp._bytes_lines())
-
-        print("")
+    def _block_stack_repr(self, block_stack):
+        """Get a string version of `block_stack`, for debugging."""
+        blocks = ", ".join(
+            ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
+            )
+        return "[" + blocks + "]"
 
     def _split_into_chunks(self):
         """Split the code object into a list of `Chunk` objects.
@@ -380,10 +428,11 @@
         Returns a list of `Chunk` objects.
 
         """
-        # The list of chunks so far, and the one we're working on.
         chunks = []
         chunk = None
+
+        # A dict mapping byte offsets of line starts to the line numbers.
         bytes_lines_map = dict(self._bytes_lines())
 
         # The block stack: loops and try blocks get pushed here for the
@@ -398,16 +447,38 @@
         # We have to handle the last two bytecodes specially.
         ult = penult = None
 
-        for bc in ByteCodes(self.code.co_code):
-            # Maybe have to start a new block
+        # Get a set of all of the jump-to points.
+        jump_to = set()
+        bytecodes = list(ByteCodes(self.code.co_code))
+        for bc in bytecodes:
+            if bc.jump_to >= 0:
+                jump_to.add(bc.jump_to)
+
+        chunk_lineno = 0
+
+        # Walk the byte codes building chunks.
+        for bc in bytecodes:
+            # Maybe have to start a new chunk
+            start_new_chunk = False
+            first_chunk = False
             if bc.offset in bytes_lines_map:
+                # Start a new chunk for each source line number.
+                start_new_chunk = True
+                chunk_lineno = bytes_lines_map[bc.offset]
+                first_chunk = True
+            elif bc.offset in jump_to:
+                # To make chunks have a single entrance, we have to make a new
+                # chunk when we get to a place some bytecode jumps to.
+                start_new_chunk = True
+            elif bc.op in OPS_CHUNK_BEGIN:
+                # Jumps deserve their own unnumbered chunk.  This fixes
+                # problems with jumps to jumps getting confused.
+                start_new_chunk = True
+
+            if not chunk or start_new_chunk:
                 if chunk:
                     chunk.exits.add(bc.offset)
-                chunk = Chunk(bc.offset, bytes_lines_map[bc.offset])
-                chunks.append(chunk)
-
-            if not chunk:
-                chunk = Chunk(bc.offset)
+                chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
                 chunks.append(chunk)
 
             # Look at the opcode
@@ -421,7 +492,7 @@
 
             if bc.op in OPS_CODE_END:
                 # The opcode can exit the code object.
-                chunk.exits.add(-1)
+                chunk.exits.add(-self.code.co_firstlineno)
             if bc.op in OPS_PUSH_BLOCK:
                 # The opcode adds a block to the block_stack.
                 block_stack.append((bc.op, bc.jump_to))
@@ -436,15 +507,11 @@
                     chunk.exits.add(block_stack[-1][1])
                 chunk = None
             if bc.op == OP_END_FINALLY:
-                if block_stack:
-                    # A break that goes through a finally will jump to whatever
-                    # block is on top of the stack.
-                    chunk.exits.add(block_stack[-1][1])
                 # For the finally clause we need to find the closest exception
                 # block, and use its jump target as an exit.
-                for iblock in range(len(block_stack)-1, -1, -1):
-                    if block_stack[iblock][0] in OPS_EXCEPT_BLOCKS:
-                        chunk.exits.add(block_stack[iblock][1])
+                for block in reversed(block_stack):
+                    if block[0] in OPS_EXCEPT_BLOCKS:
+                        chunk.exits.add(block[1])
                         break
             if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
                 # This is an except clause.  We want to overlook the next
@@ -454,7 +521,6 @@
             penult = ult
             ult = bc
 
-
         if chunks:
             # The last two bytecodes could be a dummy "return None" that
             # shouldn't be counted as real code. Every Python code object seems
@@ -466,27 +532,38 @@
                     # This is "return None", but is it dummy?  A real line
                     # would be a last chunk all by itself.
                     if chunks[-1].byte != penult.offset:
+                        ex = -self.code.co_firstlineno
                         # Split the last chunk
                         last_chunk = chunks[-1]
-                        last_chunk.exits.remove(-1)
+                        last_chunk.exits.remove(ex)
                         last_chunk.exits.add(penult.offset)
-                        chunk = Chunk(penult.offset)
-                        chunk.exits.add(-1)
+                        chunk = Chunk(
+                            penult.offset, last_chunk.line, False
+                            )
+                        chunk.exits.add(ex)
                         chunks.append(chunk)
 
             # Give all the chunks a length.
-            chunks[-1].length = bc.next_offset - chunks[-1].byte
+            chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
             for i in range(len(chunks)-1):
                 chunks[i].length = chunks[i+1].byte - chunks[i].byte
 
+        #self.validate_chunks(chunks)
         return chunks
 
+    def validate_chunks(self, chunks):
+        """Validate the rule that chunks have a single entrance."""
+        # starts is the entrances to the chunks
+        starts = set([ch.byte for ch in chunks])
+        for ch in chunks:
+            assert all([(ex in starts or ex < 0) for ex in ch.exits])
+
     def _arcs(self):
         """Find the executable arcs in the code.
 
-        Returns a set of pairs, (from,to).  From and to are integer line
-        numbers.  If from is -1, then the arc is an entrance into the code
-        object.  If to is -1, the arc is an exit from the code object.
+        Yields pairs: (from,to).  From and to are integer line numbers.  If
+        from is < 0, then the arc is an entrance into the code object.  If to
+        is < 0, the arc is an exit from the code object.
 
         """
         chunks = self._split_into_chunks()
@@ -494,61 +571,43 @@
         # A map from byte offsets to chunks jumped into.
         byte_chunks = dict([(c.byte, c) for c in chunks])
 
-        # Build a map from byte offsets to actual lines reached.
-        byte_lines = {-1:[-1]}
-        bytes_to_add = set([c.byte for c in chunks])
+        # There's always an entrance at the first chunk.
+        yield (-1, byte_chunks[0].line)
 
-        while bytes_to_add:
-            byte_to_add = bytes_to_add.pop()
-            if byte_to_add in byte_lines or byte_to_add == -1:
+        # Traverse from the first chunk in each line, and yield arcs where
+        # the trace function will be invoked.
+        for chunk in chunks:
+            if not chunk.first:
                 continue
 
-            # Which lines does this chunk lead to?
-            bytes_considered = set()
-            bytes_to_consider = [byte_to_add]
-            lines = set()
-
-            while bytes_to_consider:
-                byte = bytes_to_consider.pop()
-                bytes_considered.add(byte)
-
-                # Find chunk for byte
-                try:
-                    ch = byte_chunks[byte]
-                except KeyError:
-                    for ch in chunks:
-                        if ch.byte <= byte < ch.byte+ch.length:
-                            break
-                    else:
-                        # No chunk for this byte!
-                        raise Exception("Couldn't find chunk @ %d" % byte)
-                    byte_chunks[byte] = ch
+            chunks_considered = set()
+            chunks_to_consider = [chunk]
+            while chunks_to_consider:
+                # Get the chunk we're considering, and make sure we don't
+                # consider it again
+                this_chunk = chunks_to_consider.pop()
+                chunks_considered.add(this_chunk)
 
-                if ch.line:
-                    lines.add(ch.line)
-                else:
-                    for ex in ch.exits:
-                        if ex == -1:
-                            lines.add(-1)
-                        elif ex not in bytes_considered:
-                            bytes_to_consider.append(ex)
-
-                bytes_to_add.update(ch.exits)
-
-            byte_lines[byte_to_add] = lines
+                # For each exit, add the line number if the trace function
+                # would be triggered, or add the chunk to those being
+                # considered if not.
+                for ex in this_chunk.exits:
+                    if ex < 0:
+                        yield (chunk.line, ex)
+                    else:
+                        next_chunk = byte_chunks[ex]
+                        if next_chunk in chunks_considered:
+                            continue
 
-        # Figure out for each chunk where the exits go.
-        arcs = set()
-        for chunk in chunks:
-            if chunk.line:
-                for ex in chunk.exits:
-                    for exit_line in byte_lines[ex]:
-                        if chunk.line != exit_line:
-                            arcs.add((chunk.line, exit_line))
-        for line in byte_lines[0]:
-            arcs.add((-1, line))
-
-        return arcs
+                        # The trace function is invoked if visiting the first
+                        # bytecode in a line, or if the transition is a
+                        # backward jump.
+                        backward_jump = next_chunk.byte < this_chunk.byte
+                        if next_chunk.first or backward_jump:
+                            if next_chunk.line != chunk.line:
+                                yield (chunk.line, next_chunk.line)
+                        else:
+                            chunks_to_consider.append(next_chunk)
 
     def _all_chunks(self):
         """Returns a list of `Chunk` objects for this code and its children.
@@ -576,11 +635,11 @@
 
 class Chunk(object):
-    """A sequence of bytecodes with a single entrance.
+    """A sequence of byte codes with a single entrance.
 
     To analyze byte code, we have to divide it into chunks, sequences of byte
-    codes such that each basic block has only one entrance, the first
-    instruction in the block.
+    codes such that each chunk has only one entrance, the first instruction in
+    the block.
 
     This is almost the CS concept of `basic block`_, except that we're willing
     to have many exits from a chunk, and "basic block" is a more cumbersome
@@ -588,160 +647,54 @@
 
     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
 
-    An exit of -1 means the chunk can leave the code (return).
+    `line` is the source line number containing this chunk.
+
+    `first` is true if this is the first chunk in the source line.
+
+    An exit < 0 means the chunk can leave the code (return).  The exit is
+    the negative of the starting line number of the code block.
 
     """
-    def __init__(self, byte, line=0):
+    def __init__(self, byte, line, first):
         self.byte = byte
         self.line = line
+        self.first = first
         self.length = 0
         self.exits = set()
 
     def __repr__(self):
-        return "<%d+%d @%d %r>" % (
-            self.byte, self.length, self.line, list(self.exits)
+        if self.first:
+            bang = "!"
+        else:
+            bang = ""
+        return "<%d+%d @%d%s %r>" % (
+            self.byte, self.length, self.line, bang, list(self.exits)
             )
 
 
-class AdHocMain(object):        # pragma: no cover
-    """An ad-hoc main for code parsing experiments."""
-
-    def main(self, args):
-        """A main function for trying the code from the command line."""
-
-        from optparse import OptionParser
-
-        parser = OptionParser()
-        parser.add_option(
-            "-c", action="store_true", dest="chunks",
-            help="Show basic block chunks"
-            )
-        parser.add_option(
-            "-d", action="store_true", dest="dis",
-            help="Disassemble"
-            )
-        parser.add_option(
-            "-R", action="store_true", dest="recursive",
-            help="Recurse to find source files"
-            )
-        parser.add_option(
-            "-s", action="store_true", dest="source",
-            help="Show analyzed source"
-            )
-        parser.add_option(
-            "-t", action="store_true", dest="tokens",
-            help="Show tokens"
-            )
+class CachedTokenizer(object):
+    """A one-element cache around tokenize.generate_tokens.
 
-        options, args = parser.parse_args()
-        if options.recursive:
-            if args:
-                root = args[0]
-            else:
-                root = "."
-            for root, _, _ in os.walk(root):
-                for f in glob.glob(root + "/*.py"):
-                    self.adhoc_one_file(options, f)
-        else:
-            self.adhoc_one_file(options, args[0])
-
-    def adhoc_one_file(self, options, filename):
-        """Process just one file."""
+    When reporting, coverage.py tokenizes files twice, once to find the
+    structure of the file, and once to syntax-color it.  Tokenizing is
+    expensive, and easily cached.
 
-        if options.dis or options.chunks:
-            try:
-                bp = ByteParser(filename=filename)
-            except CoverageException:
-                _, err, _ = sys.exc_info()
-                print("%s" % (err,))
-                return
-
-            if options.dis:
-                print("Main code:")
-                bp._disassemble()
+    This is a one-element cache so that our twice-in-a-row tokenizing doesn't
+    actually tokenize twice.
 
-            if options.chunks:
-                chunks = bp._all_chunks()
-                if options.recursive:
-                    print("%6d: %s" % (len(chunks), filename))
-                else:
-                    print("Chunks: %r" % chunks)
-                    arcs = bp._all_arcs()
-                    print("Arcs: %r" % sorted(arcs))
-
-        if options.source or options.tokens:
-            cp = CodeParser(filename=filename, exclude=r"no\s*cover")
-            cp.show_tokens = options.tokens
-            cp._raw_parse()
+    """
+    def __init__(self):
+        self.last_text = None
+        self.last_tokens = None
 
-            if options.source:
-                if options.chunks:
-                    arc_width, arc_chars = self.arc_ascii_art(arcs)
-                else:
-                    arc_width, arc_chars = 0, {}
-
-                exit_counts = cp.exit_counts()
-
-                for i, ltext in enumerate(cp.lines):
-                    lineno = i+1
-                    m0 = m1 = m2 = m3 = a = ' '
-                    if lineno in cp.statement_starts:
-                        m0 = '-'
-                    exits = exit_counts.get(lineno, 0)
-                    if exits > 1:
-                        m1 = str(exits)
-                    if lineno in cp.docstrings:
-                        m2 = '"'
-                    if lineno in cp.classdefs:
-                        m2 = 'C'
-                    if lineno in cp.excluded:
-                        m3 = 'x'
-                    a = arc_chars.get(lineno, '').ljust(arc_width)
-                    print("%4d %s%s%s%s%s %s" %
-                        (lineno, m0, m1, m2, m3, a, ltext)
-                        )
-
-    def arc_ascii_art(self, arcs):
-        """Draw arcs as ascii art.
-
-        Returns a width of characters needed to draw all the arcs, and a
-        dictionary mapping line numbers to ascii strings to draw for that line.
 
-        """
-        arc_chars = {}
-        for lfrom, lto in sorted(arcs):
-            if lfrom == -1:
-                arc_chars[lto] = arc_chars.get(lto, '') + 'v'
-            elif lto == -1:
-                arc_chars[lfrom] = arc_chars.get(lfrom, '') + '^'
-            else:
-                if lfrom == lto-1:
-                    # Don't show obvious arcs.
-                    continue
-                if lfrom < lto:
-                    l1, l2 = lfrom, lto
-                else:
-                    l1, l2 = lto, lfrom
-                w = max([len(arc_chars.get(l, '')) for l in range(l1, l2+1)])
-                for l in range(l1, l2+1):
-                    if l == lfrom:
-                        ch = '<'
-                    elif l == lto:
-                        ch = '>'
-                    else:
-                        ch = '|'
-                    arc_chars[l] = arc_chars.get(l, '').ljust(w) + ch
-        arc_width = 0
+    def generate_tokens(self, text):
+        """A stand-in for `tokenize.generate_tokens`."""
+        if text != self.last_text:
+            self.last_text = text
+            self.last_tokens = list(
+                tokenize.generate_tokens(StringIO(text).readline)
+                )
+        return self.last_tokens
 
-        if arc_chars:
-            arc_width = max([len(a) for a in arc_chars.values()])
-        else:
-            arc_width = 0
-
-        return arc_width, arc_chars
-
-if __name__ == '__main__':
-    AdHocMain().main(sys.argv[1:])
-
-#
-# eflag: FileType = Python2
+# Create our generate_tokens cache as a callable replacement function.
+generate_tokens = CachedTokenizer().generate_tokens
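
The rewritten `ByteParser._bytes_lines` above is now a generator that decodes `co_lnotab` pairwise. For readers who want to try that decoding outside the class, here is a minimal standalone sketch; the helper name `lnotab_pairs` and the `sample` function are illustrative only, and the unsigned-increment decoding matches the CPython versions this file targets (the lnotab format changed in CPython 3.6, and `co_lines()` replaced it in 3.10):

    import dis

    def lnotab_pairs(code):
        # Decode co_lnotab into (byte_offset, line_number) pairs, mirroring
        # the generator logic of _bytes_lines in the patch above.
        lnotab = code.co_lnotab
        if isinstance(lnotab, str):   # Python 2: co_lnotab is a str of bytes
            lnotab = [ord(c) for c in lnotab]
        byte_increments = lnotab[0::2]
        line_increments = lnotab[1::2]

        last_line_num = None
        line_num = code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    yield (byte_num, line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            yield (byte_num, line_num)

    def sample(x):
        y = x + 1
        return y * 2

    # dis.findlinestarts implements the same decoding in the stdlib,
    # so the two lists should agree.
    print(list(lnotab_pairs(sample.__code__)))
    print(list(dis.findlinestarts(sample.__code__)))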
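The new `CachedTokenizer` is a one-element memo: calling the module-level `generate_tokens` twice in a row with equal text returns the very same token list instead of re-tokenizing. A small self-contained usage sketch of the class as it appears in this patch (the `src` sample is made up):

    import tokenize
    try:
        from StringIO import StringIO    # Python 2
    except ImportError:
        from io import StringIO          # Python 3

    class CachedTokenizer(object):
        # One-element cache around tokenize.generate_tokens, as in the patch.
        def __init__(self):
            self.last_text = None
            self.last_tokens = None

        def generate_tokens(self, text):
            if text != self.last_text:
                self.last_text = text
                self.last_tokens = list(
                    tokenize.generate_tokens(StringIO(text).readline)
                    )
            return self.last_tokens

    generate_tokens = CachedTokenizer().generate_tokens

    src = "a = 1\nb = a + 2\n"
    first = generate_tokens(src)
    second = generate_tokens(src)
    assert first is second   # the second call reused the cached token list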
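Finally, a usage sketch of the reworked `CodeParser` entry point, assuming the patched package is importable as `coverage.parser`; note that `parse_source` now returns sets rather than sorted lists, and raises `NotPython` (instead of a bare `CoverageException`) for unparsable input. The sample source and the printed values are illustrative:

    from coverage.parser import CodeParser
    from coverage.misc import NotPython

    source = (
        "def f(x):\n"
        "    if x:\n"
        "        return 1\n"
        "    return 2  # pragma: no cover\n"
    )

    cp = CodeParser(text=source, exclude=r"#\s*pragma:\s*no\s*cover")
    try:
        lines, excluded = cp.parse_source()   # both are sets after this patch
    except NotPython as err:
        print("Not Python: %s" % err)
    else:
        print(sorted(lines))      # executable statement lines, e.g. [1, 2, 3]
        print(sorted(excluded))   # excluded lines, e.g. [4]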