1 """Code parsing for Coverage.""" |
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
2 |
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt |
3 import dis, re, sys, token, tokenize |
3 |
4 |
4 """Code parsing for coverage.py.""" |
5 from .backward import set, sorted, StringIO # pylint: disable=W0622 |
5 |
6 from .backward import open_source, range # pylint: disable=W0622 |
6 import collections |
7 from .backward import reversed # pylint: disable=W0622 |
7 import dis |
8 from .backward import bytes_to_ints |
8 import re |
9 from .bytecode import ByteCodes, CodeObjects |
9 import token |
10 from .misc import nice_pair, expensive, join_regex |
10 import tokenize |
11 from .misc import CoverageException, NoSource, NotPython |
11 |
12 |
12 from coverage.backward import range # pylint: disable=redefined-builtin |
13 |
13 from coverage.backward import bytes_to_ints |
14 class CodeParser(object): |
14 from coverage.bytecode import ByteCodes, CodeObjects |
|
15 from coverage.misc import contract, nice_pair, expensive, join_regex |
|
16 from coverage.misc import CoverageException, NoSource, NotPython |
|
17 from coverage.phystokens import compile_unicode, generate_tokens |
|
18 |
|
19 |
|
20 class PythonParser(object): |
15 """Parse code to find executable lines, excluded lines, etc.""" |
21 """Parse code to find executable lines, excluded lines, etc.""" |
16 |
22 |
|
23 @contract(text='unicode|None') |
17 def __init__(self, text=None, filename=None, exclude=None): |
24 def __init__(self, text=None, filename=None, exclude=None): |
18 """ |
25 """ |
19 Source can be provided as `text`, the text itself, or `filename`, from |
26 Source can be provided as `text`, the text itself, or `filename`, from |
20 which the text will be read. Excluded lines are those that match |
27 which the text will be read. Excluded lines are those that match |
21 `exclude`, a regex. |
28 `exclude`, a regex. |
22 |
29 |
23 """ |
30 """ |
24 assert text or filename, "CodeParser needs either text or filename" |
31 assert text or filename, "PythonParser needs either text or filename" |
25 self.filename = filename or "<code>" |
32 self.filename = filename or "<code>" |
26 self.text = text |
33 self.text = text |
27 if not self.text: |
34 if not self.text: |
|
35 from coverage.python import get_python_source |
28 try: |
36 try: |
29 sourcef = open_source(self.filename) |
37 self.text = get_python_source(self.filename) |
30 try: |
38 except IOError as err: |
31 self.text = sourcef.read() |
|
32 finally: |
|
33 sourcef.close() |
|
34 except IOError: |
|
35 _, err, _ = sys.exc_info() |
|
36 raise NoSource( |
39 raise NoSource( |
37 "No source for code: '%s': %s" % (self.filename, err) |
40 "No source for code: '%s': %s" % (self.filename, err) |
38 ) |
41 ) |
39 |
|
40 # Scrap the BOM if it exists. |
|
41 if self.text and ord(self.text[0]) == 0xfeff: |
|
42 self.text = self.text[1:] |
|
43 |
42 |
44 self.exclude = exclude |
43 self.exclude = exclude |
45 |
44 |
46 self.show_tokens = False |
45 self.show_tokens = False |
47 |
46 |
61 self.multiline = {} |
60 self.multiline = {} |
62 |
61 |
63 # The line numbers that start statements. |
62 # The line numbers that start statements. |
64 self.statement_starts = set() |
63 self.statement_starts = set() |
65 |
64 |
66 # Lazily-created ByteParser |
65 # Lazily-created ByteParser and arc data. |
67 self._byte_parser = None |
66 self._byte_parser = None |
68 |
67 self._all_arcs = None |
69 def _get_byte_parser(self): |
68 |
|
69 @property |
|
70 def byte_parser(self): |
70 """Create a ByteParser on demand.""" |
71 """Create a ByteParser on demand.""" |
71 if not self._byte_parser: |
72 if not self._byte_parser: |
72 self._byte_parser = \ |
73 self._byte_parser = ByteParser(self.text, filename=self.filename) |
73 ByteParser(text=self.text, filename=self.filename) |
|
74 return self._byte_parser |
74 return self._byte_parser |
75 byte_parser = property(_get_byte_parser) |
|
76 |
75 |
77 def lines_matching(self, *regexes): |
76 def lines_matching(self, *regexes): |
78 """Find the lines matching one of a list of regexes. |
77 """Find the lines matching one of a list of regexes. |
79 |
78 |
80 Returns a set of line numbers, the lines that contain a match for one |
79 Returns a set of line numbers, the lines that contain a match for one |
112 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: |
111 for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen: |
113 if self.show_tokens: # pragma: not covered |
112 if self.show_tokens: # pragma: not covered |
114 print("%10s %5s %-20r %r" % ( |
113 print("%10s %5s %-20r %r" % ( |
115 tokenize.tok_name.get(toktype, toktype), |
114 tokenize.tok_name.get(toktype, toktype), |
116 nice_pair((slineno, elineno)), ttext, ltext |
115 nice_pair((slineno, elineno)), ttext, ltext |
117 )) |
116 )) |
118 if toktype == token.INDENT: |
117 if toktype == token.INDENT: |
119 indent += 1 |
118 indent += 1 |
120 elif toktype == token.DEDENT: |
119 elif toktype == token.DEDENT: |
121 indent -= 1 |
120 indent -= 1 |
122 elif toktype == token.NAME and ttext == 'class': |
121 elif toktype == token.NAME and ttext == 'class': |
140 elif toktype == token.NEWLINE: |
139 elif toktype == token.NEWLINE: |
141 if first_line is not None and elineno != first_line: |
140 if first_line is not None and elineno != first_line: |
142 # We're at the end of a line, and we've ended on a |
141 # We're at the end of a line, and we've ended on a |
143 # different line than the first line of the statement, |
142 # different line than the first line of the statement, |
144 # so record a multi-line range. |
143 # so record a multi-line range. |
145 rng = (first_line, elineno) |
|
146 for l in range(first_line, elineno+1): |
144 for l in range(first_line, elineno+1): |
147 self.multiline[l] = rng |
145 self.multiline[l] = first_line |
148 first_line = None |
146 first_line = None |
149 |
147 |
150 if ttext.strip() and toktype != tokenize.COMMENT: |
148 if ttext.strip() and toktype != tokenize.COMMENT: |
151 # A non-whitespace token. |
149 # A non-whitespace token. |
152 empty = False |
150 empty = False |
166 if not empty: |
164 if not empty: |
167 self.statement_starts.update(self.byte_parser._find_statements()) |
165 self.statement_starts.update(self.byte_parser._find_statements()) |
168 |
166 |
169 def first_line(self, line): |
167 def first_line(self, line): |
170 """Return the first line number of the statement including `line`.""" |
168 """Return the first line number of the statement including `line`.""" |
171 rng = self.multiline.get(line) |
169 first_line = self.multiline.get(line) |
172 if rng: |
170 if first_line: |
173 first_line = rng[0] |
171 return first_line |
174 else: |
172 else: |
175 first_line = line |
173 return line |
176 return first_line |
174 |
177 |
175 def first_lines(self, lines): |
178 def first_lines(self, lines, *ignores): |
|
179 """Map the line numbers in `lines` to the correct first line of the |
176 """Map the line numbers in `lines` to the correct first line of the |
180 statement. |
177 statement. |
181 |
178 |
182 Skip any line mentioned in any of the sequences in `ignores`. |
|
183 |
|
184 Returns a set of the first lines. |
179 Returns a set of the first lines. |
185 |
180 |
186 """ |
181 """ |
187 ignore = set() |
182 return set(self.first_line(l) for l in lines) |
188 for ign in ignores: |
183 |
189 ignore.update(ign) |
184 def translate_lines(self, lines): |
190 lset = set() |
185 """Implement `FileReporter.translate_lines`.""" |
191 for l in lines: |
186 return self.first_lines(lines) |
192 if l in ignore: |
187 |
193 continue |
188 def translate_arcs(self, arcs): |
194 new_l = self.first_line(l) |
189 """Implement `FileReporter.translate_arcs`.""" |
195 if new_l not in ignore: |
190 return [ |
196 lset.add(new_l) |
191 (self.first_line(a), self.first_line(b)) |
197 return lset |
192 for (a, b) in arcs |
198 |
193 ] |
|
194 |
|
195 @expensive |
199 def parse_source(self): |
196 def parse_source(self): |
200 """Parse source text to find executable lines, excluded lines, etc. |
197 """Parse source text to find executable lines, excluded lines, etc. |
201 |
198 |
202 Return values are 1) a set of executable line numbers, and 2) a set of |
199 Return values are 1) a set of executable line numbers, and 2) a set of |
203 excluded line numbers. |
200 excluded line numbers. |
206 statements. |
203 statements. |
207 |
204 |
208 """ |
205 """ |
209 try: |
206 try: |
210 self._raw_parse() |
207 self._raw_parse() |
211 except (tokenize.TokenError, IndentationError): |
208 except (tokenize.TokenError, IndentationError) as err: |
212 _, tokerr, _ = sys.exc_info() |
209 if hasattr(err, "lineno"): |
213 msg, lineno = tokerr.args |
210 lineno = err.lineno # IndentationError |
|
211 else: |
|
212 lineno = err.args[1][0] # TokenError |
214 raise NotPython( |
213 raise NotPython( |
215 "Couldn't parse '%s' as Python source: '%s' at %s" % |
214 "Couldn't parse '%s' as Python source: '%s' at line %d" % ( |
216 (self.filename, msg, lineno) |
215 self.filename, err.args[0], lineno |
217 ) |
216 ) |
|
217 ) |
218 |
218 |
219 excluded_lines = self.first_lines(self.excluded) |
219 excluded_lines = self.first_lines(self.excluded) |
220 lines = self.first_lines( |
220 ignore = set() |
221 self.statement_starts, |
221 ignore.update(excluded_lines) |
222 excluded_lines, |
222 ignore.update(self.docstrings) |
223 self.docstrings |
223 starts = self.statement_starts - ignore |
224 ) |
224 lines = self.first_lines(starts) |
|
225 lines -= ignore |
225 |
226 |
226 return lines, excluded_lines |
227 return lines, excluded_lines |
227 |
228 |
228 def arcs(self): |
229 def arcs(self): |
229 """Get information about the arcs available in the code. |
230 """Get information about the arcs available in the code. |
230 |
231 |
231 Returns a sorted list of line number pairs. Line numbers have been |
232 Returns a set of line number pairs. Line numbers have been normalized |
232 normalized to the first line of multiline statements. |
233 to the first line of multi-line statements. |
233 |
234 |
234 """ |
235 """ |
235 all_arcs = [] |
236 if self._all_arcs is None: |
236 for l1, l2 in self.byte_parser._all_arcs(): |
237 self._all_arcs = set() |
237 fl1 = self.first_line(l1) |
238 for l1, l2 in self.byte_parser._all_arcs(): |
238 fl2 = self.first_line(l2) |
239 fl1 = self.first_line(l1) |
239 if fl1 != fl2: |
240 fl2 = self.first_line(l2) |
240 all_arcs.append((fl1, fl2)) |
241 if fl1 != fl2: |
241 return sorted(all_arcs) |
242 self._all_arcs.add((fl1, fl2)) |
242 arcs = expensive(arcs) |
243 return self._all_arcs |
243 |
244 |
244 def exit_counts(self): |
245 def exit_counts(self): |
245 """Get a mapping from line numbers to count of exits from that line. |
246 """Get a count of exits from that each line. |
246 |
247 |
247 Excluded lines are excluded. |
248 Excluded lines are excluded. |
248 |
249 |
249 """ |
250 """ |
250 excluded_lines = self.first_lines(self.excluded) |
251 excluded_lines = self.first_lines(self.excluded) |
251 exit_counts = {} |
252 exit_counts = collections.defaultdict(int) |
252 for l1, l2 in self.arcs(): |
253 for l1, l2 in self.arcs(): |
253 if l1 < 0: |
254 if l1 < 0: |
254 # Don't ever report -1 as a line number |
255 # Don't ever report -1 as a line number |
255 continue |
256 continue |
256 if l1 in excluded_lines: |
257 if l1 in excluded_lines: |
257 # Don't report excluded lines as line numbers. |
258 # Don't report excluded lines as line numbers. |
258 continue |
259 continue |
259 if l2 in excluded_lines: |
260 if l2 in excluded_lines: |
260 # Arcs to excluded lines shouldn't count. |
261 # Arcs to excluded lines shouldn't count. |
261 continue |
262 continue |
262 if l1 not in exit_counts: |
|
263 exit_counts[l1] = 0 |
|
264 exit_counts[l1] += 1 |
263 exit_counts[l1] += 1 |
265 |
264 |
266 # Class definitions have one extra exit, so remove one for each: |
265 # Class definitions have one extra exit, so remove one for each: |
267 for l in self.classdefs: |
266 for l in self.classdefs: |
268 # Ensure key is there: classdefs can include excluded lines. |
267 # Ensure key is there: class definitions can include excluded lines. |
269 if l in exit_counts: |
268 if l in exit_counts: |
270 exit_counts[l] -= 1 |
269 exit_counts[l] -= 1 |
271 |
270 |
272 return exit_counts |
271 return exit_counts |
273 exit_counts = expensive(exit_counts) |
|
274 |
272 |
275 |
273 |
## Opcodes that guide the ByteParser.

278 def _opcode(name): |
276 def _opcode(name): |
279 """Return the opcode by name from the dis module.""" |
277 """Return the opcode by name from the dis module.""" |
280 return dis.opmap[name] |
278 return dis.opmap[name] |
|
279 |
281 |
280 |
282 def _opcode_set(*names): |
281 def _opcode_set(*names): |
283 """Return a set of opcodes by the names in `names`.""" |
282 """Return a set of opcodes by the names in `names`.""" |
284 s = set() |
283 s = set() |
285 for name in names: |
284 for name in names: |
294 |
293 |
# Opcodes that unconditionally end the code chunk.
OPS_CHUNK_END = _opcode_set(
    'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
    'BREAK_LOOP', 'CONTINUE_LOOP',
)

# Opcodes that unconditionally begin a new code chunk.  By starting new chunks
# with unconditional jump instructions, we neatly deal with jumps to jumps
# properly.
OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')

# Opcodes that push a block on the block stack.
OPS_PUSH_BLOCK = _opcode_set(
    'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
)

# Block types for exception handling.
OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
313 |
312 |
314 # Opcodes that pop a block from the block stack. |
313 # Opcodes that pop a block from the block stack. |
319 |
318 |
# Individual opcodes we need below.
OP_BREAK_LOOP = _opcode('BREAK_LOOP')
OP_END_FINALLY = _opcode('END_FINALLY')
OP_COMPARE_OP = _opcode('COMPARE_OP')
COMPARE_EXCEPTION = 10  # just have to get this constant from the code.
OP_LOAD_CONST = _opcode('LOAD_CONST')
OP_RETURN_VALUE = _opcode('RETURN_VALUE')
327 |
326 |
328 |
327 |
class ByteParser(object):
    """Parse byte codes to understand the structure of code."""

    @contract(text='unicode')
    def __init__(self, text, code=None, filename=None):
        """Use either a `code` object, or compile `text` from `filename`.

        Raises NotPython if `text` can't be compiled, and CoverageException
        if the interpreter's code objects lack the attributes we analyze.
        """
        self.text = text
        if code:
            self.code = code
        else:
            try:
                self.code = compile_unicode(text, filename, "exec")
            except SyntaxError as synerr:
                raise NotPython(
                    "Couldn't parse '%s' as Python source: '%s' at line %d" % (
                        filename, synerr.msg, synerr.lineno
                    )
                )

        # Alternative Python implementations don't always provide all the
        # attributes on code objects that we need to do the analysis.
        for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
            if not hasattr(self.code, attr):
                raise CoverageException(
                    "This implementation of Python doesn't support code analysis.\n"
                    "Run coverage.py under CPython for this command."
                )
366 |
354 |
367 def child_parsers(self): |
355 def child_parsers(self): |
368 """Iterate over all the code objects nested within this one. |
356 """Iterate over all the code objects nested within this one. |
369 |
357 |
370 The iteration includes `self` as its first value. |
358 The iteration includes `self` as its first value. |
371 |
359 |
372 """ |
360 """ |
373 children = CodeObjects(self.code) |
361 children = CodeObjects(self.code) |
374 return [ByteParser(code=c, text=self.text) for c in children] |
362 return (ByteParser(self.text, code=c) for c in children) |
375 |
363 |
376 def _bytes_lines(self): |
364 def _bytes_lines(self): |
377 """Map byte offsets to line numbers in `code`. |
365 """Map byte offsets to line numbers in `code`. |
378 |
366 |
379 Uses co_lnotab described in Python/compile.c to map byte offsets to |
367 Uses co_lnotab described in Python/compile.c to map byte offsets to |
410 for bp in self.child_parsers(): |
398 for bp in self.child_parsers(): |
411 # Get all of the lineno information from this code. |
399 # Get all of the lineno information from this code. |
412 for _, l in bp._bytes_lines(): |
400 for _, l in bp._bytes_lines(): |
413 yield l |
401 yield l |
414 |
402 |
415 def _block_stack_repr(self, block_stack): |
403 def _block_stack_repr(self, block_stack): # pragma: debugging |
416 """Get a string version of `block_stack`, for debugging.""" |
404 """Get a string version of `block_stack`, for debugging.""" |
417 blocks = ", ".join( |
405 blocks = ", ".join( |
418 ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack] |
406 "(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack |
419 ) |
407 ) |
420 return "[" + blocks + "]" |
408 return "[" + blocks + "]" |
421 |
409 |
422 def _split_into_chunks(self): |
410 def _split_into_chunks(self): |
423 """Split the code object into a list of `Chunk` objects. |
411 """Split the code object into a list of `Chunk` objects. |
542 ) |
534 ) |
543 chunk.exits.add(ex) |
535 chunk.exits.add(ex) |
544 chunks.append(chunk) |
536 chunks.append(chunk) |
545 |
537 |
546 # Give all the chunks a length. |
538 # Give all the chunks a length. |
547 chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301 |
539 chunks[-1].length = bc.next_offset - chunks[-1].byte |
548 for i in range(len(chunks)-1): |
540 for i in range(len(chunks)-1): |
549 chunks[i].length = chunks[i+1].byte - chunks[i].byte |
541 chunks[i].length = chunks[i+1].byte - chunks[i].byte |
550 |
542 |
551 #self.validate_chunks(chunks) |
543 #self.validate_chunks(chunks) |
552 return chunks |
544 return chunks |
553 |
545 |
554 def validate_chunks(self, chunks): |
546 def validate_chunks(self, chunks): # pragma: debugging |
555 """Validate the rule that chunks have a single entrance.""" |
547 """Validate the rule that chunks have a single entrance.""" |
556 # starts is the entrances to the chunks |
548 # starts is the entrances to the chunks |
557 starts = set([ch.byte for ch in chunks]) |
549 starts = set(ch.byte for ch in chunks) |
558 for ch in chunks: |
550 for ch in chunks: |
559 assert all([(ex in starts or ex < 0) for ex in ch.exits]) |
551 assert all((ex in starts or ex < 0) for ex in ch.exits) |
560 |
552 |
561 def _arcs(self): |
553 def _arcs(self): |
562 """Find the executable arcs in the code. |
554 """Find the executable arcs in the code. |
563 |
555 |
564 Yields pairs: (from,to). From and to are integer line numbers. If |
556 Yields pairs: (from,to). From and to are integer line numbers. If |
566 is < 0, the arc is an exit from the code object. |
558 is < 0, the arc is an exit from the code object. |
567 |
559 |
568 """ |
560 """ |
569 chunks = self._split_into_chunks() |
561 chunks = self._split_into_chunks() |
570 |
562 |
571 # A map from byte offsets to chunks jumped into. |
563 # A map from byte offsets to the chunk starting at that offset. |
572 byte_chunks = dict([(c.byte, c) for c in chunks]) |
564 byte_chunks = dict((c.byte, c) for c in chunks) |
573 |
|
574 # There's always an entrance at the first chunk. |
|
575 yield (-1, byte_chunks[0].line) |
|
576 |
565 |
577 # Traverse from the first chunk in each line, and yield arcs where |
566 # Traverse from the first chunk in each line, and yield arcs where |
578 # the trace function will be invoked. |
567 # the trace function will be invoked. |
579 for chunk in chunks: |
568 for chunk in chunks: |
|
569 if chunk.entrance: |
|
570 yield (-1, chunk.line) |
|
571 |
580 if not chunk.first: |
572 if not chunk.first: |
581 continue |
573 continue |
582 |
574 |
583 chunks_considered = set() |
575 chunks_considered = set() |
584 chunks_to_consider = [chunk] |
576 chunks_to_consider = [chunk] |
585 while chunks_to_consider: |
577 while chunks_to_consider: |
586 # Get the chunk we're considering, and make sure we don't |
578 # Get the chunk we're considering, and make sure we don't |
587 # consider it again |
579 # consider it again. |
588 this_chunk = chunks_to_consider.pop() |
580 this_chunk = chunks_to_consider.pop() |
589 chunks_considered.add(this_chunk) |
581 chunks_considered.add(this_chunk) |
590 |
582 |
591 # For each exit, add the line number if the trace function |
583 # For each exit, add the line number if the trace function |
592 # would be triggered, or add the chunk to those being |
584 # would be triggered, or add the chunk to those being |
645 to have many exits from a chunk, and "basic block" is a more cumbersome |
637 to have many exits from a chunk, and "basic block" is a more cumbersome |
646 term. |
638 term. |
647 |
639 |
648 .. _basic block: http://en.wikipedia.org/wiki/Basic_block |
640 .. _basic block: http://en.wikipedia.org/wiki/Basic_block |
649 |
641 |
|
642 `byte` is the offset to the bytecode starting this chunk. |
|
643 |
650 `line` is the source line number containing this chunk. |
644 `line` is the source line number containing this chunk. |
651 |
645 |
652 `first` is true if this is the first chunk in the source line. |
646 `first` is true if this is the first chunk in the source line. |
653 |
647 |
654 An exit < 0 means the chunk can leave the code (return). The exit is |
648 An exit < 0 means the chunk can leave the code (return). The exit is |
655 the negative of the starting line number of the code block. |
649 the negative of the starting line number of the code block. |
|
650 |
|
651 The `entrance` attribute is a boolean indicating whether the code object |
|
652 can be entered at this chunk. |
656 |
653 |
657 """ |
654 """ |
658 def __init__(self, byte, line, first): |
655 def __init__(self, byte, line, first): |
659 self.byte = byte |
656 self.byte = byte |
660 self.line = line |
657 self.line = line |
661 self.first = first |
658 self.first = first |
662 self.length = 0 |
659 self.length = 0 |
|
660 self.entrance = False |
663 self.exits = set() |
661 self.exits = set() |
664 |
662 |
665 def __repr__(self): |
663 def __repr__(self): |
666 if self.first: |
664 return "<%d+%d @%d%s%s %r>" % ( |
667 bang = "!" |
665 self.byte, |
668 else: |
666 self.length, |
669 bang = "" |
667 self.line, |
670 return "<%d+%d @%d%s %r>" % ( |
668 "!" if self.first else "", |
671 self.byte, self.length, self.line, bang, list(self.exits) |
669 "v" if self.entrance else "", |
672 ) |
670 list(self.exits), |
673 |
671 ) |
674 |
|
class CachedTokenizer(object):
    """A one-element cache around tokenize.generate_tokens.

    When reporting, coverage.py tokenizes files twice: once to find the
    structure of the file, and once to syntax-color it.  Tokenizing is
    expensive, so this single-slot cache lets the second, back-to-back
    request reuse the first result instead of tokenizing again.

    """
    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for `tokenize.generate_tokens`."""
        if text == self.last_text:
            # Same text as the previous call: serve the cached token list.
            return self.last_tokens
        # Different text: tokenize it and remember the result.
        readline = StringIO(text).readline
        self.last_text = text
        self.last_tokens = list(tokenize.generate_tokens(readline))
        return self.last_tokens

# A module-level callable replacement for tokenize.generate_tokens.
generate_tokens = CachedTokenizer().generate_tokens
|