DebugClients/Python3/coverage/parser.py

changeset 0
de9c2efb9d02
child 29
391dc0bc4ae5
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 """Code parsing for Coverage."""
2
3 import re, token, tokenize, types
4 import io as StringIO
5
6 from .misc import nice_pair, CoverageException
7 from .backward import set # pylint: disable-msg=W0622
8
9
class CodeParser:
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, show_tokens=False):
        """Create an empty parser.

        If `show_tokens` is true, `raw_parse` prints every token it reads,
        which is useful when debugging the parser itself.

        """
        self.show_tokens = show_tokens

        # The text lines of the parsed code.
        self.lines = None

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

    def find_statement_starts(self, code):
        """Find the starts of statements in compiled code.

        Reads the line number table of the code object `code` and adds every
        line number that starts a statement to `self.statement_starts`.

        """
        # Imported here so this method does not depend on the module-level
        # import block.
        import dis

        # dis.findlinestarts decodes the line table for us.  Decoding
        # co_lnotab by hand with ord() fails on Python 3, where the table is
        # a bytes object whose items are already ints.
        for _, line_num in dis.findlinestarts(code):
            # Skip entries that carry no real (1-based) source line; some
            # interpreter versions report None or 0 for such bytecode.
            if line_num:
                self.statement_starts.add(line_num)

    def find_statements(self, code):
        """Find the statements in `code`.

        Update `self.statement_starts`, a set of line numbers that start
        statements.  Recurses into all code objects reachable from `code`.

        """
        # Adapted from trace.py in the standard library.

        # Get all of the lineno information from this code.
        self.find_statement_starts(code)

        # Check the constants for references to other code objects.
        for const in code.co_consts:
            if isinstance(const, types.CodeType):
                # Found another code object, so recurse into it.
                self.find_statements(const)

    def raw_parse(self, text=None, filename=None, exclude=None):
        """Parse `text` to find the interesting facts about its lines.

        If `text` is empty or None, the source is read from `filename`.
        `exclude` is an optional regex; lines matching it are excluded, as is
        the whole suite introduced by a matching line that ends in a colon.

        Updates `self.lines`, `self.excluded`, `self.docstrings`,
        `self.multiline` and `self.statement_starts`.

        Raises CoverageException if the text can't be compiled as Python.

        """
        # The module-level import binds the io module under the name
        # `StringIO`, so the name `io` has to be imported here.
        import io

        if not text:
            # tokenize.open honours the source encoding declaration and uses
            # universal newlines; the with-block closes the file on errors.
            with tokenize.open(filename) as sourcef:
                text = sourcef.read()
        text = text.replace('\r\n', '\n')
        self.lines = text.split('\n')

        # Find lines which match an exclusion pattern.
        if exclude:
            re_exclude = re.compile(exclude)
            for lineno, ltext in enumerate(self.lines, 1):
                if re_exclude.search(ltext):
                    self.excluded.add(lineno)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None

        tokgen = tokenize.generate_tokens(io.StringIO(text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)
                self.docstrings.update(range(slineno, elineno+1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for lineno in range(first_line, elineno+1):
                        self.multiline[lineno] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        filename = filename or "<code>"
        try:
            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
            # text ends nicely for them.
            text += '\n'
            code = compile(text, filename, "exec")
        except SyntaxError as synerr:
            raise CoverageException(
                "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                ) from synerr

        self.find_statements(code)

    def map_to_first_line(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        # A set keeps each membership test constant-time.
        ignore = set(ignore or ())
        first_lines = set()
        for lineno in lines:
            if lineno in ignore:
                continue
            rng = self.multiline.get(lineno)
            first = rng[0] if rng else lineno
            if first not in ignore:
                first_lines.add(first)
        return sorted(first_lines)

    def parse_source(self, text=None, filename=None, exclude=None):
        """Parse source text to find executable lines, excluded lines, etc.

        Source can be provided as `text`, the text itself, or `filename`, from
        which text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        Return values are 1) a sorted list of executable line numbers,
        2) a sorted list of excluded line numbers, and 3) a dict mapping line
        numbers to pairs (lo,hi) for multi-line statements.

        """
        self.raw_parse(text, filename, exclude)

        excluded_lines = self.map_to_first_line(self.excluded)
        # Docstring lines are not reported as executable either.
        ignore = excluded_lines + list(self.docstrings)
        lines = self.map_to_first_line(self.statement_starts, ignore)

        return lines, excluded_lines, self.multiline

    def print_parse_results(self):
        """Print the results of the parsing.

        Each source line is printed with its number and three marker columns:
        '-' for a statement start, '"' for a docstring line, 'x' for an
        excluded line.

        """
        for lineno, ltext in enumerate(self.lines, 1):
            m0 = m1 = m2 = ' '
            if lineno in self.statement_starts:
                m0 = '-'
            if lineno in self.docstrings:
                m1 = '"'
            if lineno in self.excluded:
                m2 = 'x'
            print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))
218
219
if __name__ == '__main__':
    import sys

    # Debugging entry point: parse the file named by the first command-line
    # argument, echoing every token, then print the per-line parse markers.
    source_path = sys.argv[1]
    parser = CodeParser(show_tokens=True)
    parser.raw_parse(filename=source_path, exclude=r"no\s*cover")
    parser.print_parse_results()

eric ide

mercurial