DebugClients/Python/coverage/collector.py

changeset 4489
d0d6e4ad31bd
parent 3499
f2d4b02c7e88
child 4491
0d8612e24fef
equal deleted inserted replaced
4481:456c58fc64b0 4489:d0d6e4ad31bd
1 """Raw data collector for Coverage.""" 1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3 import os, sys, threading 3
4 """Raw data collector for coverage.py."""
5
6 import os, sys
7
8 from coverage import env
9 from coverage.backward import iitems
10 from coverage.files import abs_file
11 from coverage.misc import CoverageException
12 from coverage.pytracer import PyTracer
4 13
5 try: 14 try:
6 # Use the C extension code when we can, for speed. 15 # Use the C extension code when we can, for speed.
7 from .tracer import CTracer # pylint: disable=F0401,E0611 16 from coverage.tracer import CTracer, CFileDisposition # pylint: disable=no-name-in-module
8 except ImportError: 17 except ImportError:
9 # Couldn't import the C extension, maybe it isn't built. 18 # Couldn't import the C extension, maybe it isn't built.
10 if os.getenv('COVERAGE_TEST_TRACER') == 'c': 19 if os.getenv('COVERAGE_TEST_TRACER') == 'c':
11 # During testing, we use the COVERAGE_TEST_TRACER env var to indicate 20 # During testing, we use the COVERAGE_TEST_TRACER environment variable
12 # that we've fiddled with the environment to test this fallback code. 21 # to indicate that we've fiddled with the environment to test this
13 # If we thought we had a C tracer, but couldn't import it, then exit 22 # fallback code. If we thought we had a C tracer, but couldn't import
14 # quickly and clearly instead of dribbling confusing errors. I'm using 23 # it, then exit quickly and clearly instead of dribbling confusing
15 # sys.exit here instead of an exception because an exception here 24 # errors. I'm using sys.exit here instead of an exception because an
16 # causes all sorts of other noise in unittest. 25 # exception here causes all sorts of other noise in unittest.
17 sys.stderr.write( 26 sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
18 "*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n"
19 )
20 sys.exit(1) 27 sys.exit(1)
21 CTracer = None 28 CTracer = None
22 29
23 30
24 class PyTracer(object): 31 class FileDisposition(object):
25 """Python implementation of the raw data tracer.""" 32 """A simple value type for recording what to do with a file."""
26 33 pass
27 # Because of poor implementations of trace-function-manipulating tools,
28 # the Python trace function must be kept very simple. In particular, there
29 # must be only one function ever set as the trace function, both through
30 # sys.settrace, and as the return value from the trace function. Put
31 # another way, the trace function must always return itself. It cannot
32 # swap in other functions, or return None to avoid tracing a particular
33 # frame.
34 #
35 # The trace manipulator that introduced this restriction is DecoratorTools,
36 # which sets a trace function, and then later restores the pre-existing one
37 # by calling sys.settrace with a function it found in the current frame.
38 #
39 # Systems that use DecoratorTools (or similar trace manipulations) must use
40 # PyTracer to get accurate results. The command-line --timid argument is
41 # used to force the use of this tracer.
42
43 def __init__(self):
44 self.data = None
45 self.should_trace = None
46 self.should_trace_cache = None
47 self.warn = None
48 self.cur_file_data = None
49 self.last_line = 0
50 self.data_stack = []
51 self.last_exc_back = None
52 self.last_exc_firstlineno = 0
53 self.arcs = False
54 self.thread = None
55 self.stopped = False
56
57 def _trace(self, frame, event, arg_unused):
58 """The trace function passed to sys.settrace."""
59
60 if self.stopped:
61 return
62
63 if 0:
64 sys.stderr.write("trace event: %s %r @%d\n" % (
65 event, frame.f_code.co_filename, frame.f_lineno
66 ))
67
68 if self.last_exc_back:
69 if frame == self.last_exc_back:
70 # Someone forgot a return event.
71 if self.arcs and self.cur_file_data:
72 pair = (self.last_line, -self.last_exc_firstlineno)
73 self.cur_file_data[pair] = None
74 self.cur_file_data, self.last_line = self.data_stack.pop()
75 self.last_exc_back = None
76
77 if event == 'call':
78 # Entering a new function context. Decide if we should trace
79 # in this file.
80 self.data_stack.append((self.cur_file_data, self.last_line))
81 filename = frame.f_code.co_filename
82 if filename not in self.should_trace_cache:
83 tracename = self.should_trace(filename, frame)
84 self.should_trace_cache[filename] = tracename
85 else:
86 tracename = self.should_trace_cache[filename]
87 #print("called, stack is %d deep, tracename is %r" % (
88 # len(self.data_stack), tracename))
89 if tracename:
90 if tracename not in self.data:
91 self.data[tracename] = {}
92 self.cur_file_data = self.data[tracename]
93 else:
94 self.cur_file_data = None
95 # Set the last_line to -1 because the next arc will be entering a
96 # code block, indicated by (-1, n).
97 self.last_line = -1
98 elif event == 'line':
99 # Record an executed line.
100 if self.cur_file_data is not None:
101 if self.arcs:
102 #print("lin", self.last_line, frame.f_lineno)
103 self.cur_file_data[(self.last_line, frame.f_lineno)] = None
104 else:
105 #print("lin", frame.f_lineno)
106 self.cur_file_data[frame.f_lineno] = None
107 self.last_line = frame.f_lineno
108 elif event == 'return':
109 if self.arcs and self.cur_file_data:
110 first = frame.f_code.co_firstlineno
111 self.cur_file_data[(self.last_line, -first)] = None
112 # Leaving this function, pop the filename stack.
113 self.cur_file_data, self.last_line = self.data_stack.pop()
114 #print("returned, stack is %d deep" % (len(self.data_stack)))
115 elif event == 'exception':
116 #print("exc", self.last_line, frame.f_lineno)
117 self.last_exc_back = frame.f_back
118 self.last_exc_firstlineno = frame.f_code.co_firstlineno
119 return self._trace
120
121 def start(self):
122 """Start this Tracer.
123
124 Return a Python function suitable for use with sys.settrace().
125
126 """
127 self.thread = threading.currentThread()
128 sys.settrace(self._trace)
129 return self._trace
130
131 def stop(self):
132 """Stop this Tracer."""
133 self.stopped = True
134 if self.thread != threading.currentThread():
135 # Called on a different thread than started us: we can't unhook
136 # ourselves, but we've set the flag that we should stop, so we won't
137 # do any more tracing.
138 return
139
140 if hasattr(sys, "gettrace") and self.warn:
141 if sys.gettrace() != self._trace:
142 msg = "Trace function changed, measurement is likely wrong: %r"
143 self.warn(msg % (sys.gettrace(),))
144 #print("Stopping tracer on %s" % threading.current_thread().ident)
145 sys.settrace(None)
146
147 def get_stats(self):
148 """Return a dictionary of statistics, or None."""
149 return None
150 34
151 35
152 class Collector(object): 36 class Collector(object):
153 """Collects trace data. 37 """Collects trace data.
154 38
168 # The stack of active Collectors. Collectors are added here when started, 52 # The stack of active Collectors. Collectors are added here when started,
169 # and popped when stopped. Collectors on the stack are paused when not 53 # and popped when stopped. Collectors on the stack are paused when not
170 # the top, and resumed when they become the top again. 54 # the top, and resumed when they become the top again.
171 _collectors = [] 55 _collectors = []
172 56
173 def __init__(self, should_trace, timid, branch, warn): 57 def __init__(self, should_trace, check_include, timid, branch, warn, concurrency):
174 """Create a collector. 58 """Create a collector.
175 59
176 `should_trace` is a function, taking a filename, and returning a 60 `should_trace` is a function, taking a file name, and returning a
177 canonicalized filename, or None depending on whether the file should 61 `coverage.FileDisposition` object.
178 be traced or not. 62
63 `check_include` is a function taking a file name and a frame. It returns
64 a boolean: True if the file should be traced, False if not.
179 65
180 If `timid` is true, then a slower simpler trace function will be 66 If `timid` is true, then a slower simpler trace function will be
181 used. This is important for some environments where manipulation of 67 used. This is important for some environments where manipulation of
182 tracing functions makes the faster more sophisticated trace function not 68 tracing functions makes the faster more sophisticated trace function not
183 operate properly. 69 operate properly.
187 `get_arc_data` to get the arc data. 73 `get_arc_data` to get the arc data.
188 74
189 `warn` is a warning function, taking a single string message argument, 75 `warn` is a warning function, taking a single string message argument,
190 to be used if a warning needs to be issued. 76 to be used if a warning needs to be issued.
191 77
78 `concurrency` is a string indicating the concurrency library in use.
79 Valid values are "greenlet", "eventlet", "gevent", or "thread" (the
80 default).
81
192 """ 82 """
193 self.should_trace = should_trace 83 self.should_trace = should_trace
84 self.check_include = check_include
194 self.warn = warn 85 self.warn = warn
195 self.branch = branch 86 self.branch = branch
87 self.threading = None
88 self.concurrency = concurrency
89
90 self.concur_id_func = None
91
92 try:
93 if concurrency == "greenlet":
94 import greenlet
95 self.concur_id_func = greenlet.getcurrent
96 elif concurrency == "eventlet":
97 import eventlet.greenthread # pylint: disable=import-error,useless-suppression
98 self.concur_id_func = eventlet.greenthread.getcurrent
99 elif concurrency == "gevent":
100 import gevent # pylint: disable=import-error,useless-suppression
101 self.concur_id_func = gevent.getcurrent
102 elif concurrency == "thread" or not concurrency:
103 # It's important to import threading only if we need it. If
104 # it's imported early, and the program being measured uses
105 # gevent, then gevent's monkey-patching won't work properly.
106 import threading
107 self.threading = threading
108 else:
109 raise CoverageException("Don't understand concurrency=%s" % concurrency)
110 except ImportError:
111 raise CoverageException(
112 "Couldn't trace with concurrency=%s, the module isn't installed." % concurrency
113 )
114
196 self.reset() 115 self.reset()
197 116
198 if timid: 117 if timid:
199 # Being timid: use the simple Python trace function. 118 # Being timid: use the simple Python trace function.
200 self._trace_class = PyTracer 119 self._trace_class = PyTracer
201 else: 120 else:
202 # Being fast: use the C Tracer if it is available, else the Python 121 # Being fast: use the C Tracer if it is available, else the Python
203 # trace function. 122 # trace function.
204 self._trace_class = CTracer or PyTracer 123 self._trace_class = CTracer or PyTracer
205 124
125 if self._trace_class is CTracer:
126 self.file_disposition_class = CFileDisposition
127 self.supports_plugins = True
128 else:
129 self.file_disposition_class = FileDisposition
130 self.supports_plugins = False
131
206 def __repr__(self): 132 def __repr__(self):
207 return "<Collector at 0x%x>" % id(self) 133 return "<Collector at 0x%x: %s>" % (id(self), self.tracer_name())
208 134
209 def tracer_name(self): 135 def tracer_name(self):
210 """Return the class name of the tracer we're using.""" 136 """Return the class name of the tracer we're using."""
211 return self._trace_class.__name__ 137 return self._trace_class.__name__
212 138
213 def reset(self): 139 def reset(self):
214 """Clear collected data, and prepare to collect more.""" 140 """Clear collected data, and prepare to collect more."""
215 # A dictionary mapping filenames to dicts with linenumber keys, 141 # A dictionary mapping file names to dicts with line number keys (if not
216 # or mapping filenames to dicts with linenumber pairs as keys. 142 # branch coverage), or mapping file names to dicts with line number
143 # pairs as keys (if branch coverage).
217 self.data = {} 144 self.data = {}
218 145
219 # A cache of the results from should_trace, the decision about whether 146 # A dictionary mapping file names to file tracer plugin names that will
220 # to trace execution in a file. A dict of filename to (filename or 147 # handle them.
221 # None). 148 self.file_tracers = {}
222 self.should_trace_cache = {} 149
150 # The .should_trace_cache attribute is a cache from file names to
151 # coverage.FileDisposition objects, or None. When a file is first
152 # considered for tracing, a FileDisposition is obtained from
153 # Coverage.should_trace. Its .trace attribute indicates whether the
154 # file should be traced or not. If it should be, a plugin with dynamic
155 # file names can decide not to trace it based on the dynamic file name
156 # being excluded by the inclusion rules, in which case the
157 # FileDisposition will be replaced by None in the cache.
158 if env.PYPY:
159 import __pypy__ # pylint: disable=import-error
160 # Alex Gaynor said:
161 # should_trace_cache is a strictly growing key: once a key is in
162 # it, it never changes. Further, the keys used to access it are
163 # generally constant, given sufficient context. That is to say, at
164 # any given point _trace() is called, pypy is able to know the key.
165 # This is because the key is determined by the physical source code
166 # line, and that's invariant with the call site.
167 #
168 # This property of a dict with immutable keys, combined with
169 # call-site-constant keys is a match for PyPy's module dict,
170 # which is optimized for such workloads.
171 #
172 # This gives a 20% benefit on the workload described at
173 # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
174 self.should_trace_cache = __pypy__.newdict("module")
175 else:
176 self.should_trace_cache = {}
223 177
224 # Our active Tracers. 178 # Our active Tracers.
225 self.tracers = [] 179 self.tracers = []
226 180
227 def _start_tracer(self): 181 def _start_tracer(self):
228 """Start a new Tracer object, and store it in self.tracers.""" 182 """Start a new Tracer object, and store it in self.tracers."""
229 tracer = self._trace_class() 183 tracer = self._trace_class()
230 tracer.data = self.data 184 tracer.data = self.data
231 tracer.arcs = self.branch 185 tracer.trace_arcs = self.branch
232 tracer.should_trace = self.should_trace 186 tracer.should_trace = self.should_trace
233 tracer.should_trace_cache = self.should_trace_cache 187 tracer.should_trace_cache = self.should_trace_cache
234 tracer.warn = self.warn 188 tracer.warn = self.warn
189
190 if hasattr(tracer, 'concur_id_func'):
191 tracer.concur_id_func = self.concur_id_func
192 elif self.concur_id_func:
193 raise CoverageException(
194 "Can't support concurrency=%s with %s, only threads are supported" % (
195 self.concurrency, self.tracer_name(),
196 )
197 )
198
199 if hasattr(tracer, 'file_tracers'):
200 tracer.file_tracers = self.file_tracers
201 if hasattr(tracer, 'threading'):
202 tracer.threading = self.threading
203 if hasattr(tracer, 'check_include'):
204 tracer.check_include = self.check_include
205
235 fn = tracer.start() 206 fn = tracer.start()
236 self.tracers.append(tracer) 207 self.tracers.append(tracer)
208
237 return fn 209 return fn
238 210
239 # The trace function has to be set individually on each thread before 211 # The trace function has to be set individually on each thread before
240 # execution begins. Ironically, the only support the threading module has 212 # execution begins. Ironically, the only support the threading module has
241 # for running code before the thread main is the tracing function. So we 213 # for running code before the thread main is the tracing function. So we
242 # install this as a trace function, and the first time it's called, it does 214 # install this as a trace function, and the first time it's called, it does
243 # the real trace installation. 215 # the real trace installation.
244 216
245 def _installation_trace(self, frame_unused, event_unused, arg_unused): 217 def _installation_trace(self, frame, event, arg):
246 """Called on new threads, installs the real tracer.""" 218 """Called on new threads, installs the real tracer."""
247 # Remove ourselves as the trace function 219 # Remove ourselves as the trace function.
248 sys.settrace(None) 220 sys.settrace(None)
249 # Install the real tracer. 221 # Install the real tracer.
250 fn = self._start_tracer() 222 fn = self._start_tracer()
251 # Invoke the real trace function with the current event, to be sure 223 # Invoke the real trace function with the current event, to be sure
252 # not to lose an event. 224 # not to lose an event.
253 if fn: 225 if fn:
254 fn = fn(frame_unused, event_unused, arg_unused) 226 fn = fn(frame, event, arg)
255 # Return the new trace function to continue tracing in this scope. 227 # Return the new trace function to continue tracing in this scope.
256 return fn 228 return fn
257 229
258 def start(self): 230 def start(self):
259 """Start collecting trace information.""" 231 """Start collecting trace information."""
260 if self._collectors: 232 if self._collectors:
261 self._collectors[-1].pause() 233 self._collectors[-1].pause()
234
235 # Check to see whether we had a fullcoverage tracer installed. If so,
236 # get the stack frames it stashed away for us.
237 traces0 = []
238 fn0 = sys.gettrace()
239 if fn0:
240 tracer0 = getattr(fn0, '__self__', None)
241 if tracer0:
242 traces0 = getattr(tracer0, 'traces', [])
243
244 try:
245 # Install the tracer on this thread.
246 fn = self._start_tracer()
247 except:
248 if self._collectors:
249 self._collectors[-1].resume()
250 raise
251
252 # If _start_tracer succeeded, then we add ourselves to the global
253 # stack of collectors.
262 self._collectors.append(self) 254 self._collectors.append(self)
263 #print("Started: %r" % self._collectors, file=sys.stderr) 255
264 256 # Replay all the events from fullcoverage into the new trace function.
265 # Check to see whether we had a fullcoverage tracer installed.
266 traces0 = []
267 if hasattr(sys, "gettrace"):
268 fn0 = sys.gettrace()
269 if fn0:
270 tracer0 = getattr(fn0, '__self__', None)
271 if tracer0:
272 traces0 = getattr(tracer0, 'traces', [])
273
274 # Install the tracer on this thread.
275 fn = self._start_tracer()
276
277 for args in traces0: 257 for args in traces0:
278 (frame, event, arg), lineno = args 258 (frame, event, arg), lineno = args
279 try: 259 try:
280 fn(frame, event, arg, lineno=lineno) 260 fn(frame, event, arg, lineno=lineno)
281 except TypeError: 261 except TypeError:
282 raise Exception( 262 raise Exception("fullcoverage must be run with the C trace function.")
283 "fullcoverage must be run with the C trace function."
284 )
285 263
286 # Install our installation tracer in threading, to jump start other 264 # Install our installation tracer in threading, to jump start other
287 # threads. 265 # threads.
288 threading.settrace(self._installation_trace) 266 if self.threading:
267 self.threading.settrace(self._installation_trace)
289 268
290 def stop(self): 269 def stop(self):
291 """Stop collecting trace information.""" 270 """Stop collecting trace information."""
292 #print("Stopping: %r" % self._collectors, file=sys.stderr)
293 if not self._collectors:
294 return
295 assert self._collectors 271 assert self._collectors
296 assert self._collectors[-1] is self 272 assert self._collectors[-1] is self, (
273 "Expected current collector to be %r, but it's %r" % (self, self._collectors[-1])
274 )
297 275
298 self.pause() 276 self.pause()
299 self.tracers = [] 277 self.tracers = []
300 278
301 # Remove this Collector from the stack, and resume the one underneath 279 # Remove this Collector from the stack, and resume the one underneath
311 stats = tracer.get_stats() 289 stats = tracer.get_stats()
312 if stats: 290 if stats:
313 print("\nCoverage.py tracer stats:") 291 print("\nCoverage.py tracer stats:")
314 for k in sorted(stats.keys()): 292 for k in sorted(stats.keys()):
315 print("%16s: %s" % (k, stats[k])) 293 print("%16s: %s" % (k, stats[k]))
316 threading.settrace(None) 294 if self.threading:
295 self.threading.settrace(None)
317 296
318 def resume(self): 297 def resume(self):
319 """Resume tracing after a `pause`.""" 298 """Resume tracing after a `pause`."""
320 for tracer in self.tracers: 299 for tracer in self.tracers:
321 tracer.start() 300 tracer.start()
322 threading.settrace(self._installation_trace) 301 if self.threading:
323 302 self.threading.settrace(self._installation_trace)
324 def get_line_data(self): 303 else:
325 """Return the line data collected. 304 self._start_tracer()
326 305
327 Data is { filename: { lineno: None, ...}, ...} 306 def save_data(self, covdata):
307 """Save the collected data to a `CoverageData`.
308
309 Also resets the collector.
328 310
329 """ 311 """
312 def abs_file_dict(d):
313 """Return a dict like d, but with keys modified by `abs_file`."""
314 return dict((abs_file(k), v) for k, v in iitems(d))
315
330 if self.branch: 316 if self.branch:
331 # If we were measuring branches, then we have to re-build the dict 317 covdata.add_arcs(abs_file_dict(self.data))
332 # to show line data. 318 else:
333 line_data = {} 319 covdata.add_lines(abs_file_dict(self.data))
334 for f, arcs in self.data.items(): 320 covdata.add_file_tracers(abs_file_dict(self.file_tracers))
335 line_data[f] = ldf = {} 321
336 for l1, _ in list(arcs.keys()): 322 self.reset()
337 if l1:
338 ldf[l1] = None
339 return line_data
340 else:
341 return self.data
342
343 def get_arc_data(self):
344 """Return the arc data collected.
345
346 Data is { filename: { (l1, l2): None, ...}, ...}
347
348 Note that no data is collected or returned if the Collector wasn't
349 created with `branch` true.
350
351 """
352 if self.branch:
353 return self.data
354 else:
355 return {}
356
357 #
358 # eflag: FileType = Python2

eric ide

mercurial