|
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""Raw data collector for coverage.py."""

import os
import sys

from coverage import env
from coverage.config import CoverageConfig
from coverage.debug import short_stack
from coverage.disposition import FileDisposition
from coverage.exceptions import ConfigError
from coverage.misc import human_sorted, isolate_module
from coverage.pytracer import PyTracer

os = isolate_module(os)


try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':  # pragma: part covered
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code. If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors. I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    CTracer = None


class Collector:
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information. Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.
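
    A rough usage sketch (simplified and illustrative; the real call sites
    live in the Coverage class, and names like `abs_file`, `covdata`, and
    `run_measured_code` are placeholders)::

        collector = Collector(
            should_trace=should_trace, check_include=check_include,
            should_start_context=None, file_mapper=abs_file,
            timid=False, branch=False, warn=warn, concurrency=["thread"],
        )
        collector.use_data(covdata, context=None)  # covdata: a CoverageData
        collector.start()
        run_measured_code()                        # the program being measured
        collector.stop()
        collector.flush_data()                     # write remaining data to covdata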
|

    """

    # The stack of active Collectors. Collectors are added here when started,
    # and popped when stopped. Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.
    _collectors = []

    # The concurrency settings we support here.
    LIGHT_THREADS = {"greenlet", "eventlet", "gevent"}

    def __init__(
        self, should_trace, check_include, should_start_context, file_mapper,
        timid, branch, warn, concurrency,
    ):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition` object.

        `check_include` is a function taking a file name and a frame. It
        returns a boolean: True if the file should be traced, False if not.

        `should_start_context` is a function taking a frame, and returning a
        string. If the frame should be the start of a new context, the string
        is the new context. If the frame should not be the start of a new
        context, return None.
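
        As an illustrative sketch (not the actual implementation shipped with
        coverage.py), such a function might start a context for each test::

            def should_start_context(frame):
                name = frame.f_code.co_name
                return name if name.startswith("test") else None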
|
        `file_mapper` is a function taking a filename, and returning a Unicode
        filename. The result is the name that will be recorded in the data
        file.

        If `timid` is true, then a slower, simpler trace function will be
        used. This is important for some environments where manipulation of
        tracing functions makes the faster, more sophisticated trace function
        operate improperly.

        If `branch` is true, then branches will be measured. This involves
        collecting data on which statements followed each other (arcs). Use
        `get_arc_data` to get the arc data.

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use. Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default). "thread" can be combined with one of the other three.
        Other values are ignored.
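
        For example, passing `["thread", "gevent"]` would measure code running
        in ordinary threads as well as in gevent greenlets (assuming gevent is
        installed).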
|
        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.should_start_context = should_start_context
        self.file_mapper = file_mapper
        self.branch = branch
        self.warn = warn
        self.concurrency = concurrency
        assert isinstance(self.concurrency, list), f"Expected a list: {self.concurrency!r}"

        self.threading = None
        self.covdata = None
        self.static_context = None

        self.origin = short_stack()

        self.concur_id_func = None
        self.mapped_file_cache = {}

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
            self.packed_arcs = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False
            self.packed_arcs = False

        # We can handle a few concurrency options here, but only one at a time.
        concurrencies = set(self.concurrency)
        unknown = concurrencies - CoverageConfig.CONCURRENCY_CHOICES
        if unknown:
            show = ", ".join(sorted(unknown))
            raise ConfigError(f"Unknown concurrency choices: {show}")
        light_threads = concurrencies & self.LIGHT_THREADS
        if len(light_threads) > 1:
            show = ", ".join(sorted(light_threads))
            raise ConfigError(f"Conflicting concurrency settings: {show}")
        do_threading = False

        tried = "nothing"  # to satisfy pylint
        try:
            if "greenlet" in concurrencies:
                tried = "greenlet"
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif "eventlet" in concurrencies:
                tried = "eventlet"
                import eventlet.greenthread  # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif "gevent" in concurrencies:
                tried = "gevent"
                import gevent  # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent

            if "thread" in concurrencies:
                do_threading = True
        except ImportError as ex:
            msg = f"Couldn't trace with concurrency={tried}, the module isn't installed."
            raise ConfigError(msg) from ex

        if self.concur_id_func and not hasattr(self._trace_class, "concur_id_func"):
            raise ConfigError(
                "Can't support concurrency={} with {}, only threads are supported.".format(
                    tried, self.tracer_name(),
                )
            )

        if do_threading or not concurrencies:
            # It's important to import threading only if we need it. If
            # it's imported early, and the program being measured uses
            # gevent, then gevent's monkey-patching won't work properly.
            import threading
            self.threading = threading

        self.reset()

    def __repr__(self):
        return f"<Collector at 0x{id(self):x}: {self.tracer_name()}>"

    def use_data(self, covdata, context):
        """Use `covdata` for recording data."""
        self.covdata = covdata
        self.static_context = context
        self.covdata.set_context(self.static_context)

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__
|
    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # We used to use self.data.clear(), but that would remove filename
        # keys and data values that were still in use higher up the stack
        # when we are called as part of switch_context.
        for d in self.data.values():
            d.clear()

        for tracer in self.tracers:
            tracer.reset_activity()
|
    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        self.disabled_plugins = set()

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None. When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace. Its .trace attribute indicates whether the
        # file should be traced or not. If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__  # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes. Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()
|
    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers."""
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if hasattr(tracer, 'should_start_context'):
            tracer.should_start_context = self.should_start_context
            tracer.switch_context = self.switch_context
        if hasattr(tracer, 'disable_plugin'):
            tracer.disable_plugin = self.disable_plugin

        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins. Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function. So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn
|
    def start(self):
        """Start collecting trace information."""
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except:
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for (frame, event, arg), lineno in traces0:
            try:
                fn(frame, event, arg, lineno=lineno)
            except TypeError as ex:
                raise Exception("fullcoverage must be run with the C trace function.") from ex

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)
|
    def stop(self):
        """Stop collecting trace information."""
        assert self._collectors
        if self._collectors[-1] is not self:
            print("self._collectors:")
            for c in self._collectors:
                print(f" {c!r}\n{c.origin}")
        assert self._collectors[-1] is self, (
            f"Expected current collector to be {self!r}, but it's {self._collectors[-1]!r}"
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()
|
    def pause(self):
        """Pause tracing, but be prepared to `resume`."""
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                print("\nCoverage.py tracer stats:")
                for k in human_sorted(stats.keys()):
                    print(f"{k:>20}: {stats[k]}")
        if self.threading:
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`."""
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            self.threading.settrace(self._installation_trace)
        else:
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)
|
    def switch_context(self, new_context):
        """Switch to a new dynamic context."""
        self.flush_data()
        if self.static_context:
            context = self.static_context
            if new_context:
                context += "|" + new_context
        else:
            context = new_context
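        # For example (names here are purely illustrative): a static context
        # of "ci" and a dynamic context of "test_foo" combine into
        # "ci|test_foo".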
|
        self.covdata.set_context(context)

    def disable_plugin(self, disposition):
        """Disable the plugin mentioned in `disposition`."""
        file_tracer = disposition.file_tracer
        plugin = file_tracer._coverage_plugin
        plugin_name = plugin._coverage_plugin_name
        self.warn(f"Disabling plug-in {plugin_name!r} due to previous exception")
        plugin._coverage_enabled = False
        disposition.trace = False

    def cached_mapped_file(self, filename):
        """A locally cached version of file names mapped through file_mapper."""
        key = (type(filename), filename)
        try:
            return self.mapped_file_cache[key]
        except KeyError:
            return self.mapped_file_cache.setdefault(key, self.file_mapper(filename))
|
    def mapped_file_dict(self, d):
        """Return a dict like d, but with keys modified by file_mapper."""
        # The call to list(items()) ensures that the GIL protects the dictionary
        # iterator against concurrent modifications by tracers running
        # in other threads. We try three times in case of concurrent
        # access, hoping to get a clean copy.
        runtime_err = None
        for _ in range(3):  # pragma: part covered
            try:
                items = list(d.items())
            except RuntimeError as ex:  # pragma: cant happen
                runtime_err = ex
            else:
                break
        else:
            raise runtime_err  # pragma: cant happen

        return {self.cached_mapped_file(k): v for k, v in items if v}

    def plugin_was_disabled(self, plugin):
        """Record that `plugin` was disabled during the run."""
        self.disabled_plugins.add(plugin._coverage_plugin_name)

    def flush_data(self):
        """Save the collected data to our associated `CoverageData`.

        Data may have also been saved along the way. This forces the
        last of the data to be saved.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        if self.branch:
            if self.packed_arcs:
                # Unpack the line number pairs packed into integers. See
                # tracer.c:CTracer_record_pair for the C code that creates
                # these packed ints.
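                # Layout (as implied by the decoding below): bits 0-19 hold
                # abs(l1), bits 20-39 hold abs(l2), bit 40 is the sign of l1,
                # and bit 41 is the sign of l2. For example, the arc (3, -1)
                # arrives packed as 3 | (1 << 20) | (1 << 41) == 2199024304131.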
|
                data = {}
                for fname, packeds in self.data.items():
                    tuples = []
                    for packed in packeds:
                        l1 = packed & 0xFFFFF
                        l2 = (packed & (0xFFFFF << 20)) >> 20
                        if packed & (1 << 40):
                            l1 *= -1
                        if packed & (1 << 41):
                            l2 *= -1
                        tuples.append((l1, l2))
                    data[fname] = tuples
            else:
                data = self.data
            self.covdata.add_arcs(self.mapped_file_dict(data))
        else:
            self.covdata.add_lines(self.mapped_file_dict(self.data))

        file_tracers = {
            k: v for k, v in self.file_tracers.items()
            if v not in self.disabled_plugins
        }
        self.covdata.add_file_tracers(self.mapped_file_dict(file_tracers))

        self._clear_data()
        return True