src/eric7/DebugClients/Python/coverage/collector.py

branch
eric7
changeset 9209
b99e7fd55fd3
parent 9099
0e511e0e94a3
child 9374
ed79209469ad
equal deleted inserted replaced
9208:3fc8dfeb6ebe 9209:b99e7fd55fd3
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4 """Raw data collector for coverage.py."""
5
6 import os
7 import sys
8
9 from coverage import env
10 from coverage.config import CoverageConfig
11 from coverage.debug import short_stack
12 from coverage.disposition import FileDisposition
13 from coverage.exceptions import ConfigError
14 from coverage.misc import human_sorted, isolate_module
15 from coverage.pytracer import PyTracer
16
17 os = isolate_module(os)
18
19
try:
    # Use the C extension code when we can, for speed.
    from coverage.tracer import CTracer, CFileDisposition
except ImportError:
    # Couldn't import the C extension, maybe it isn't built.
    if os.getenv('COVERAGE_TEST_TRACER') == 'c':  # pragma: part covered
        # During testing, we use the COVERAGE_TEST_TRACER environment variable
        # to indicate that we've fiddled with the environment to test this
        # fallback code. If we thought we had a C tracer, but couldn't import
        # it, then exit quickly and clearly instead of dribbling confusing
        # errors. I'm using sys.exit here instead of an exception because an
        # exception here causes all sorts of other noise in unittest.
        sys.stderr.write("*** COVERAGE_TEST_TRACER is 'c' but can't import CTracer!\n")
        sys.exit(1)
    # CTracer being None signals to Collector.__init__ to fall back to PyTracer.
    CTracer = None
35
36
class Collector:
    """Collects trace data.

    Creates a Tracer object for each thread, since they track stack
    information. Each Tracer points to the same shared data, contributing
    traced data points.

    When the Collector is started, it creates a Tracer for the current thread,
    and installs a function to create Tracers for each new thread started.
    When the Collector is stopped, all active Tracers are stopped.

    Threads started while the Collector is stopped will never have Tracers
    associated with them.

    """

    # The stack of active Collectors. Collectors are added here when started,
    # and popped when stopped. Collectors on the stack are paused when not
    # the top, and resumed when they become the top again.
    _collectors = []

    # The concurrency settings we support here.
    LIGHT_THREADS = {"greenlet", "eventlet", "gevent"}

    def __init__(
        self, should_trace, check_include, should_start_context, file_mapper,
        timid, branch, warn, concurrency,
    ):
        """Create a collector.

        `should_trace` is a function, taking a file name and a frame, and
        returning a `coverage.FileDisposition object`.

        `check_include` is a function taking a file name and a frame. It returns
        a boolean: True if the file should be traced, False if not.

        `should_start_context` is a function taking a frame, and returning a
        string. If the frame should be the start of a new context, the string
        is the new context. If the frame should not be the start of a new
        context, return None.

        `file_mapper` is a function taking a filename, and returning a Unicode
        filename. The result is the name that will be recorded in the data
        file.

        If `timid` is true, then a slower simpler trace function will be
        used. This is important for some environments where manipulation of
        tracing functions make the faster more sophisticated trace function not
        operate properly.

        If `branch` is true, then branches will be measured. This involves
        collecting data on which statements followed each other (arcs). Use
        `get_arc_data` to get the arc data.

        `warn` is a warning function, taking a single string message argument
        and an optional slug argument which will be a string or None, to be
        used if a warning needs to be issued.

        `concurrency` is a list of strings indicating the concurrency libraries
        in use. Valid values are "greenlet", "eventlet", "gevent", or "thread"
        (the default). "thread" can be combined with one of the other three.
        Other values are ignored.

        """
        self.should_trace = should_trace
        self.check_include = check_include
        self.should_start_context = should_start_context
        self.file_mapper = file_mapper
        self.branch = branch
        self.warn = warn
        self.concurrency = concurrency
        assert isinstance(self.concurrency, list), f"Expected a list: {self.concurrency!r}"

        self.threading = None
        self.covdata = None
        self.static_context = None

        # Remember where this collector was created, for the debugging dump in stop().
        self.origin = short_stack()

        self.concur_id_func = None
        self.mapped_file_cache = {}

        if timid:
            # Being timid: use the simple Python trace function.
            self._trace_class = PyTracer
        else:
            # Being fast: use the C Tracer if it is available, else the Python
            # trace function.  (CTracer is None when the extension couldn't be imported.)
            self._trace_class = CTracer or PyTracer

        if self._trace_class is CTracer:
            self.file_disposition_class = CFileDisposition
            self.supports_plugins = True
            self.packed_arcs = True
        else:
            self.file_disposition_class = FileDisposition
            self.supports_plugins = False
            self.packed_arcs = False

        # We can handle a few concurrency options here, but only one at a time.
        concurrencies = set(self.concurrency)
        unknown = concurrencies - CoverageConfig.CONCURRENCY_CHOICES
        if unknown:
            show = ", ".join(sorted(unknown))
            raise ConfigError(f"Unknown concurrency choices: {show}")
        light_threads = concurrencies & self.LIGHT_THREADS
        if len(light_threads) > 1:
            show = ", ".join(sorted(light_threads))
            raise ConfigError(f"Conflicting concurrency settings: {show}")
        do_threading = False

        tried = "nothing"  # to satisfy pylint
        try:
            if "greenlet" in concurrencies:
                tried = "greenlet"
                import greenlet
                self.concur_id_func = greenlet.getcurrent
            elif "eventlet" in concurrencies:
                tried = "eventlet"
                import eventlet.greenthread  # pylint: disable=import-error,useless-suppression
                self.concur_id_func = eventlet.greenthread.getcurrent
            elif "gevent" in concurrencies:
                tried = "gevent"
                import gevent  # pylint: disable=import-error,useless-suppression
                self.concur_id_func = gevent.getcurrent

            if "thread" in concurrencies:
                do_threading = True
        except ImportError as ex:
            msg = f"Couldn't trace with concurrency={tried}, the module isn't installed."
            raise ConfigError(msg) from ex

        if self.concur_id_func and not hasattr(self._trace_class, "concur_id_func"):
            raise ConfigError(
                "Can't support concurrency={} with {}, only threads are supported.".format(
                    tried, self.tracer_name(),
                )
            )

        if do_threading or not concurrencies:
            # It's important to import threading only if we need it. If
            # it's imported early, and the program being measured uses
            # gevent, then gevent's monkey-patching won't work properly.
            import threading
            self.threading = threading

        self.reset()

    def __repr__(self):
        return f"<Collector at 0x{id(self):x}: {self.tracer_name()}>"

    def use_data(self, covdata, context):
        """Use `covdata` for recording data."""
        self.covdata = covdata
        self.static_context = context
        self.covdata.set_context(self.static_context)

    def tracer_name(self):
        """Return the class name of the tracer we're using."""
        return self._trace_class.__name__

    def _clear_data(self):
        """Clear out existing data, but stay ready for more collection."""
        # We used to used self.data.clear(), but that would remove filename
        # keys and data values that were still in use higher up the stack
        # when we are called as part of switch_context.
        for d in self.data.values():
            d.clear()

        for tracer in self.tracers:
            tracer.reset_activity()

    def reset(self):
        """Clear collected data, and prepare to collect more."""
        # A dictionary mapping file names to dicts with line number keys (if not
        # branch coverage), or mapping file names to dicts with line number
        # pairs as keys (if branch coverage).
        self.data = {}

        # A dictionary mapping file names to file tracer plugin names that will
        # handle them.
        self.file_tracers = {}

        self.disabled_plugins = set()

        # The .should_trace_cache attribute is a cache from file names to
        # coverage.FileDisposition objects, or None. When a file is first
        # considered for tracing, a FileDisposition is obtained from
        # Coverage.should_trace. Its .trace attribute indicates whether the
        # file should be traced or not. If it should be, a plugin with dynamic
        # file names can decide not to trace it based on the dynamic file name
        # being excluded by the inclusion rules, in which case the
        # FileDisposition will be replaced by None in the cache.
        if env.PYPY:
            import __pypy__  # pylint: disable=import-error
            # Alex Gaynor said:
            # should_trace_cache is a strictly growing key: once a key is in
            # it, it never changes. Further, the keys used to access it are
            # generally constant, given sufficient context. That is to say, at
            # any given point _trace() is called, pypy is able to know the key.
            # This is because the key is determined by the physical source code
            # line, and that's invariant with the call site.
            #
            # This property of a dict with immutable keys, combined with
            # call-site-constant keys is a match for PyPy's module dict,
            # which is optimized for such workloads.
            #
            # This gives a 20% benefit on the workload described at
            # https://bitbucket.org/pypy/pypy/issue/1871/10x-slower-than-cpython-under-coverage
            self.should_trace_cache = __pypy__.newdict("module")
        else:
            self.should_trace_cache = {}

        # Our active Tracers.
        self.tracers = []

        self._clear_data()

    def _start_tracer(self):
        """Start a new Tracer object, and store it in self.tracers."""
        tracer = self._trace_class()
        tracer.data = self.data
        tracer.trace_arcs = self.branch
        tracer.should_trace = self.should_trace
        tracer.should_trace_cache = self.should_trace_cache
        tracer.warn = self.warn

        # The Python and C tracers support different optional features;
        # only configure the ones this tracer class actually has.
        if hasattr(tracer, 'concur_id_func'):
            tracer.concur_id_func = self.concur_id_func
        if hasattr(tracer, 'file_tracers'):
            tracer.file_tracers = self.file_tracers
        if hasattr(tracer, 'threading'):
            tracer.threading = self.threading
        if hasattr(tracer, 'check_include'):
            tracer.check_include = self.check_include
        if hasattr(tracer, 'should_start_context'):
            tracer.should_start_context = self.should_start_context
            tracer.switch_context = self.switch_context
        if hasattr(tracer, 'disable_plugin'):
            tracer.disable_plugin = self.disable_plugin

        # tracer.start() returns the trace function it installed, so callers
        # (e.g. _installation_trace) can keep tracing in the current scope.
        fn = tracer.start()
        self.tracers.append(tracer)

        return fn

    # The trace function has to be set individually on each thread before
    # execution begins. Ironically, the only support the threading module has
    # for running code before the thread main is the tracing function. So we
    # install this as a trace function, and the first time it's called, it does
    # the real trace installation.

    def _installation_trace(self, frame, event, arg):
        """Called on new threads, installs the real tracer."""
        # Remove ourselves as the trace function.
        sys.settrace(None)
        # Install the real tracer.
        fn = self._start_tracer()
        # Invoke the real trace function with the current event, to be sure
        # not to lose an event.
        if fn:
            fn = fn(frame, event, arg)
        # Return the new trace function to continue tracing in this scope.
        return fn

    def start(self):
        """Start collecting trace information."""
        # Only the top collector on the stack may be actively tracing.
        if self._collectors:
            self._collectors[-1].pause()

        self.tracers = []

        # Check to see whether we had a fullcoverage tracer installed. If so,
        # get the stack frames it stashed away for us.
        traces0 = []
        fn0 = sys.gettrace()
        if fn0:
            tracer0 = getattr(fn0, '__self__', None)
            if tracer0:
                traces0 = getattr(tracer0, 'traces', [])

        try:
            # Install the tracer on this thread.
            fn = self._start_tracer()
        except:
            if self._collectors:
                self._collectors[-1].resume()
            raise

        # If _start_tracer succeeded, then we add ourselves to the global
        # stack of collectors.
        self._collectors.append(self)

        # Replay all the events from fullcoverage into the new trace function.
        for (frame, event, arg), lineno in traces0:
            try:
                fn(frame, event, arg, lineno=lineno)
            except TypeError as ex:
                raise Exception("fullcoverage must be run with the C trace function.") from ex

        # Install our installation tracer in threading, to jump-start other
        # threads.
        if self.threading:
            self.threading.settrace(self._installation_trace)

    def stop(self):
        """Stop collecting trace information."""
        assert self._collectors
        if self._collectors[-1] is not self:
            # Debugging dump before the assert below fires: show every
            # collector on the stack and where it was created.
            print("self._collectors:")
            for c in self._collectors:
                print(f" {c!r}\n{c.origin}")
        assert self._collectors[-1] is self, (
            f"Expected current collector to be {self!r}, but it's {self._collectors[-1]!r}"
        )

        self.pause()

        # Remove this Collector from the stack, and resume the one underneath
        # (if any).
        self._collectors.pop()
        if self._collectors:
            self._collectors[-1].resume()

    def pause(self):
        """Pause tracing, but be prepared to `resume`."""
        for tracer in self.tracers:
            tracer.stop()
            stats = tracer.get_stats()
            if stats:
                print("\nCoverage.py tracer stats:")
                for k in human_sorted(stats.keys()):
                    print(f"{k:>20}: {stats[k]}")
        if self.threading:
            self.threading.settrace(None)

    def resume(self):
        """Resume tracing after a `pause`."""
        for tracer in self.tracers:
            tracer.start()
        if self.threading:
            self.threading.settrace(self._installation_trace)
        else:
            self._start_tracer()

    def _activity(self):
        """Has any activity been traced?

        Returns a boolean, True if any trace function was invoked.

        """
        return any(tracer.activity() for tracer in self.tracers)

    def switch_context(self, new_context):
        """Switch to a new dynamic context."""
        # Flush first, so data collected so far is attributed to the old context.
        self.flush_data()
        if self.static_context:
            context = self.static_context
            if new_context:
                context += "|" + new_context
        else:
            context = new_context
        self.covdata.set_context(context)

    def disable_plugin(self, disposition):
        """Disable the plugin mentioned in `disposition`."""
        file_tracer = disposition.file_tracer
        plugin = file_tracer._coverage_plugin
        plugin_name = plugin._coverage_plugin_name
        self.warn(f"Disabling plug-in {plugin_name!r} due to previous exception")
        plugin._coverage_enabled = False
        disposition.trace = False

    def cached_mapped_file(self, filename):
        """A locally cached version of file names mapped through file_mapper."""
        # Key on the type as well as the value, so equal-but-differently-typed
        # names (e.g. str vs. bytes) get separate cache entries.
        key = (type(filename), filename)
        try:
            return self.mapped_file_cache[key]
        except KeyError:
            return self.mapped_file_cache.setdefault(key, self.file_mapper(filename))

    def mapped_file_dict(self, d):
        """Return a dict like d, but with keys modified by file_mapper."""
        # The call to list(items()) ensures that the GIL protects the dictionary
        # iterator against concurrent modifications by tracers running
        # in other threads. We try three times in case of concurrent
        # access, hoping to get a clean copy.
        runtime_err = None
        for _ in range(3):  # pragma: part covered
            try:
                items = list(d.items())
            except RuntimeError as ex:  # pragma: cant happen
                runtime_err = ex
            else:
                break
        else:
            raise runtime_err  # pragma: cant happen

        return {self.cached_mapped_file(k): v for k, v in items if v}

    def plugin_was_disabled(self, plugin):
        """Record that `plugin` was disabled during the run."""
        self.disabled_plugins.add(plugin._coverage_plugin_name)

    def flush_data(self):
        """Save the collected data to our associated `CoverageData`.

        Data may have also been saved along the way. This forces the
        last of the data to be saved.

        Returns True if there was data to save, False if not.
        """
        if not self._activity():
            return False

        if self.branch:
            if self.packed_arcs:
                # Unpack the line number pairs packed into integers. See
                # tracer.c:CTracer_record_pair for the C code that creates
                # these packed ints.
                # Layout: bits 0-19 = |l1|, bits 20-39 = |l2|,
                # bit 40 = sign of l1, bit 41 = sign of l2.
                data = {}
                for fname, packeds in self.data.items():
                    tuples = []
                    for packed in packeds:
                        l1 = packed & 0xFFFFF
                        l2 = (packed & (0xFFFFF << 20)) >> 20
                        if packed & (1 << 40):
                            l1 *= -1
                        if packed & (1 << 41):
                            l2 *= -1
                        tuples.append((l1, l2))
                    data[fname] = tuples
            else:
                data = self.data
            self.covdata.add_arcs(self.mapped_file_dict(data))
        else:
            self.covdata.add_lines(self.mapped_file_dict(self.data))

        # Don't record file tracers for plugins that were disabled during the run.
        file_tracers = {
            k: v for k, v in self.file_tracers.items()
            if v not in self.disabled_plugins
        }
        self.covdata.add_file_tracers(self.mapped_file_dict(file_tracers))

        self._clear_data()
        return True

eric ide

mercurial