|
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
|
2 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt |
|
3 |
|
4 """Sqlite coverage data.""" |
|
5 |
|
6 # TODO: factor out dataop debugging to a wrapper class? |
|
7 # TODO: make sure all dataop debugging is in place somehow |
|
8 |
|
9 import collections |
|
10 import datetime |
|
11 import glob |
|
12 import itertools |
|
13 import os |
|
14 import re |
|
15 import sqlite3 |
|
16 import sys |
|
17 import zlib |
|
18 |
|
19 from coverage import env |
|
20 from coverage.backward import get_thread_id, iitems, to_bytes, to_string |
|
21 from coverage.debug import NoDebugging, SimpleReprMixin, clipped_repr |
|
22 from coverage.files import PathAliases |
|
23 from coverage.misc import CoverageException, contract, file_be_gone, filename_suffix, isolate_module |
|
24 from coverage.numbits import numbits_to_nums, numbits_union, nums_to_numbits |
|
25 from coverage.version import __version__ |
|
26 |
|
# Get a version of `os` whose functions can't be replaced out from under us
# by test shims (see coverage.misc.isolate_module).
os = isolate_module(os)

# If you change the schema, increment the SCHEMA_VERSION, and update the
# docs in docs/dbschema.rst also.

SCHEMA_VERSION = 7

# Schema versions:
# 1: Released in 5.0a2
# 2: Added contexts in 5.0a3.
# 3: Replaced line table with line_map table.
# 4: Changed line_map.bitmap to line_map.numbits.
# 5: Added foreign key declarations.
# 6: Key-value in meta.
# 7: line_map -> line_bits

SCHEMA = """\
CREATE TABLE coverage_schema (
    -- One row, to record the version of the schema in this db.
    version integer
);

CREATE TABLE meta (
    -- Key-value pairs, to record metadata about the data
    key text,
    value text,
    unique (key)
    -- Keys:
    --  'has_arcs' boolean      -- Is this data recording branches?
    --  'sys_argv' text         -- The coverage command line that recorded the data.
    --  'version' text          -- The version of coverage.py that made the file.
    --  'when' text             -- Datetime when the file was created.
);

CREATE TABLE file (
    -- A row per file measured.
    id integer primary key,
    path text,
    unique (path)
);

CREATE TABLE context (
    -- A row per context measured.
    id integer primary key,
    context text,
    unique (context)
);

CREATE TABLE line_bits (
    -- If recording lines, a row per context per file executed.
    -- All of the line numbers for that file/context are in one numbits.
    file_id integer,            -- foreign key to `file`.
    context_id integer,         -- foreign key to `context`.
    numbits blob,               -- see the numbits functions in coverage.numbits
    foreign key (file_id) references file (id),
    foreign key (context_id) references context (id),
    unique (file_id, context_id)
);

CREATE TABLE arc (
    -- If recording branches, a row per context per from/to line transition executed.
    file_id integer,            -- foreign key to `file`.
    context_id integer,         -- foreign key to `context`.
    fromno integer,             -- line number jumped from.
    tono integer,               -- line number jumped to.
    foreign key (file_id) references file (id),
    foreign key (context_id) references context (id),
    unique (file_id, context_id, fromno, tono)
);

CREATE TABLE tracer (
    -- A row per file indicating the tracer used for that file.
    file_id integer primary key,
    tracer text,
    foreign key (file_id) references file (id)
);
"""
|
104 |
|
class CoverageData(SimpleReprMixin):
    """Manages collected coverage data, including file storage.

    This class is the public supported API to the data that coverage.py
    collects during program execution.  It includes information about what code
    was executed. It does not include information from the analysis phase, to
    determine what lines could have been executed, or what lines were not
    executed.

    .. note::

        The data file is currently a SQLite database file, with a
        :ref:`documented schema <dbschema>`. The schema is subject to change
        though, so be careful about querying it directly. Use this API if you
        can to isolate yourself from changes.

    There are a number of kinds of data that can be collected:

    * **lines**: the line numbers of source lines that were executed.
      These are always available.

    * **arcs**: pairs of source and destination line numbers for transitions
      between source lines.  These are only available if branch coverage was
      used.

    * **file tracer names**: the module names of the file tracer plugins that
      handled each file in the data.

    Lines, arcs, and file tracer names are stored for each source file. File
    names in this API are case-sensitive, even on platforms with
    case-insensitive file systems.

    A data file either stores lines, or arcs, but not both.

    A data file is associated with the data when the :class:`CoverageData`
    is created, using the parameters `basename`, `suffix`, and `no_disk`. The
    base name can be queried with :meth:`base_filename`, and the actual file
    name being used is available from :meth:`data_filename`.

    To read an existing coverage.py data file, use :meth:`read`.  You can then
    access the line, arc, or file tracer data with :meth:`lines`, :meth:`arcs`,
    or :meth:`file_tracer`.

    The :meth:`has_arcs` method indicates whether arc data is available.  You
    can get a set of the files in the data with :meth:`measured_files`.  As
    with most Python containers, you can determine if there is any data at all
    by using this object as a boolean value.

    The contexts for each line in a file can be read with
    :meth:`contexts_by_lineno`.

    To limit querying to certain contexts, use :meth:`set_query_context` or
    :meth:`set_query_contexts`. These will narrow the focus of subsequent
    :meth:`lines`, :meth:`arcs`, and :meth:`contexts_by_lineno` calls. The set
    of all measured context names can be retrieved with
    :meth:`measured_contexts`.

    Most data files will be created by coverage.py itself, but you can use
    methods here to create data files if you like.  The :meth:`add_lines`,
    :meth:`add_arcs`, and :meth:`add_file_tracers` methods add data, in ways
    that are convenient for coverage.py.

    To record data for contexts, use :meth:`set_context` to set a context to
    be used for subsequent :meth:`add_lines` and :meth:`add_arcs` calls.

    To add a source file without any measured data, use :meth:`touch_file`.

    Write the data to its file with :meth:`write`.

    You can clear the data in memory with :meth:`erase`.  Two data collections
    can be combined by using :meth:`update` on one :class:`CoverageData`,
    passing it the other.

    Data in a :class:`CoverageData` can be serialized and deserialized with
    :meth:`dumps` and :meth:`loads`.

    """
|
182 |
|
    def __init__(self, basename=None, suffix=None, no_disk=False, warn=None, debug=None):
        """Create a :class:`CoverageData` object to hold coverage-measured data.

        Arguments:
            basename (str): the base name of the data file, defaulting to
                ".coverage".
            suffix (str or bool): has the same meaning as the `data_suffix`
                argument to :class:`coverage.Coverage`.
            no_disk (bool): if True, keep all data in memory, and don't
                write any disk file.
            warn: a warning callback function, accepting a warning message
                argument.
            debug: a `DebugControl` object (optional)

        """
        self._no_disk = no_disk
        self._basename = os.path.abspath(basename or ".coverage")
        self._suffix = suffix
        self._warn = warn
        self._debug = debug or NoDebugging()

        self._choose_filename()
        # Maps file paths to their id in the `file` table.
        self._file_map = {}
        # Maps thread ids to SqliteDb objects.
        self._dbs = {}
        # Remember the creating process, so forks can be detected later.
        self._pid = os.getpid()

        # Are we in sync with the data file?
        self._have_used = False

        # Which kind of data this file holds; at most one becomes True.
        self._has_lines = False
        self._has_arcs = False

        self._current_context = None
        self._current_context_id = None
        self._query_context_ids = None
|
219 |
|
220 def _choose_filename(self): |
|
221 """Set self._filename based on inited attributes.""" |
|
222 if self._no_disk: |
|
223 self._filename = ":memory:" |
|
224 else: |
|
225 self._filename = self._basename |
|
226 suffix = filename_suffix(self._suffix) |
|
227 if suffix: |
|
228 self._filename += "." + suffix |
|
229 |
|
230 def _reset(self): |
|
231 """Reset our attributes.""" |
|
232 if self._dbs: |
|
233 for db in self._dbs.values(): |
|
234 db.close() |
|
235 self._dbs = {} |
|
236 self._file_map = {} |
|
237 self._have_used = False |
|
238 self._current_context_id = None |
|
239 |
|
    def _create_db(self):
        """Create a db file that doesn't exist yet.

        Initializes the schema and certain metadata.
        """
        if self._debug.should('dataio'):
            self._debug.write("Creating data file {!r}".format(self._filename))
        self._dbs[get_thread_id()] = db = SqliteDb(self._filename, self._debug)
        with db:
            db.executescript(SCHEMA)
            db.execute("insert into coverage_schema (version) values (?)", (SCHEMA_VERSION,))
            # Record provenance metadata about how this file was made.
            db.executemany(
                "insert into meta (key, value) values (?, ?)",
                [
                    ('sys_argv', str(getattr(sys, 'argv', None))),
                    ('version', __version__),
                    ('when', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
                ]
            )
|
259 |
|
    def _open_db(self):
        """Open an existing db file, and read its metadata."""
        if self._debug.should('dataio'):
            self._debug.write("Opening data file {!r}".format(self._filename))
        # One connection per thread, keyed by thread id.
        self._dbs[get_thread_id()] = SqliteDb(self._filename, self._debug)
        self._read_db()
|
266 |
|
    def _read_db(self):
        """Read the metadata from a database so that we are ready to use it."""
        with self._dbs[get_thread_id()] as db:
            try:
                schema_version, = db.execute("select version from coverage_schema").fetchone()
            except Exception as exc:
                # Any failure here (missing table, not a SQLite file, ...)
                # means the file isn't usable coverage data.
                raise CoverageException(
                    "Data file {!r} doesn't seem to be a coverage data file: {}".format(
                        self._filename, exc
                    )
                )
            else:
                if schema_version != SCHEMA_VERSION:
                    raise CoverageException(
                        "Couldn't use data file {!r}: wrong schema: {} instead of {}".format(
                            self._filename, schema_version, SCHEMA_VERSION
                        )
                    )

            # The 'has_arcs' row may be absent in a fresh file; the in-memory
            # defaults (both False) are kept in that case.
            for row in db.execute("select value from meta where key = 'has_arcs'"):
                self._has_arcs = bool(int(row[0]))
                self._has_lines = not self._has_arcs

            # Cache the path-to-id mapping for the `file` table.
            for path, file_id in db.execute("select path, id from file"):
                self._file_map[path] = file_id
|
292 |
|
293 def _connect(self): |
|
294 """Get the SqliteDb object to use.""" |
|
295 if get_thread_id() not in self._dbs: |
|
296 if os.path.exists(self._filename): |
|
297 self._open_db() |
|
298 else: |
|
299 self._create_db() |
|
300 return self._dbs[get_thread_id()] |
|
301 |
|
    def __nonzero__(self):
        # Truthiness: is there at least one measured file in the data?
        if (get_thread_id() not in self._dbs and not os.path.exists(self._filename)):
            # No connection and no file on disk: definitely empty.
            return False
        try:
            with self._connect() as con:
                rows = con.execute("select * from file limit 1")
                return bool(list(rows))
        except CoverageException:
            # An unreadable/invalid file counts as "no data".
            return False

    # Python 3 spelling of the truthiness hook.
    __bool__ = __nonzero__
|
313 |
|
    @contract(returns='bytes')
    def dumps(self):
        """Serialize the current data to a byte string.

        The format of the serialized data is not documented. It is only
        suitable for use with :meth:`loads` in the same version of
        coverage.py.

        Returns:
            A byte string of serialized data.

        .. versionadded:: 5.0

        """
        if self._debug.should('dataio'):
            self._debug.write("Dumping data from data file {!r}".format(self._filename))
        with self._connect() as con:
            # 'z' tags the format: a zlib-compressed SQL dump of the database.
            return b'z' + zlib.compress(to_bytes(con.dump()))
|
332 |
|
    @contract(data='bytes')
    def loads(self, data):
        """Deserialize data from :meth:`dumps`

        Use with a newly-created empty :class:`CoverageData` object.  It's
        undefined what happens if the object already has data in it.

        Arguments:
            data: A byte string of serialized data produced by :meth:`dumps`.

        .. versionadded:: 5.0

        """
        if self._debug.should('dataio'):
            self._debug.write("Loading data into data file {!r}".format(self._filename))
        # The first byte is a format tag; 'z' means zlib-compressed SQL dump.
        if data[:1] != b'z':
            raise CoverageException(
                "Unrecognized serialization: {!r} (head of {} bytes)".format(data[:40], len(data))
            )
        script = to_string(zlib.decompress(data[1:]))
        self._dbs[get_thread_id()] = db = SqliteDb(self._filename, self._debug)
        with db:
            # Replay the dumped SQL to rebuild the database.
            db.executescript(script)
        self._read_db()
        self._have_used = True
|
358 |
|
359 def _file_id(self, filename, add=False): |
|
360 """Get the file id for `filename`. |
|
361 |
|
362 If filename is not in the database yet, add it if `add` is True. |
|
363 If `add` is not True, return None. |
|
364 """ |
|
365 if filename not in self._file_map: |
|
366 if add: |
|
367 with self._connect() as con: |
|
368 cur = con.execute("insert or replace into file (path) values (?)", (filename,)) |
|
369 self._file_map[filename] = cur.lastrowid |
|
370 return self._file_map.get(filename) |
|
371 |
|
372 def _context_id(self, context): |
|
373 """Get the id for a context.""" |
|
374 assert context is not None |
|
375 self._start_using() |
|
376 with self._connect() as con: |
|
377 row = con.execute("select id from context where context = ?", (context,)).fetchone() |
|
378 if row is not None: |
|
379 return row[0] |
|
380 else: |
|
381 return None |
|
382 |
|
    def set_context(self, context):
        """Set the current context for future :meth:`add_lines` etc.

        `context` is a str, the name of the context to use for the next data
        additions.  The context persists until the next :meth:`set_context`.

        .. versionadded:: 5.0

        """
        if self._debug.should('dataop'):
            self._debug.write("Setting context: %r" % (context,))
        self._current_context = context
        # The db id is looked up lazily, on the next write.
        self._current_context_id = None
|
396 |
|
    def _set_context_id(self):
        """Use the _current_context to set _current_context_id."""
        # An unset context is stored as the empty string.
        context = self._current_context or ""
        context_id = self._context_id(context)
        if context_id is not None:
            self._current_context_id = context_id
        else:
            # First time this context is seen: create its row.
            with self._connect() as con:
                cur = con.execute("insert into context (context) values (?)", (context,))
                self._current_context_id = cur.lastrowid
|
407 |
|
    def base_filename(self):
        """The base filename for storing data.

        .. versionadded:: 5.0

        """
        # The base name never includes the per-run suffix; see data_filename().
        return self._basename
|
415 |
|
    def data_filename(self):
        """Where is the data stored?

        .. versionadded:: 5.0

        """
        # The actual file in use, including any suffix (or ":memory:").
        return self._filename
|
423 |
|
    def add_lines(self, line_data):
        """Add measured line data.

        `line_data` is a dictionary mapping file names to dictionaries::

            { filename: { lineno: None, ... }, ...}

        """
        if self._debug.should('dataop'):
            self._debug.write("Adding lines: %d files, %d lines total" % (
                len(line_data), sum(len(lines) for lines in line_data.values())
            ))
        self._start_using()
        self._choose_lines_or_arcs(lines=True)
        if not line_data:
            return
        with self._connect() as con:
            self._set_context_id()
            for filename, linenos in iitems(line_data):
                # Line numbers are stored as a packed bit set ("numbits"),
                # one blob per file/context pair.
                linemap = nums_to_numbits(linenos)
                file_id = self._file_id(filename, add=True)
                query = "select numbits from line_bits where file_id = ? and context_id = ?"
                existing = list(con.execute(query, (file_id, self._current_context_id)))
                if existing:
                    # Merge with lines already recorded for this pair.
                    linemap = numbits_union(linemap, existing[0][0])

                con.execute(
                    "insert or replace into line_bits "
                    " (file_id, context_id, numbits) values (?, ?, ?)",
                    (file_id, self._current_context_id, linemap),
                )
|
455 |
|
    def add_arcs(self, arc_data):
        """Add measured arc data.

        `arc_data` is a dictionary mapping file names to dictionaries::

            { filename: { (l1,l2): None, ... }, ...}

        """
        if self._debug.should('dataop'):
            self._debug.write("Adding arcs: %d files, %d arcs total" % (
                len(arc_data), sum(len(arcs) for arcs in arc_data.values())
            ))
        self._start_using()
        self._choose_lines_or_arcs(arcs=True)
        if not arc_data:
            return
        with self._connect() as con:
            self._set_context_id()
            for filename, arcs in iitems(arc_data):
                file_id = self._file_id(filename, add=True)
                data = [(file_id, self._current_context_id, fromno, tono) for fromno, tono in arcs]
                # "or ignore" makes re-recording an arc a no-op, thanks to the
                # unique constraint on (file_id, context_id, fromno, tono).
                con.executemany(
                    "insert or ignore into arc "
                    "(file_id, context_id, fromno, tono) values (?, ?, ?, ?)",
                    data,
                )
|
482 |
|
483 def _choose_lines_or_arcs(self, lines=False, arcs=False): |
|
484 """Force the data file to choose between lines and arcs.""" |
|
485 assert lines or arcs |
|
486 assert not (lines and arcs) |
|
487 if lines and self._has_arcs: |
|
488 raise CoverageException("Can't add lines to existing arc data") |
|
489 if arcs and self._has_lines: |
|
490 raise CoverageException("Can't add arcs to existing line data") |
|
491 if not self._has_arcs and not self._has_lines: |
|
492 self._has_lines = lines |
|
493 self._has_arcs = arcs |
|
494 with self._connect() as con: |
|
495 con.execute( |
|
496 "insert into meta (key, value) values (?, ?)", |
|
497 ('has_arcs', str(int(arcs))) |
|
498 ) |
|
499 |
|
    def add_file_tracers(self, file_tracers):
        """Add per-file plugin information.

        `file_tracers` is { filename: plugin_name, ... }

        Raises CoverageException for an unmeasured file, or for a plugin name
        that conflicts with one already recorded for the file.
        """
        if self._debug.should('dataop'):
            self._debug.write("Adding file tracers: %d files" % (len(file_tracers),))
        if not file_tracers:
            return
        self._start_using()
        with self._connect() as con:
            for filename, plugin_name in iitems(file_tracers):
                file_id = self._file_id(filename)
                if file_id is None:
                    raise CoverageException(
                        "Can't add file tracer data for unmeasured file '%s'" % (filename,)
                    )

                existing_plugin = self.file_tracer(filename)
                if existing_plugin:
                    if existing_plugin != plugin_name:
                        raise CoverageException(
                            "Conflicting file tracer name for '%s': %r vs %r" % (
                                filename, existing_plugin, plugin_name,
                            )
                        )
                elif plugin_name:
                    # No tracer recorded yet; record this one.
                    con.execute(
                        "insert into tracer (file_id, tracer) values (?, ?)",
                        (file_id, plugin_name)
                    )
|
532 |
|
    def touch_file(self, filename, plugin_name=""):
        """Ensure that `filename` appears in the data, empty if needed.

        `plugin_name` is the name of the plugin responsible for this file.
        It is used to associate the right filereporter, etc.
        """
        if self._debug.should('dataop'):
            self._debug.write("Touching %r" % (filename,))
        self._start_using()
        # Touching requires knowing whether this is line or arc data.
        if not self._has_arcs and not self._has_lines:
            raise CoverageException("Can't touch files in an empty CoverageData")

        self._file_id(filename, add=True)
        if plugin_name:
            # Set the tracer for this file
            self.add_file_tracers({filename: plugin_name})
|
549 |
|
    def update(self, other_data, aliases=None):
        """Update this data with data from several other :class:`CoverageData` instances.

        If `aliases` is provided, it's a `PathAliases` object that is used to
        re-map paths to match the local machine's.
        """
        if self._debug.should('dataop'):
            self._debug.write("Updating with data from %r" % (
                getattr(other_data, '_filename', '???'),
            ))
        # Line data and arc data can't be mixed in a single file.
        if self._has_lines and other_data._has_arcs:
            raise CoverageException("Can't combine arc data with line data")
        if self._has_arcs and other_data._has_lines:
            raise CoverageException("Can't combine line data with arc data")

        aliases = aliases or PathAliases()

        # Force the database we're writing to to exist before we start nesting
        # contexts.
        self._start_using()

        # Collector for all arcs, lines and tracers
        other_data.read()
        with other_data._connect() as conn:
            # Get files data.  All paths from the other file are re-mapped
            # through `aliases` up front.
            cur = conn.execute('select path from file')
            files = {path: aliases.map(path) for (path,) in cur}
            cur.close()

            # Get contexts data.
            cur = conn.execute('select context from context')
            contexts = [context for (context,) in cur]
            cur.close()

            # Get arc data.
            cur = conn.execute(
                'select file.path, context.context, arc.fromno, arc.tono '
                'from arc '
                'inner join file on file.id = arc.file_id '
                'inner join context on context.id = arc.context_id'
            )
            arcs = [(files[path], context, fromno, tono) for (path, context, fromno, tono) in cur]
            cur.close()

            # Get line data.
            cur = conn.execute(
                'select file.path, context.context, line_bits.numbits '
                'from line_bits '
                'inner join file on file.id = line_bits.file_id '
                'inner join context on context.id = line_bits.context_id'
            )
            lines = {
                (files[path], context): numbits
                for (path, context, numbits) in cur
            }
            cur.close()

            # Get tracer data.
            cur = conn.execute(
                'select file.path, tracer '
                'from tracer '
                'inner join file on file.id = tracer.file_id'
            )
            tracers = {files[path]: tracer for (path, tracer) in cur}
            cur.close()

        with self._connect() as conn:
            # Take a write lock for the whole merge transaction.
            conn.con.isolation_level = 'IMMEDIATE'

            # Get all tracers in the DB. Files not in the tracers are assumed
            # to have an empty string tracer. Since Sqlite does not support
            # full outer joins, we have to make two queries to fill the
            # dictionary.
            this_tracers = {path: '' for path, in conn.execute('select path from file')}
            this_tracers.update({
                aliases.map(path): tracer
                for path, tracer in conn.execute(
                    'select file.path, tracer from tracer '
                    'inner join file on file.id = tracer.file_id'
                )
            })

            # Create all file and context rows in the DB.
            conn.executemany(
                'insert or ignore into file (path) values (?)',
                ((file,) for file in files.values())
            )
            file_ids = {
                path: id
                for id, path in conn.execute('select id, path from file')
            }
            conn.executemany(
                'insert or ignore into context (context) values (?)',
                ((context,) for context in contexts)
            )
            context_ids = {
                context: id
                for id, context in conn.execute('select id, context from context')
            }

            # Prepare tracers and fail, if a conflict is found.
            # tracer_paths is used to ensure consistency over the tracer data
            # and tracer_map tracks the tracers to be inserted.
            tracer_map = {}
            for path in files.values():
                this_tracer = this_tracers.get(path)
                other_tracer = tracers.get(path, '')
                # If there is no tracer, there is always the None tracer.
                if this_tracer is not None and this_tracer != other_tracer:
                    raise CoverageException(
                        "Conflicting file tracer name for '%s': %r vs %r" % (
                            path, this_tracer, other_tracer
                        )
                    )
                tracer_map[path] = other_tracer

            # Prepare arc and line rows to be inserted by converting the file
            # and context strings with integer ids. Then use the efficient
            # `executemany()` to insert all rows at once.
            arc_rows = (
                (file_ids[file], context_ids[context], fromno, tono)
                for file, context, fromno, tono in arcs
            )

            # Get line data.  This re-reads our own line_bits rows so the
            # other file's numbits can be merged with what is already here.
            cur = conn.execute(
                'select file.path, context.context, line_bits.numbits '
                'from line_bits '
                'inner join file on file.id = line_bits.file_id '
                'inner join context on context.id = line_bits.context_id'
            )
            for path, context, numbits in cur:
                key = (aliases.map(path), context)
                if key in lines:
                    numbits = numbits_union(lines[key], numbits)
                lines[key] = numbits
            cur.close()

            if arcs:
                self._choose_lines_or_arcs(arcs=True)

                # Write the combined data.
                conn.executemany(
                    'insert or ignore into arc '
                    '(file_id, context_id, fromno, tono) values (?, ?, ?, ?)',
                    arc_rows
                )

            if lines:
                self._choose_lines_or_arcs(lines=True)
                # `lines` now holds the union of both files, so replace
                # everything rather than merging row by row.
                conn.execute("delete from line_bits")
                conn.executemany(
                    "insert into line_bits "
                    "(file_id, context_id, numbits) values (?, ?, ?)",
                    [
                        (file_ids[file], context_ids[context], numbits)
                        for (file, context), numbits in lines.items()
                    ]
                )
            conn.executemany(
                'insert or ignore into tracer (file_id, tracer) values (?, ?)',
                ((file_ids[filename], tracer) for filename, tracer in tracer_map.items())
            )

        # Update all internal cache data.
        self._reset()
        self.read()
|
717 |
|
    def erase(self, parallel=False):
        """Erase the data in this object.

        If `parallel` is true, then also deletes data files created from the
        basename by parallel-mode.

        """
        self._reset()
        if self._no_disk:
            # Nothing on disk to remove.
            return
        if self._debug.should('dataio'):
            self._debug.write("Erasing data file {!r}".format(self._filename))
        file_be_gone(self._filename)
        if parallel:
            # Parallel-mode files are "<basename>.<suffix>" beside this file.
            data_dir, local = os.path.split(self._filename)
            localdot = local + '.*'
            pattern = os.path.join(os.path.abspath(data_dir), localdot)
            # NOTE(review): glob metacharacters ('*', '?', '[') in the data
            # path are not escaped here, so such paths could match the wrong
            # files -- worth confirming.
            for filename in glob.glob(pattern):
                if self._debug.should('dataio'):
                    self._debug.write("Erasing parallel data file {!r}".format(filename))
                file_be_gone(filename)
|
739 |
|
    def read(self):
        """Start using an existing data file."""
        # Connecting opens (or creates) the file and loads its metadata.
        with self._connect():       # TODO: doesn't look right
            self._have_used = True
|
744 |
|
    def write(self):
        """Ensure the data is written to the data file.

        No-op: data is written as it is added (presumably committed by
        SqliteDb's context manager -- confirm); kept for API symmetry.
        """
        pass
|
748 |
|
    def _start_using(self):
        """Call this before using the database at all."""
        if self._pid != os.getpid():
            # Looks like we forked! Have to start a new data file.
            self._reset()
            self._choose_filename()
            self._pid = os.getpid()
        if not self._have_used:
            # First use since init/reset: discard any stale file contents
            # before recording new data.
            self.erase()
        self._have_used = True
|
759 |
|
    def has_arcs(self):
        """Does the database have arcs (True) or lines (False)."""
        return bool(self._has_arcs)
|
763 |
|
    def measured_files(self):
        """A set of all files that had been measured."""
        # The keys of the cached path-to-id map are exactly the measured paths.
        return set(self._file_map)
|
767 |
|
768 def measured_contexts(self): |
|
769 """A set of all contexts that have been measured. |
|
770 |
|
771 .. versionadded:: 5.0 |
|
772 |
|
773 """ |
|
774 self._start_using() |
|
775 with self._connect() as con: |
|
776 contexts = set(row[0] for row in con.execute("select distinct(context) from context")) |
|
777 return contexts |
|
778 |
|
    def file_tracer(self, filename):
        """Get the plugin name of the file tracer for a file.

        Returns the name of the plugin that handles this file.  If the file was
        measured, but didn't use a plugin, then "" is returned.  If the file
        was not measured, then None is returned.

        """
        self._start_using()
        with self._connect() as con:
            file_id = self._file_id(filename)
            if file_id is None:
                return None
            row = con.execute("select tracer from tracer where file_id = ?", (file_id,)).fetchone()
            if row is not None:
                # A NULL tracer value is reported as "".
                return row[0] or ""
            return ""   # File was measured, but no tracer associated.
|
796 |
|
    def set_query_context(self, context):
        """Set a context for subsequent querying.

        The next :meth:`lines`, :meth:`arcs`, or :meth:`contexts_by_lineno`
        calls will be limited to only one context.  `context` is a string which
        must match a context exactly.  If it does not, no exception is raised,
        but queries will return no data.

        .. versionadded:: 5.0

        """
        self._start_using()
        with self._connect() as con:
            # A non-matching context yields an empty id list, not an error.
            cur = con.execute("select id from context where context = ?", (context,))
            self._query_context_ids = [row[0] for row in cur.fetchall()]
|
812 |
|
    def set_query_contexts(self, contexts):
        """Set a number of contexts for subsequent querying.

        The next :meth:`lines`, :meth:`arcs`, or :meth:`contexts_by_lineno`
        calls will be limited to the specified contexts.  `contexts` is a list
        of Python regular expressions.  Contexts will be matched using
        :func:`re.search <python:re.search>`.  Data will be included in query
        results if they are part of any of the contexts matched.

        .. versionadded:: 5.0

        """
        self._start_using()
        if contexts:
            with self._connect() as con:
                # NOTE(review): 'regexp' relies on a REGEXP function being
                # available to SQLite (presumably registered by SqliteDb) --
                # confirm.
                context_clause = ' or '.join(['context regexp ?'] * len(contexts))
                cur = con.execute("select id from context where " + context_clause, contexts)
                self._query_context_ids = [row[0] for row in cur.fetchall()]
        else:
            # An empty list means: no restriction on queries.
            self._query_context_ids = None
|
833 |
|
    def lines(self, filename):
        """Get the list of lines executed for a file.

        If the file was not measured, returns None.  A file might be measured,
        and have no lines executed, in which case an empty list is returned.

        If the file was executed, returns a list of integers, the line numbers
        executed in the file. The list is in no particular order.

        """
        self._start_using()
        if self.has_arcs():
            # Arc data implies the lines: derive them from the arcs.
            arcs = self.arcs(filename)
            if arcs is not None:
                all_lines = itertools.chain.from_iterable(arcs)
                # Negative numbers are synthetic entry/exit markers, not lines.
                return list(set(l for l in all_lines if l > 0))

        with self._connect() as con:
            file_id = self._file_id(filename)
            if file_id is None:
                return None
            else:
                query = "select numbits from line_bits where file_id = ?"
                data = [file_id]
                if self._query_context_ids is not None:
                    # Restrict to the contexts chosen by set_query_context(s).
                    ids_array = ', '.join('?' * len(self._query_context_ids))
                    query += " and context_id in (" + ids_array + ")"
                    data += self._query_context_ids
                bitmaps = list(con.execute(query, data))
                # Union the bit sets from every matching context.
                nums = set()
                for row in bitmaps:
                    nums.update(numbits_to_nums(row[0]))
                return list(nums)
|
867 |
|
868 def arcs(self, filename): |
|
869 """Get the list of arcs executed for a file. |
|
870 |
|
871 If the file was not measured, returns None. A file might be measured, |
|
872 and have no arcs executed, in which case an empty list is returned. |
|
873 |
|
874 If the file was executed, returns a list of 2-tuples of integers. Each |
|
875 pair is a starting line number and an ending line number for a |
|
876 transition from one line to another. The list is in no particular |
|
877 order. |
|
878 |
|
879 Negative numbers have special meaning. If the starting line number is |
|
880 -N, it represents an entry to the code object that starts at line N. |
|
881 If the ending ling number is -N, it's an exit from the code object that |
|
882 starts at line N. |
|
883 |
|
884 """ |
|
885 self._start_using() |
|
886 with self._connect() as con: |
|
887 file_id = self._file_id(filename) |
|
888 if file_id is None: |
|
889 return None |
|
890 else: |
|
891 query = "select distinct fromno, tono from arc where file_id = ?" |
|
892 data = [file_id] |
|
893 if self._query_context_ids is not None: |
|
894 ids_array = ', '.join('?' * len(self._query_context_ids)) |
|
895 query += " and context_id in (" + ids_array + ")" |
|
896 data += self._query_context_ids |
|
897 arcs = con.execute(query, data) |
|
898 return list(arcs) |
|
899 |
|
900 def contexts_by_lineno(self, filename): |
|
901 """Get the contexts for each line in a file. |
|
902 |
|
903 Returns: |
|
904 A dict mapping line numbers to a list of context names. |
|
905 |
|
906 .. versionadded:: 5.0 |
|
907 |
|
908 """ |
|
909 lineno_contexts_map = collections.defaultdict(list) |
|
910 self._start_using() |
|
911 with self._connect() as con: |
|
912 file_id = self._file_id(filename) |
|
913 if file_id is None: |
|
914 return lineno_contexts_map |
|
915 if self.has_arcs(): |
|
916 query = ( |
|
917 "select arc.fromno, arc.tono, context.context " |
|
918 "from arc, context " |
|
919 "where arc.file_id = ? and arc.context_id = context.id" |
|
920 ) |
|
921 data = [file_id] |
|
922 if self._query_context_ids is not None: |
|
923 ids_array = ', '.join('?' * len(self._query_context_ids)) |
|
924 query += " and arc.context_id in (" + ids_array + ")" |
|
925 data += self._query_context_ids |
|
926 for fromno, tono, context in con.execute(query, data): |
|
927 if context not in lineno_contexts_map[fromno]: |
|
928 lineno_contexts_map[fromno].append(context) |
|
929 if context not in lineno_contexts_map[tono]: |
|
930 lineno_contexts_map[tono].append(context) |
|
931 else: |
|
932 query = ( |
|
933 "select l.numbits, c.context from line_bits l, context c " |
|
934 "where l.context_id = c.id " |
|
935 "and file_id = ?" |
|
936 ) |
|
937 data = [file_id] |
|
938 if self._query_context_ids is not None: |
|
939 ids_array = ', '.join('?' * len(self._query_context_ids)) |
|
940 query += " and l.context_id in (" + ids_array + ")" |
|
941 data += self._query_context_ids |
|
942 for numbits, context in con.execute(query, data): |
|
943 for lineno in numbits_to_nums(numbits): |
|
944 lineno_contexts_map[lineno].append(context) |
|
945 return lineno_contexts_map |
|
946 |
|
947 @classmethod |
|
948 def sys_info(cls): |
|
949 """Our information for `Coverage.sys_info`. |
|
950 |
|
951 Returns a list of (key, value) pairs. |
|
952 |
|
953 """ |
|
954 with SqliteDb(":memory:", debug=NoDebugging()) as db: |
|
955 compile_options = [row[0] for row in db.execute("pragma compile_options")] |
|
956 |
|
957 return [ |
|
958 ('sqlite3_version', sqlite3.version), |
|
959 ('sqlite3_sqlite_version', sqlite3.sqlite_version), |
|
960 ('sqlite3_compile_options', compile_options), |
|
961 ] |
|
962 |
|
963 |
|
class SqliteDb(SimpleReprMixin):
    """A simple abstraction over a SQLite database.

    Use as a context manager, then you can use it like a
    :class:`python:sqlite3.Connection` object::

        with SqliteDb(filename, debug_control) as db:
            db.execute("insert into schema (version) values (?)", (SCHEMA_VERSION,))

    Entering the context manager is re-entrant: a nesting counter makes sure
    only the outermost enter/exit actually connects, commits, and closes.

    """
    def __init__(self, filename, debug):
        # Keep the debug object only if SQL debugging is enabled; elsewhere
        # the code just tests `if self.debug:`.
        self.debug = debug if debug.should('sql') else None
        self.filename = filename
        # Depth of nested `with` entries; connect/close only at depth 0.
        self.nest = 0
        # The sqlite3.Connection, or None when not connected.
        self.con = None

    def _connect(self):
        """Connect to the db and do universal initialization."""
        # Idempotent: a second call while connected is a no-op.
        if self.con is not None:
            return

        # SQLite on Windows on py2 won't open a file if the filename argument
        # has non-ascii characters in it. Opening a relative file name avoids
        # a problem if the current directory has non-ascii.
        filename = self.filename
        if env.WINDOWS and env.PY2:
            try:
                filename = os.path.relpath(self.filename)
            except ValueError:
                # ValueError can be raised under Windows when os.getcwd() returns a
                # folder from a different drive than the drive of self.filename in
                # which case we keep the original value of self.filename unchanged,
                # hoping that we won't face the non-ascii directory problem.
                pass

        # It can happen that Python switches threads while the tracer writes
        # data. The second thread will also try to write to the data,
        # effectively causing a nested context. However, given the idempotent
        # nature of the tracer operations, sharing a connection among threads
        # is not a problem.
        if self.debug:
            self.debug.write("Connecting to {!r}".format(self.filename))
        self.con = sqlite3.connect(filename, check_same_thread=False)
        # Make "context regexp ?" usable in SQL queries (see
        # set_query_contexts); _regexp is the module-level helper below.
        self.con.create_function('REGEXP', 2, _regexp)

        # This pragma makes writing faster. It disables rollbacks, but we never need them.
        # PyPy needs the .close() calls here, or sqlite gets twisted up:
        # https://bitbucket.org/pypy/pypy/issues/2872/default-isolation-mode-is-different-on
        self.execute("pragma journal_mode=off").close()
        # This pragma makes writing faster.
        self.execute("pragma synchronous=off").close()

    def close(self):
        """If needed, close the connection."""
        # NOTE(review): an in-memory database's connection is deliberately not
        # closed here, but `self.con` is still cleared — presumably so the
        # next __enter__ reconnects; confirm the :memory: case is intentional.
        if self.con is not None and self.filename != ":memory:":
            self.con.close()
        self.con = None

    def __enter__(self):
        # Only the outermost entry connects and starts a transaction.
        if self.nest == 0:
            self._connect()
            self.con.__enter__()
        self.nest += 1
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only the outermost exit commits/rolls back and closes.
        self.nest -= 1
        if self.nest == 0:
            try:
                self.con.__exit__(exc_type, exc_value, traceback)
                self.close()
            except Exception as exc:
                # Log for debugging, but never swallow the exception.
                if self.debug:
                    self.debug.write("EXCEPTION from __exit__: {}".format(exc))
                raise

    def execute(self, sql, parameters=()):
        """Same as :meth:`python:sqlite3.Connection.execute`.

        On sqlite3 errors, raises CoverageException with a message that tries
        to explain what went wrong with the data file.

        """
        if self.debug:
            tail = " with {!r}".format(parameters) if parameters else ""
            self.debug.write("Executing {!r}{}".format(sql, tail))
        try:
            return self.con.execute(sql, parameters)
        except sqlite3.Error as exc:
            msg = str(exc)
            try:
                # `execute` is the first thing we do with the database, so try
                # hard to provide useful hints if something goes wrong now.
                with open(self.filename, "rb") as bad_file:
                    cov4_sig = b"!coverage.py: This is a private format"
                    if bad_file.read(len(cov4_sig)) == cov4_sig:
                        msg = (
                            "Looks like a coverage 4.x data file. "
                            "Are you mixing versions of coverage?"
                        )
            except Exception:
                # Best-effort diagnosis only; fall back to the sqlite message.
                pass
            if self.debug:
                self.debug.write("EXCEPTION from execute: {}".format(msg))
            raise CoverageException("Couldn't use data file {!r}: {}".format(self.filename, msg))

    def executemany(self, sql, data):
        """Same as :meth:`python:sqlite3.Connection.executemany`."""
        if self.debug:
            # `data` may be a one-shot iterator; materialize it so it can be
            # both counted for the log and passed on to executemany.
            data = list(data)
            self.debug.write("Executing many {!r} with {} rows".format(sql, len(data)))
        return self.con.executemany(sql, data)

    def executescript(self, script):
        """Same as :meth:`python:sqlite3.Connection.executescript`."""
        if self.debug:
            self.debug.write("Executing script with {} chars: {}".format(
                len(script), clipped_repr(script, 100),
            ))
        self.con.executescript(script)

    def dump(self):
        """Return a multi-line string, the SQL dump of the database."""
        return "\n".join(self.con.iterdump())
|
1083 |
|
1084 |
|
1085 def _regexp(text, pattern): |
|
1086 """A regexp function for SQLite.""" |
|
1087 return re.search(text, pattern) is not None |