|
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
|
3 |
|
4 """File wrangling.""" |
|
5 |
|
6 import hashlib |
|
7 import fnmatch |
|
8 import ntpath |
|
9 import os |
|
10 import os.path |
|
11 import posixpath |
|
12 import re |
|
13 import sys |
|
14 |
|
15 from coverage import env |
|
16 from coverage.exceptions import ConfigError |
|
17 from coverage.misc import contract, human_sorted, isolate_module, join_regex |
|
18 |
|
19 |
|
# Rebind the module-level name `os` to whatever isolate_module() returns, so
# every later `os.` reference in this file goes through that object.
# NOTE(review): presumably this is a private copy of `os` that is unaffected
# by patching of the real module -- confirm in coverage.misc.isolate_module.
os = isolate_module(os)
|
21 |
|
22 |
|
def set_relative_directory():
    """Record the current directory as the base for `relative_filename`.

    Two module globals are rebound:

    * `RELATIVE_DIR` becomes the absolute, case-normalized current directory
      with a trailing path separator.
    * `CANONICAL_FILENAME_CACHE` becomes a new empty dict, discarding any
      results previously remembered by `canonical_filename`.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute form of the current directory; the trailing separator makes
    # prefix tests in relative_filename() stop at a directory boundary.
    current_dir = abs_file(os.curdir)
    RELATIVE_DIR = os.path.normcase(current_dir + os.sep)

    # Start with an empty cache of canonical_filename() results.
    CANONICAL_FILENAME_CACHE = {}
|
33 |
|
34 |
|
def relative_directory():
    """Return the base directory used by `relative_filename`.

    This is the current value of the module global `RELATIVE_DIR`, as last
    stored by `set_relative_directory`.

    """
    return RELATIVE_DIR
|
38 |
|
39 |
|
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` with the relative directory prefix removed.

    The prefix is the directory recorded by the most recent call to
    `set_relative_directory`.  The comparison is done on the case-normalized
    name, but the returned text is sliced from the original `filename`.  If
    `filename` is not under that directory, it is returned unchanged.

    """
    base = RELATIVE_DIR
    if os.path.normcase(filename).startswith(base):
        return filename[len(base):]
    return filename
|
52 |
|
53 |
|
@contract(returns='unicode')
def canonical_filename(filename):
    """Return the canonical form of `filename`.

    The result is `abs_file` applied to the file.  A relative `filename` is
    first looked for in the current directory and then in each entry of
    `sys.path`; the first location where it exists is used.  If it exists
    nowhere, the name is passed to `abs_file` as given.

    Results are remembered in `CANONICAL_FILENAME_CACHE`, keyed by the
    original `filename`.

    """
    if filename in CANONICAL_FILENAME_CACHE:
        return CANONICAL_FILENAME_CACHE[filename]

    resolved = filename
    if not os.path.isabs(filename):
        for search_dir in [os.curdir] + sys.path:
            # sys.path may contain None entries: skip them.
            if search_dir is None:
                continue
            candidate = os.path.join(search_dir, filename)
            try:
                found = os.path.exists(candidate)
            except UnicodeError:
                # An un-encodable name is treated as "not there".
                found = False
            if found:
                resolved = candidate
                break

    canonical = abs_file(resolved)
    CANONICAL_FILENAME_CACHE[filename] = canonical
    return canonical
|
78 |
|
79 |
|
# Not referenced by flat_rootname() in the code visible here; kept because
# other code may import it.
MAX_FLAT = 100

@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """Return a flat, directory-free base name that stands for `filename`.

    Meant for writing per-file output into a single directory while keeping
    same-named files from different directories apart.

    The name is split with `ntpath.split`.  Dots in the base name become
    underscores.  If there is a directory part, the result is prefixed with
    ``d_<fp>_`` where ``<fp>`` is the first 16 hex digits of the SHA3-256
    digest of the UTF-8 encoded directory.

    For example, the file a/b/c.py will return 'd_86bbcbe134d28fd2_c_py'

    """
    dirname, basename = ntpath.split(filename)
    flat_base = basename.replace(".", "_")
    if not dirname:
        return flat_base
    digest = hashlib.new("sha3_256", dirname.encode("UTF-8")).hexdigest()
    return f"d_{digest[:16]}_{flat_base}"
|
100 |
|
101 |
|
if env.WINDOWS:

    # path -> result of actual_path(path)
    _ACTUAL_PATH_CACHE = {}
    # directory -> list of its entries (empty list if it could not be listed)
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` spelled with the case found in directory listings.

        Each component is compared, case-normalized, against the entries of
        its parent directory; the listed spelling is used when one matches,
        otherwise the component is kept as given.  A path with no final
        component (a drive spec) is upper-cased.  Results and directory
        listings are cached for the life of the process.

        """
        try:
            return _ACTUAL_PATH_CACHE[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: this is the drive spec.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            entries = _ACTUAL_PATH_LIST_CACHE.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except Exception:
                    # This will raise OSError, or this bizarre TypeError:
                    # https://bugs.python.org/issue1776160
                    entries = []
                _ACTUAL_PATH_LIST_CACHE[parent] = entries
            wanted = os.path.normcase(leaf)
            # First listed entry equal to the leaf ignoring case, else the
            # leaf as it was given.
            leaf = next(
                (entry for entry in entries if os.path.normcase(entry) == wanted),
                leaf,
            )
            resolved = os.path.join(parent, leaf)

        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(path):
        """Return `path` unchanged: no case fix-up is done off Windows."""
        return path
|
143 |
|
144 |
|
@contract(returns='unicode')
def abs_file(path):
    """Return the absolute, normalized form of `path`.

    The path is passed through `os.path.realpath`, then `os.path.abspath`,
    and finally `actual_path`.

    """
    real = os.path.realpath(path)
    absolute = os.path.abspath(real)
    return actual_path(absolute)
|
149 |
|
150 |
|
def python_reported_file(filename):
    """Return `filename` the way Python would report it.

    When `env.PYBEHAVIOR.report_absolute_files` is true the name is made
    absolute with `os.path.abspath`; otherwise it is returned untouched.

    """
    if not env.PYBEHAVIOR.report_absolute_files:
        return filename
    return os.path.abspath(filename)
|
156 |
|
157 |
|
# Module-level state.  Both names are declared here and then given their
# real values by the set_relative_directory() call below, which runs when
# this module is imported:
#   RELATIVE_DIR: the directory prefix that relative_filename() strips.
#   CANONICAL_FILENAME_CACHE: dict of canonical_filename() results.
RELATIVE_DIR = None
CANONICAL_FILENAME_CACHE = None
set_relative_directory()
|
161 |
|
162 |
|
def isabs_anywhere(filename):
    """Report whether `filename` is absolute under Windows or POSIX rules.

    The Windows (`ntpath`) rule is tried first, then the POSIX (`posixpath`)
    rule; either one accepting the name is enough.

    """
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
|
166 |
|
167 |
|
def prep_patterns(patterns):
    """Get file patterns ready for use in a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (``*`` or ``?``) is kept
    exactly as written.  Any other pattern is made absolute with `abs_file`.

    `patterns` may be None, in which case the result is an empty list.

    Returns a new list, in the same order as `patterns`.

    """
    wildcards = ("*", "?")
    return [
        pattern if pattern.startswith(wildcards) else abs_file(pattern)
        for pattern in patterns or []
    ]
|
185 |
|
186 |
|
class TreeMatcher:
    """Match file paths against a set of files and directory trees.

    Build it from a list of paths, each a file or a directory.  `match`
    accepts a path that is exactly one of them, or that lies anywhere
    beneath one of them.

    """
    def __init__(self, paths, name="unknown"):
        # Sorted copy kept for display; case-normalized copy used to match.
        self.original_paths = human_sorted(paths)
        self.paths = [os.path.normcase(path) for path in paths]
        self.name = name

    def __repr__(self):
        return f"<TreeMatcher {self.name} {self.original_paths!r}>"

    def info(self):
        """Return the list of strings to show when dumping state."""
        return self.original_paths

    def match(self, fpath):
        """Return True if `fpath` is one of our paths or is inside one."""
        fpath = os.path.normcase(fpath)
        return any(
            # Either the very same path, or the root followed directly by a
            # separator, so "/a/bc" is not taken to be inside "/a/b".
            fpath == root or fpath.startswith(root + os.sep)
            for root in self.paths
        )
|
219 |
|
220 |
|
class ModuleMatcher:
    """Match dotted module names against a set of modules and packages."""
    def __init__(self, module_names, name="unknown"):
        self.modules = list(module_names)
        self.name = name

    def __repr__(self):
        return f"<ModuleMatcher {self.name} {self.modules!r}>"

    def info(self):
        """Return the list of strings to show when dumping state."""
        return self.modules

    def match(self, module_name):
        """Return True if `module_name` is one of our modules or inside one.

        An empty or None `module_name` never matches.

        """
        if not module_name:
            return False

        return any(
            # The module itself, or something below it: the name must
            # continue with a dot, so "pkgx" is not inside "pkg".
            module_name == package or module_name.startswith(package + ".")
            for package in self.modules
        )
|
248 |
|
249 |
|
class FnmatchMatcher:
    """Match file paths against a set of fnmatch-style patterns."""
    def __init__(self, pats, name="unknown"):
        self.pats = list(pats)
        # One compiled regex for all patterns; case is ignored on Windows.
        self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)
        self.name = name

    def __repr__(self):
        return f"<FnmatchMatcher {self.name} {self.pats!r}>"

    def info(self):
        """Return the list of strings to show when dumping state."""
        return self.pats

    def match(self, fpath):
        """Return True if `fpath` matches any of our file name patterns."""
        found = self.re.match(fpath)
        return found is not None
|
267 |
|
268 |
|
def sep(s):
    """Return the first path separator appearing in `s`.

    Both slash and backslash count as separators.  If `s` has neither, the
    running platform's `os.sep` is returned.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
|
277 |
|
278 |
|
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Build one compiled regex that matches any of the fnmatch `patterns`.

    Every slash in a pattern is made to match either a slash or a backslash,
    for Windows support, even when running elsewhere.

    With `partial` true, a target string only has to start with the pattern.
    Otherwise the pattern has to match the entire string.

    With `case_insensitive` true, the regex is compiled with `re.IGNORECASE`.

    Returns: a compiled regex object.  Use the .match method to compare target
    strings.

    """
    translated = []
    for pattern in patterns:
        regex = fnmatch.translate(pattern)
        # Python3.7 fnmatch translates "/" as "/".  Before that, it translates
        # as "\/", so we have to deal with maybe a backslash.
        regex = re.sub(r"\\?/", r"[\\\\/]", regex)
        if partial:
            # fnmatch always adds a \Z to match the whole string, which we
            # don't want, so we remove the \Z.  We only remove a \Z that is
            # followed by a paren (introducing flags) or is at the end, to
            # keep from destroying a literal \Z in the pattern.
            regex = re.sub(r'\\Z(\(\?|$)', r'\1', regex)
        translated.append(regex)

    flags = re.IGNORECASE if case_insensitive else 0
    return re.compile(join_regex(translated), flags=flags)
|
310 |
|
311 |
|
class PathAliases:
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self, relative=False):
        # List of (compiled regex, result prefix) pairs, in the order added.
        self.aliases = []
        # If true, map() returns the remapped path without canonicalizing it.
        self.relative = relative

    def pprint(self):       # pragma: debugging
        """Dump the important parts of the PathAliases, for debugging."""
        print(f"Aliases (relative={self.relative}):")
        for regex, result in self.aliases:
            print(f"{regex.pattern!r} --> {result!r}")

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `ConfigError` if `pattern` ends with ``*`` once trailing
        separators have been removed.

        """
        # Remember the pattern's own separator style before it is altered.
        pattern_sep = sep(pattern)

        # Drop trailing separators, but leave a one-character pattern (such
        # as a bare "/") alone.
        if len(pattern) > 1:
            pattern = pattern.rstrip(r"\/")

        # The pattern can't end with a wildcard component.
        if pattern.endswith("*"):
            raise ConfigError("Pattern must not end with wildcards.")

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern + pattern_sep):
            pattern = abs_file(pattern)
        # End on a separator so the pattern only matches whole directories.
        if not pattern.endswith(pattern_sep):
            pattern += pattern_sep

        # Make a regex from the pattern.
        regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        Only the matched prefix is replaced: if the same text happens to
        appear again later in `path`, that later occurrence is left alone.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path, unless this object was made with `relative` true, in
        which case the remapped path is returned as is.  If no mapping has
        happened, it is the original value of `path` unchanged.

        """
        for regex, result in self.aliases:
            m = regex.match(path)
            if m:
                # The match is anchored at the start of `path`, so splice the
                # result onto the unmatched remainder.  str.replace() would
                # also rewrite any later repeat of the matched text.
                new = result + path[m.end():]
                new = new.replace(sep(path), sep(result))
                if not self.relative:
                    new = canonical_filename(new)
                return new
        return path
|
396 |
|
397 |
|
def find_python_files(dirname):
    """Yield every importable Python file under `dirname`, recursively.

    `dirname` itself is always searched: it was named explicitly, so it does
    not need an __init__.py.  A sub-directory is searched only if it contains
    an __init__.py; otherwise it and everything beneath it are skipped.

    Yields the path of each file, formed by joining the directory path from
    `os.walk` with the file name.

    """
    # We're only interested in files that look like reasonable Python
    # files: Must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if not at_top and '__init__.py' not in filenames:
            # If a directory doesn't have __init__.py, then it isn't
            # importable and neither are its files.  Emptying the list in
            # place stops os.walk from descending any further.
            dirnames.clear()
            continue
        at_top = False
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)
|
419 yield os.path.join(dirpath, filename) |