|
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
|
2 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt |
|
3 |
|
4 """File wrangling.""" |
|
5 |
|
6 import hashlib |
|
7 import fnmatch |
|
8 import ntpath |
|
9 import os |
|
10 import os.path |
|
11 import posixpath |
|
12 import re |
|
13 import sys |
|
14 |
|
15 from coverage import env |
|
16 from coverage.backward import unicode_class |
|
17 from coverage.misc import contract, CoverageException, join_regex, isolate_module |
|
18 |
|
19 |
|
20 os = isolate_module(os) |
|
21 |
|
22 |
|
def set_relative_directory():
    """Make the current directory the base used by `relative_filename`.

    Rebinds the module globals `RELATIVE_DIR` and `CANONICAL_FILENAME_CACHE`;
    any previously cached canonical file names are discarded.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute form of the current directory, with a trailing separator so
    # that prefix comparisons stop at a directory boundary.
    here = abs_file(os.curdir) + os.sep
    RELATIVE_DIR = os.path.normcase(here)

    # Start with an empty memo for canonical_filename() so work already
    # done for a file name is not repeated.
    CANONICAL_FILENAME_CACHE = {}
|
33 |
|
34 |
|
def relative_directory():
    """Return the base directory `relative_filename` strips from paths.

    This is the current value of the module global `RELATIVE_DIR`.

    """
    return RELATIVE_DIR
|
38 |
|
39 |
|
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` expressed relative to the recorded directory.

    The recorded directory is the one that was current when
    `set_relative_directory` was last called.  A file name that does not
    live under that directory is passed through without trimming.

    """
    # Compare case-normalized text, but slice the caller's own spelling.
    if os.path.normcase(filename).startswith(RELATIVE_DIR):
        filename = filename[len(RELATIVE_DIR):]
    return unicode_filename(filename)
|
52 |
|
53 |
|
@contract(returns='unicode')
def canonical_filename(filename):
    """Return the canonical spelling of `filename`.

    The result is produced by `abs_file`.  A relative `filename` is first
    looked for in the current directory and then in each `sys.path` entry;
    the first location where it exists is the one that gets canonicalized.
    Results are memoized in `CANONICAL_FILENAME_CACHE`.

    """
    if filename in CANONICAL_FILENAME_CACHE:
        return CANONICAL_FILENAME_CACHE[filename]

    candidate = filename
    if not os.path.isabs(filename):
        for directory in [os.curdir] + sys.path:
            if directory is None:
                continue
            joined = os.path.join(directory, filename)
            try:
                found = os.path.exists(joined)
            except UnicodeError:
                # A name the file system can't encode is treated as absent.
                found = False
            if found:
                candidate = joined
                break

    canonical = abs_file(candidate)
    CANONICAL_FILENAME_CACHE[filename] = canonical
    return canonical
|
78 |
|
79 |
|
# Longest flat name flat_rootname() will return.
MAX_FLAT = 200

@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """Return a flat, directory-free base name that stands for `filename`.

    Handy when files describing the code are all written into a single
    directory yet same-named sources from different directories must stay
    distinguishable.

    For example, the file a/b/c.py will return 'a_b_c_py'

    Names longer than `MAX_FLAT` keep only their tail, followed by an
    underscore and the SHA-1 hex digest of the full flattened name.

    """
    _, tail = ntpath.splitdrive(filename)
    flat = re.sub(r"[\\/.:]", "_", tail)
    if len(flat) <= MAX_FLAT:
        return flat

    digest = hashlib.sha1(flat.encode('UTF-8')).hexdigest()
    # Leave room for the digest and the joining underscore.
    keep = MAX_FLAT - len(digest) - 1
    return flat[-keep:] + '_' + digest
|
99 |
|
100 |
|
if env.WINDOWS:

    # Memo of finished actual_path() answers, keyed by the requested path.
    _ACTUAL_PATH_CACHE = {}
    # Memo of directory listings, keyed by the (already corrected) directory.
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` spelled with the case the file system really uses."""
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        if path in _ACTUAL_PATH_CACHE:
            return _ACTUAL_PATH_CACHE[path]

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: `parent` is the drive spec,
            # which is normalized to upper case.
            corrected = parent.upper()
        elif not parent:
            corrected = leaf
        else:
            parent = actual_path(parent)
            entries = _ACTUAL_PATH_LIST_CACHE.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except Exception:
                    # This will raise OSError, or this bizarre TypeError:
                    # https://bugs.python.org/issue1776160
                    entries = []
                _ACTUAL_PATH_LIST_CACHE[parent] = entries
            wanted = os.path.normcase(leaf)
            # Use the first directory entry that equals the leaf once case is
            # normalized; keep the leaf as given when nothing matches.
            leaf = next(
                (entry for entry in entries if os.path.normcase(entry) == wanted),
                leaf,
            )
            corrected = os.path.join(parent, leaf)
        _ACTUAL_PATH_CACHE[path] = corrected
        return corrected

else:
    def actual_path(filename):
        """Return `filename` untouched: no case correction off Windows."""
        return filename
|
144 |
|
145 |
|
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text, decoding byte strings."""
        if not isinstance(filename, str):
            return filename
        # Prefer the file system encoding; fall back to the default one.
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(encoding, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text; here it is returned as given."""
        return filename
|
159 |
|
160 |
|
@contract(returns='unicode')
def abs_file(path):
    """Return `path` as an absolute, normalized, Unicode file name."""
    try:
        path = os.path.realpath(path)
    except UnicodeError:
        # Keep the path as given when realpath can't handle its text.
        pass
    absolute = os.path.abspath(path)
    return unicode_filename(actual_path(absolute))
|
172 |
|
173 |
|
def python_reported_file(filename):
    """Return `filename` in the form Python itself would report it.

    When `env.PYBEHAVIOR.report_absolute_files` is true the name is made
    absolute; otherwise it is returned unchanged.

    """
    if not env.PYBEHAVIOR.report_absolute_files:
        return filename
    return os.path.abspath(filename)
|
179 |
|
180 |
|
# Module-level state.  Both names are created as None and then given their
# working values by the set_relative_directory() call made here at import.
CANONICAL_FILENAME_CACHE = None
RELATIVE_DIR = None
set_relative_directory()
|
184 |
|
185 |
|
def isabs_anywhere(filename):
    """Report whether `filename` is absolute under Windows or POSIX rules."""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
|
189 |
|
190 |
|
def prep_patterns(patterns):
    """Get file patterns ready to hand to a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (``*`` or ``?``) is kept
    exactly as written.  Every other pattern is made absolute, relative to
    the current directory, with `abs_file`.

    Passing None for `patterns` yields an empty list.

    """
    return [
        pattern if pattern.startswith(("*", "?")) else abs_file(pattern)
        for pattern in patterns or []
    ]
|
208 |
|
209 |
|
class TreeMatcher(object):
    """A matcher for files in a tree.

    Construct with a list of paths, either files or directories.  Paths match
    with the `match` method if they are one of the files, or if they are
    somewhere in a subtree rooted at one of the directories.

    """
    def __init__(self, paths):
        """Remember `paths` (any iterable of strings) as the tree roots."""
        self.paths = list(paths)

    def __repr__(self):
        return "<TreeMatcher %r>" % self.paths

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.paths

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        Returns True when `fpath` equals one of the stored paths or lies
        beneath one of them, and False otherwise.  The comparison is a plain
        string comparison; no normalization is applied here.

        """
        for p in self.paths:
            if not fpath.startswith(p):
                continue
            if fpath == p:
                # This is the same file!
                return True
            # From here on fpath is strictly longer than p, so indexing
            # fpath[len(p)] is safe.
            if p.endswith(os.sep):
                # The root already ends with a separator (for example the
                # file system root), so the prefix match alone proves that
                # fpath is inside that directory.
                return True
            if fpath[len(p)] == os.sep:
                # This is a file in the directory
                return True
        return False
|
239 |
|
240 |
|
class ModuleMatcher(object):
    """Matches dotted module names against a set of package names."""
    def __init__(self, module_names):
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher {!r}>".format(self.modules)

    def info(self):
        """Return the stored module names, for use when dumping state."""
        return self.modules

    def match(self, module_name):
        """Is `module_name` one of our modules, or inside one of them?"""
        if not module_name:
            return False

        # Either the very same module, or a dotted name underneath it.
        return any(
            module_name == m or module_name.startswith(m + '.')
            for m in self.modules
        )
|
267 |
|
268 |
|
class FnmatchMatcher(object):
    """Matches file paths against a list of fnmatch-style patterns."""
    def __init__(self, pats):
        self.pats = list(pats)
        # One compiled regex for all patterns; case is ignored on Windows.
        self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)

    def __repr__(self):
        return "<FnmatchMatcher {!r}>".format(self.pats)

    def info(self):
        """Return the stored patterns, for use when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` satisfy at least one of our patterns?"""
        found = self.re.match(fpath)
        return found is not None
|
285 |
|
286 |
|
def sep(s):
    """Return the first slash or backslash found in `s`, else `os.sep`."""
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
|
295 |
|
296 |
|
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Build one compiled regex that matches any of the fnmatch `patterns`.

    A slash in a pattern is always made to match either a slash or a
    backslash, for Windows support, even when running elsewhere.

    With `partial` true, a target matches when it merely starts with the
    pattern.  Otherwise the pattern has to cover the entire target string.

    Returns: a compiled regex object.  Use its .match method to compare
    target strings.

    """
    translated = []
    for pattern in patterns:
        regex = fnmatch.translate(pattern)
        # Python3.7 fnmatch translates "/" as "/".  Before that, it translates
        # as "\/", so we have to deal with maybe a backslash.
        regex = re.sub(r"\\?/", r"[\\\\/]", regex)
        if partial:
            # fnmatch always adds a \Z to match the whole string, which we
            # don't want, so we remove the \Z.  While removing it, we only
            # replace \Z if followed by paren (introducing flags), or at end,
            # to keep from destroying a literal \Z in the pattern.
            regex = re.sub(r'\\Z(\(\?|$)', r'\1', regex)
        translated.append(regex)

    flags = re.IGNORECASE if case_insensitive else 0
    return re.compile(join_regex(translated), flags=flags)
|
328 |
|
329 |
|
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # List of (compiled regex, result prefix) pairs, in the order added.
        self.aliases = []

    def pprint(self):       # pragma: debugging
        """Dump the important parts of the PathAliases, for debugging."""
        for regex, result in self.aliases:
            print("{!r} --> {!r}".format(regex.pattern, result))

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `CoverageException` if `pattern` ends with ``*`` once any
        trailing separators have been stripped.

        """
        # Drop trailing separators, but leave a one-character pattern alone
        # so that a bare "/" is not reduced to an empty string.
        if len(pattern) > 1:
            pattern = pattern.rstrip(r"\/")

        # The pattern can't end with a wildcard component.
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        # Note the separator style now, before abs_file() can rewrite it.
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        if not pattern.endswith(pattern_sep):
            pattern += pattern_sep

        # Make a regex from the pattern.
        regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result in self.aliases:
            m = regex.match(path)
            if m:
                # `match` anchors at the start of `path`, so the matched text
                # is exactly the leading root.  Swap only that prefix: a
                # str.replace() on the matched text would also rewrite any
                # later repetition of it deeper inside the path.
                new = result + path[m.end():]
                new = new.replace(sep(path), sep(result))
                new = canonical_filename(new)
                return new
        return path
|
409 |
|
410 |
|
def find_python_files(dirname):
    """Yield every importable Python file under `dirname`, recursively.

    A sub-directory only counts as importable when it holds a __init__.py;
    one that doesn't is skipped together with everything below it.
    `dirname` itself is exempt from that check: it was named directly, so
    the caller is trusted to know what they want.

    """
    # We're only interested in files that look like reasonable Python files:
    # they must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    plausible_name = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    for depth, (dirpath, dirnames, filenames) in enumerate(os.walk(dirname)):
        is_subdirectory = depth > 0
        if is_subdirectory and '__init__.py' not in filenames:
            # Not a package: empty the list in place so os.walk doesn't
            # descend any further from here.
            dirnames[:] = []
            continue
        for filename in filenames:
            if plausible_name(filename):
                yield os.path.join(dirpath, filename)