|
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
|
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt |
|
3 |
|
4 """File wrangling.""" |
|
5 |
|
6 import hashlib |
|
7 import fnmatch |
|
8 import ntpath |
|
9 import os |
|
10 import os.path |
|
11 import posixpath |
|
12 import re |
|
13 import sys |
|
14 |
|
15 from coverage import env |
|
16 from coverage.backward import unicode_class |
|
17 from coverage.misc import contract, CoverageException, join_regex, isolate_module |
|
18 |
|
19 |
|
# Rebind `os` to whatever `isolate_module` returns for it, so the rest of this
# file goes through that object.  NOTE(review): presumably this is a private
# copy that shields us from patching of the real `os` module; confirm against
# `coverage.misc.isolate_module`.
os = isolate_module(os)
|
21 |
|
22 |
|
def set_relative_directory():
    """Record the current directory as the base for `relative_filename`.

    Also discards every memoized `canonical_filename` result by installing a
    fresh, empty cache.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute current directory, with a trailing separator so that prefix
    # tests only ever match whole path components.
    here = abs_file(os.curdir) + os.sep
    RELATIVE_DIR = os.path.normcase(here)

    # Memo of canonical_filename() results, keyed by the name passed in.
    CANONICAL_FILENAME_CACHE = {}
|
33 |
|
34 |
|
def relative_directory():
    """Return the base directory recorded by `set_relative_directory`.

    This is the prefix that `relative_filename` strips from file names.

    """
    return RELATIVE_DIR
|
38 |
|
39 |
|
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` relative to the recorded base directory.

    The base is the directory that was current when `set_relative_directory`
    was last called.  Names that are not under that directory are returned
    without the prefix being removed.  The prefix test is done on the
    case-normalized name, but the returned text keeps its original case.

    """
    if os.path.normcase(filename).startswith(RELATIVE_DIR):
        filename = filename[len(RELATIVE_DIR):]
    return unicode_filename(filename)
|
52 |
|
53 |
|
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    An absolute path with no redundant components and normalized case.

    A relative `filename` is looked for in the current directory and then in
    each entry of `sys.path`; the first location where it exists is used.  If
    it is found nowhere, the name is made absolute as-is.

    Results are memoized in `CANONICAL_FILENAME_CACHE` under the name the
    caller passed in, so repeated calls with the same argument do no file
    system work.

    """
    if filename not in CANONICAL_FILENAME_CACHE:
        # Keep the caller's `filename` untouched: it is the cache key.  The
        # located path is tracked separately in `cf`.
        cf = filename
        if not os.path.isabs(filename):
            for path in [os.curdir] + sys.path:
                if path is None:
                    continue
                f = os.path.join(path, filename)
                try:
                    exists = os.path.exists(f)
                except UnicodeError:
                    # A name that can't be encoded for the file system is
                    # treated as not existing at this location.
                    exists = False
                if exists:
                    cf = f
                    break
        cf = abs_file(cf)
        CANONICAL_FILENAME_CACHE[filename] = cf
    return CANONICAL_FILENAME_CACHE[filename]
|
77 |
|
78 |
|
# Longest flat name that `flat_rootname` will produce.
MAX_FLAT = 200

@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """Return a flat, directory-free base name that stands for `filename`.

    Handy when files describing source code must all live in one directory,
    yet same-named sources from different directories have to stay distinct.

    For example, the file a/b/c.py will return 'a_b_c_py'

    Names that would be longer than `MAX_FLAT` are shortened: the tail of the
    flattened name is kept, followed by an underscore and the SHA-1 hex digest
    of the full flattened name.

    """
    _, rest = ntpath.splitdrive(filename)
    flat = re.sub(r"[\\/.:]", "_", rest)
    if len(flat) <= MAX_FLAT:
        return flat

    digest = hashlib.sha1(flat.encode('UTF-8')).hexdigest()
    # Leave room for the digest and the underscore that joins it on.
    keep = MAX_FLAT - len(digest) - 1
    return flat[-keep:] + '_' + digest
|
98 |
|
99 |
|
# `actual_path` has two implementations, chosen once at import time from
# `env.WINDOWS`.
if env.WINDOWS:

    # Memo of actual_path() results, keyed by the path that was asked about.
    _ACTUAL_PATH_CACHE = {}
    # Memo of os.listdir() results, keyed by the (already corrected) directory.
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Get the actual path of `path`, including the correct case.

        The path is split into head and tail; the head is corrected
        recursively, then the tail is replaced by the directory entry whose
        case-normalized name equals the case-normalized tail.  If no entry
        matches, or the directory can't be listed, the tail is kept as given.

        """
        if env.PY2 and isinstance(path, unicode_class):
            # On Python 2, work with the file-system-encoded byte string.
            path = path.encode(sys.getfilesystemencoding())
        if path in _ACTUAL_PATH_CACHE:
            return _ACTUAL_PATH_CACHE[path]

        head, tail = os.path.split(path)
        if not tail:
            # This means head is the drive spec: normalize it.
            actpath = head.upper()
        elif not head:
            # A bare name with no directory part: nothing to look it up in.
            actpath = tail
        else:
            # Fix the case of the parent first, then find the tail inside it.
            head = actual_path(head)
            if head in _ACTUAL_PATH_LIST_CACHE:
                files = _ACTUAL_PATH_LIST_CACHE[head]
            else:
                try:
                    files = os.listdir(head)
                except OSError:
                    # Unlistable directory: remember it as empty so we don't
                    # try again.
                    files = []
                _ACTUAL_PATH_LIST_CACHE[head] = files
            normtail = os.path.normcase(tail)
            for f in files:
                if os.path.normcase(f) == normtail:
                    # Use the spelling that the directory listing reports.
                    tail = f
                    break
            actpath = os.path.join(head, tail)
        _ACTUAL_PATH_CACHE[path] = actpath
        return actpath

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms.

        No correction is done here: `filename` is returned unchanged.

        """
        return filename
|
141 |
|
142 |
|
# `unicode_filename` has two implementations, chosen once at import time from
# `env.PY2`.
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return a Unicode version of `filename`.

        A `str` argument is decoded with the file system encoding, falling
        back to the default encoding if that is not set.  Undecodable bytes
        are replaced rather than raising.  Anything else is returned as-is.

        """
        if isinstance(filename, str):
            encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
            filename = filename.decode(encoding, "replace")
        return filename
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return a Unicode version of `filename`.

        Nothing needs converting in this variant: `filename` is returned
        unchanged.

        """
        return filename
|
156 |
|
157 |
|
@contract(returns='unicode')
def abs_file(filename):
    """Return `filename` as an absolute, normalized, Unicode path.

    Home-directory and environment-variable references are expanded first,
    then the path is resolved with `os.path.realpath`, made absolute, passed
    through `actual_path`, and finally through `unicode_filename`.

    """
    expanded = os.path.expandvars(os.path.expanduser(filename))
    try:
        expanded = os.path.realpath(expanded)
    except UnicodeError:
        # Best effort: carry on with the unresolved path.
        pass
    return unicode_filename(actual_path(os.path.abspath(expanded)))
|
170 |
|
171 |
|
# Module state: declare both globals, then fill them in right away, at import
# time, by calling `set_relative_directory`.
RELATIVE_DIR = None
CANONICAL_FILENAME_CACHE = None
set_relative_directory()
|
175 |
|
176 |
|
def isabs_anywhere(filename):
    """Report whether `filename` is absolute under Windows or POSIX rules."""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
|
180 |
|
181 |
|
def prep_patterns(patterns):
    """Ready a list of file patterns for a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (``*`` or ``?``) is kept
    exactly as written.  Every other pattern is made absolute with
    `abs_file`.

    A `patterns` of None (or anything else falsy) gives an empty list.

    """
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in (patterns or [])
    ]
|
199 |
|
200 |
|
class TreeMatcher(object):
    """A matcher for files in a tree.

    Construct with a list of paths, either files or directories.  Paths match
    with the `match` method if they are one of the files, or if they are
    somewhere in a subtree rooted at one of the directories.

    """
    def __init__(self, paths):
        # Copy, so later changes to the caller's iterable don't affect us.
        self.paths = list(paths)

    def __repr__(self):
        return "<TreeMatcher %r>" % self.paths

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.paths

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        Returns True if `fpath` equals one of our paths, or lies below one of
        them; False otherwise.  A path is "below" a root only at a separator
        boundary, so ``/a/bc`` is not inside ``/a/b``.

        """
        for p in self.paths:
            if not fpath.startswith(p):
                continue
            if fpath == p:
                # This is the same file!
                return True
            # From here on `fpath` is strictly longer than `p`, so indexing
            # at len(p) is safe.
            if p.endswith(os.sep):
                # The root already ends at a separator (for example the file
                # system root), so the boundary is inside the prefix itself.
                return True
            if fpath[len(p)] == os.sep:
                # This is a file in the directory
                return True
        return False
|
230 |
|
231 |
|
class ModuleMatcher(object):
    """Matches dotted module names against a set of modules and packages."""
    def __init__(self, module_names):
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % (self.modules,)

    def info(self):
        """The configured module names, for showing in state dumps."""
        return self.modules

    def match(self, module_name):
        """Is `module_name` one of our modules, or inside one of our packages?

        An empty or missing `module_name` never matches.  Containment is
        checked at a dot boundary, so ``pkgother`` is not inside ``pkg``.

        """
        if not module_name:
            return False

        return any(
            module_name == known or module_name.startswith(known + '.')
            for known in self.modules
        )
|
258 |
|
259 |
|
class FnmatchMatcher(object):
    """Matches file paths against a set of fnmatch-style patterns."""
    def __init__(self, pats):
        self.pats = list(pats)
        # One compiled regex covers all patterns; case is ignored when
        # `env.WINDOWS` is true.
        self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)

    def __repr__(self):
        return "<FnmatchMatcher %r>" % (self.pats,)

    def info(self):
        """The configured patterns, for showing in state dumps."""
        return self.pats

    def match(self, fpath):
        """Report whether `fpath` matches any of our patterns."""
        # Match objects are always truthy, so this is True exactly when a
        # match was found.
        return bool(self.re.match(fpath))
|
276 |
|
277 |
|
def sep(s):
    """Return the first path separator that appears in `s`.

    Either a slash or a backslash counts.  If `s` has neither, `os.sep` is
    returned.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
|
286 |
|
287 |
|
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Build one compiled regex that matches any of the fnmatch `patterns`.

    A slash in a pattern matches either a slash or a backslash, so that
    Windows paths are handled even when running on another platform.

    With `partial` true, the target only has to start with something that
    matches a pattern.  Otherwise the whole target string must match.

    With `case_insensitive` true, the regex ignores case.

    Returns: a compiled regex object.  Use its .match method to test target
    strings.

    """
    translated = []
    for pattern in patterns:
        regex = fnmatch.translate(pattern)
        # Python 3.7 fnmatch renders "/" as "/"; earlier versions render it
        # as "\/", so allow for an optional backslash in front.
        regex = re.sub(r"\\?/", r"[\\\\/]", regex)
        if partial:
            # fnmatch anchors with a trailing \Z, which a prefix match must
            # not have.  Only drop a \Z that is followed by "(?" (a flags
            # group) or sits at the very end, so that a literal \Z inside the
            # pattern survives.
            regex = re.sub(r'\\Z(\(\?|$)', r'\1', regex)
        translated.append(regex)

    flags = re.IGNORECASE if case_insensitive else 0
    return re.compile(join_regex(translated), flags=flags)
|
319 |
|
320 |
|
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # List of (compiled regex, result prefix) pairs, in the order added.
        self.aliases = []

    def pprint(self):       # pragma: debugging
        """Dump the important parts of the PathAliases, for debugging."""
        for regex, result in self.aliases:
            print("{0!r} --> {1!r}".format(regex.pattern, result))

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `CoverageException` if `pattern` ends with ``*`` once any
        trailing separators have been removed.

        """
        # Drop trailing separators, but leave a one-character pattern (such
        # as a lone separator) alone.
        if len(pattern) > 1:
            pattern = pattern.rstrip(r"\/")

        # The pattern can't end with a wildcard component.
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        # End the pattern at a separator so it only matches whole components.
        if not pattern.endswith(pattern_sep):
            pattern += pattern_sep

        # Make a regex from the pattern.
        regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched leading part.  A plain str.replace of
                # the matched text would also rewrite any later occurrence of
                # that same text further down the path.
                new = result + path[m.end():]
                new = new.replace(sep(path), sep(result))
                new = canonical_filename(new)
                return new
        return path
|
400 |
|
401 |
|
def find_python_files(dirname):
    """Yield every importable Python file under `dirname`, recursively.

    A sub-directory counts as importable only if it contains an __init__.py;
    directories without one are skipped together with everything below them.
    `dirname` itself is exempt from that rule: it was named directly, so the
    caller is trusted, and its own files are always considered.

    """
    # Reasonable Python file names: end with .py or .pyw, and contain none of
    # the odd characters that usually indicate editor junk.
    python_like = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$")

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if not at_top and '__init__.py' not in filenames:
            # Not a package: neither this directory nor anything beneath it
            # is importable.  Emptying `dirnames` in place stops the walk
            # from descending further.
            del dirnames[:]
            continue
        at_top = False
        for filename in filenames:
            if python_like.match(filename):
                yield os.path.join(dirpath, filename)