1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 |
|
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt |
|
3 |
|
4 """File wrangling.""" |
|
5 |
|
6 import fnmatch |
|
7 import ntpath |
|
8 import os |
|
9 import os.path |
|
10 import posixpath |
|
11 import re |
|
12 import sys |
|
13 |
|
14 from coverage import env |
|
15 from coverage.backward import unicode_class |
|
16 from coverage.misc import contract, CoverageException, join_regex, isolate_module |
|
17 |
|
18 |
|
# Rebind the module-level name `os` to whatever `isolate_module` returns.
# NOTE(review): presumably a private copy of the module so that later patching
# of the real `os` does not affect this file -- confirm in coverage.misc.
os = isolate_module(os)
|
20 |
|
21 |
|
def set_relative_directory():
    """Record the current directory as the base for `relative_filename`.

    Also discards every remembered `canonical_filename` result by replacing
    the cache with a fresh, empty dict.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute, case-normalized current directory, with a trailing separator
    # so that it can be used directly as a path prefix.
    here = abs_file(os.curdir)
    RELATIVE_DIR = os.path.normcase(here + os.sep)

    # Results of canonical_filename() are memoized here to avoid repeated
    # work; start over with an empty cache.
    CANONICAL_FILENAME_CACHE = {}
|
32 |
|
33 |
|
def relative_directory():
    """Return the base directory used by `relative_filename`.

    This is the current value of the module-level `RELATIVE_DIR`.

    """
    base_dir = RELATIVE_DIR
    return base_dir
|
37 |
|
38 |
|
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` relative to the recorded base directory.

    The base is `RELATIVE_DIR`, as set by `set_relative_directory`.  If the
    case-normalized `filename` does not start with that base, the name is
    returned as given.  The result always goes through `unicode_filename`.

    """
    base = RELATIVE_DIR
    # Compare in normalized case, but slice the caller's original spelling.
    if os.path.normcase(filename).startswith(base):
        filename = filename[len(base):]
    return unicode_filename(filename)
|
51 |
|
52 |
|
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    An absolute path with no redundant components and normalized case.

    A relative `filename` is looked for in the current directory and then in
    each `sys.path` entry; the first location where it exists is used.  If it
    is found nowhere, `filename` itself is made absolute.

    Results are memoized in `CANONICAL_FILENAME_CACHE`, keyed by the
    `filename` the caller passed in.

    """
    if filename not in CANONICAL_FILENAME_CACHE:
        # Keep `filename` untouched so it stays usable as the cache key.
        # Storing the result under the resolved path instead would make the
        # membership test above miss on every call with a relative name.
        candidate = filename
        if not os.path.isabs(filename):
            for path in [os.curdir] + sys.path:
                if path is None:
                    continue
                f = os.path.join(path, filename)
                if os.path.exists(f):
                    candidate = f
                    break
        CANONICAL_FILENAME_CACHE[filename] = abs_file(candidate)
    return CANONICAL_FILENAME_CACHE[filename]
|
72 |
|
73 |
|
def flat_rootname(filename):
    """Return a flattened base name derived from the path `filename`.

    Handy when files from many directories must be written side by side in a
    single directory while keeping same-named files distinct.

    Any Windows-style drive prefix is dropped, then every backslash, slash,
    dot and colon becomes an underscore.

    For example, the file a/b/c.py will return 'a_b_c_py'

    """
    _drive, remainder = ntpath.splitdrive(filename)
    flattened = re.sub(r"[\\/.:]", "_", remainder)
    return flattened
|
86 |
|
87 |
|
if env.WINDOWS:

    # Memo of path -> correctly-cased path.
    _ACTUAL_PATH_CACHE = {}
    # Memo of directory -> its os.listdir() result (empty list on OSError).
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` spelled with the case found in directory listings.

        Works recursively from the leaf towards the root: the parent is
        corrected first, then the leaf is replaced by the directory entry
        that equals it after `os.path.normcase`.  If no entry matches, the
        leaf is kept as given.

        """
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        if path in _ACTUAL_PATH_CACHE:
            return _ACTUAL_PATH_CACHE[path]

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: this is the drive spec, so
            # just normalize it to upper case.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            if parent not in _ACTUAL_PATH_LIST_CACHE:
                try:
                    listing = os.listdir(parent)
                except OSError:
                    # Unlistable directory: treat it as having no entries.
                    listing = []
                _ACTUAL_PATH_LIST_CACHE[parent] = listing
            entries = _ACTUAL_PATH_LIST_CACHE[parent]
            wanted = os.path.normcase(leaf)
            # First entry that matches case-insensitively wins; otherwise
            # the caller's spelling is kept.
            leaf = next(
                (entry for entry in entries if os.path.normcase(entry) == wanted),
                leaf,
            )
            resolved = os.path.join(parent, leaf)
        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(filename):
        """Return `filename` unchanged: no case fixing off Windows."""
        return filename
|
129 |
|
130 |
|
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text, decoding byte strings.

        Byte strings are decoded with the file system encoding (falling back
        to the default encoding), replacing undecodable bytes.  Anything that
        is not a `str` is returned as-is.

        """
        if not isinstance(filename, str):
            return filename
        codec = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(codec, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return `filename` unchanged; no decoding is done in this branch."""
        return filename
|
144 |
|
145 |
|
@contract(returns='unicode')
def abs_file(filename):
    """Return `filename` as an absolute, normalized, Unicode path.

    Steps, in order: expand `~`, expand environment variables, resolve with
    `os.path.realpath`, make absolute, fix case via `actual_path`, and
    convert with `unicode_filename`.

    """
    expanded = os.path.expanduser(filename)
    expanded = os.path.expandvars(expanded)
    resolved = os.path.realpath(expanded)
    resolved = os.path.abspath(resolved)
    return unicode_filename(actual_path(resolved))
|
154 |
|
155 |
|
# Module-level state.  Both names are declared here as placeholders and are
# given their real values by set_relative_directory().
RELATIVE_DIR = None
CANONICAL_FILENAME_CACHE = None
# Initialize both globals at import time from the current directory.
set_relative_directory()
|
159 |
|
160 |
|
def isabs_anywhere(filename):
    """Report whether `filename` is absolute under Windows or POSIX rules."""
    # Windows rules are tried first, then POSIX; either one is enough.
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
|
164 |
|
165 |
|
def prep_patterns(patterns):
    """Ready a list of file patterns for a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (`*` or `?`) is kept
    exactly as written.  Every other pattern is converted with `abs_file`,
    which anchors it at the current directory.

    A `patterns` value of None (or anything else falsy) gives an empty list.

    """
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns or []
    ]
|
183 |
|
184 |
|
class TreeMatcher(object):
    """Matches file paths that live under any of a set of directories."""
    def __init__(self, directories):
        # Private copy of the roots, in the order given.
        self.dirs = list(directories)

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """The strings to show when dumping state: the list of roots."""
        return self.dirs

    def match(self, fpath):
        """Is `fpath` one of our roots, or a file somewhere beneath one?"""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # Either the very same path, or the root followed immediately by
            # a separator.  A longer sibling name (root "a/b", path "a/bc")
            # is not a match.
            if fpath == root or fpath[len(root)] == os.sep:
                return True
        return False
|
208 |
|
209 |
|
class ModuleMatcher(object):
    """Matches dotted module names that fall inside any of a set of packages."""
    def __init__(self, module_names):
        # Private copy of the names, in the order given.
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % (self.modules)

    def info(self):
        """The strings to show when dumping state: the list of names."""
        return self.modules

    def match(self, module_name):
        """Is `module_name` one of our names, or a module nested inside one?"""
        # None and the empty string never match.
        if not module_name:
            return False

        for known in self.modules:
            if not module_name.startswith(known):
                continue
            # Exact name, or the known name followed by a dot.  A longer
            # sibling ("pkgextra" against "pkg") is not a match.
            if module_name == known or module_name[len(known)] == '.':
                return True

        return False
|
236 |
|
237 |
|
class FnmatchMatcher(object):
    """A matcher for files by file name pattern.

    `pats` is an iterable of `fnmatch`-style patterns.  They are translated
    to regexes and combined with `join_regex` into one compiled regex.

    """
    def __init__(self, pats):
        # Private copy, so the caller's sequence can change without affecting
        # what info() and repr() report.
        self.pats = list(pats)
        # fnmatch is platform-specific. On Windows, it does the Windows thing
        # of treating / and \ as equivalent. But on other platforms, we need to
        # take care of that ourselves.
        fnpats = (fnmatch.translate(p) for p in self.pats)
        # NOTE(review): this relies on the translated regex spelling a slash
        # as "\/".  re.escape stopped escaping "/" in Python 3.7, so on newer
        # versions this replacement likely finds nothing -- confirm and
        # decide how to handle the unescaped form.
        fnpats = (p.replace(r"\/", r"[\\/]") for p in fnpats)
        # Windows is also case-insensitive.  Ask for that through the compile
        # flags instead of appending "(?i)" to each pattern: a global inline
        # flag that is not at the start of the expression is deprecated since
        # Python 3.6 and is an error from Python 3.11.
        flags = re.IGNORECASE if env.WINDOWS else 0
        self.re = re.compile(join_regex(fnpats), flags)

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our file name patterns?"""
        return self.re.match(fpath) is not None
|
264 |
|
265 |
|
def sep(s):
    """Return the first path separator that appears in `s`.

    Either a backslash or a forward slash, whichever comes first.  When `s`
    contains neither, `os.sep` is returned.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
|
274 |
|
275 |
|
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # List of (compiled regex, result root, pattern sep, result sep)
        # tuples, in the order they were added.
        self.aliases = []

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `CoverageException` if `pattern` (after trailing separators
        are removed) ends with `*`.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch adds an end-of-string
        # anchor to match the whole string, which we don't want: we only
        # match a prefix.  Older Pythons emit the anchor just before a
        # trailing flags group ("...\Z(?ms)"); newer ones emit it as the very
        # last thing ("(?s:...)\Z").  Remove it in either position, but only
        # there, so an anchor-like sequence elsewhere is left alone.  The
        # lower-case spelling is accepted defensively.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r'\\[Zz](\(\?|$)', r'\1', regex_pat)

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.
        # NOTE(review): this relies on the translated regex spelling a slash
        # as "\/".  re.escape stopped escaping "/" in Python 3.7, so on newer
        # versions this replacement likely finds nothing -- confirm.
        regex_pat = regex_pat.replace(r"\/", r"[\\/]")
        # We want case-insensitive matching, so compile with that flag.
        regex = re.compile(regex_pat, re.IGNORECASE)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # `match` anchors at the start, so the matched text is a
                # prefix of `path`.  Swap only that prefix: str.replace would
                # also rewrite any later repetition of the same text.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                new = canonical_filename(new)
                return new
        return path
|
355 |
|
356 |
|
def find_python_files(dirname):
    """Generate the paths of importable Python files under `dirname`.

    The walk is recursive.  `dirname` itself is always searched, on the
    assumption that the caller named it on purpose.  Any directory below it
    is searched only if it contains an __init__.py; otherwise that directory
    and everything beneath it is skipped.

    """
    for visit, (dirpath, dirnames, filenames) in enumerate(os.walk(dirname)):
        is_top = visit == 0
        if not is_top and '__init__.py' not in filenames:
            # No __init__.py means not importable.  Emptying the list in
            # place tells os.walk not to descend any further here.
            dirnames[:] = []
            continue
        for candidate in filenames:
            # Only names that look like real Python files count: they end in
            # .py or .pyw, and contain none of the odd characters that
            # usually indicate editor leftovers.
            looks_like_python = re.match(
                r"^[^.#~!$@%^&*()+=,]+\.pyw?$", candidate
            )
            if looks_like_python:
                yield os.path.join(dirpath, candidate)
|
379 |
|
380 # |
|
381 # eflag: FileType = Python2 |
|