DebugClients/Python/coverage/files.py

changeset 5141
bc64243b7672
parent 5126
d28b92dabc2b
parent 5140
01484c0afbc6
child 5144
1ab536d25072
equal deleted inserted replaced
5126:d28b92dabc2b 5141:bc64243b7672
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4 """File wrangling."""
5
6 import fnmatch
7 import ntpath
8 import os
9 import os.path
10 import posixpath
11 import re
12 import sys
13
14 from coverage import env
15 from coverage.backward import unicode_class
16 from coverage.misc import contract, CoverageException, join_regex, isolate_module
17
18
# Rebind `os` to whatever `isolate_module` returns for it.
# NOTE(review): presumably this gives the module a private copy of `os` so
# that later patching of the real `os` module by other code cannot affect
# the functions below -- confirm against coverage.misc.isolate_module.
os = isolate_module(os)
20
21
def set_relative_directory():
    """Record the current directory as the base for `relative_filename`.

    Rebinds the module globals `RELATIVE_DIR` and
    `CANONICAL_FILENAME_CACHE`; the cache is replaced with an empty dict,
    so results remembered by `canonical_filename` are dropped.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute, case-normalized current directory, with a trailing separator
    # so that it can be used directly as a string prefix.
    here = abs_file(os.curdir)
    RELATIVE_DIR = os.path.normcase(here + os.sep)

    # Start over with an empty memo for canonical_filename().
    CANONICAL_FILENAME_CACHE = {}
32
33
def relative_directory():
    """Return the base directory last stored by `set_relative_directory`.

    This is the prefix that `relative_filename` strips from file names.

    """
    return RELATIVE_DIR
37
38
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` with the relative directory prefix removed.

    The prefix is the directory that was current when
    `set_relative_directory` was last called.  A name that does not start
    with that prefix (compared after `os.path.normcase`) is passed through
    without being shortened.  The result goes through `unicode_filename`.

    """
    # Compare case-normalized forms, but slice the caller's original spelling.
    is_below_base = os.path.normcase(filename).startswith(RELATIVE_DIR)
    if is_below_base:
        prefix_length = len(RELATIVE_DIR)
        filename = filename[prefix_length:]
    return unicode_filename(filename)
51
52
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    An absolute path with no redundant components and normalized case.

    A relative `filename` is looked for in the current directory and then in
    each `sys.path` entry; the first existing candidate is used.  If none
    exists, `filename` itself is made absolute.  Results are memoized in
    `CANONICAL_FILENAME_CACHE`, keyed by the `filename` the caller passed in.

    """
    if filename not in CANONICAL_FILENAME_CACHE:
        # Keep `filename` untouched so it stays valid as the cache key; the
        # path being resolved is tracked separately in `cf`.
        cf = filename
        if not os.path.isabs(filename):
            for path in [os.curdir] + sys.path:
                if path is None:
                    continue
                f = os.path.join(path, filename)
                if os.path.exists(f):
                    cf = f
                    break
        cf = abs_file(cf)
        CANONICAL_FILENAME_CACHE[filename] = cf
    return CANONICAL_FILENAME_CACHE[filename]
72
73
def flat_rootname(filename):
    """Flatten a file path into a name usable inside a single directory.

    Handy when writing per-source-file output into one directory: files with
    the same base name from different directories still get distinct names.

    Any Windows-style drive prefix is dropped, then each slash, backslash,
    dot and colon becomes an underscore.

    For example, the file a/b/c.py will return 'a_b_c_py'

    """
    _drive, remainder = ntpath.splitdrive(filename)
    return re.sub(r"[\\/.:]", "_", remainder)
86
87
if env.WINDOWS:

    # Memo of actual_path() results, keyed by the path that was asked about.
    _ACTUAL_PATH_CACHE = {}
    # Memo of os.listdir() results, keyed by (case-corrected) directory.
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Get the actual path of `path`, including the correct case.

        The path is split into head and tail; the head is corrected
        recursively, and the tail is replaced by the directory entry whose
        `os.path.normcase` form equals the tail's.  If no entry matches, or
        the directory cannot be listed, the tail is kept as given.  Results
        and directory listings are cached for the life of the process.

        """
        # On Python 2, work with byte strings in the file system encoding.
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        if path in _ACTUAL_PATH_CACHE:
            return _ACTUAL_PATH_CACHE[path]

        head, tail = os.path.split(path)
        if not tail:
            # This means head is the drive spec: normalize it.
            actpath = head.upper()
        elif not head:
            # A bare name with no directory part: nothing to look it up in.
            actpath = tail
        else:
            # Fix the case of the parent first, then find our entry in it.
            head = actual_path(head)
            if head in _ACTUAL_PATH_LIST_CACHE:
                files = _ACTUAL_PATH_LIST_CACHE[head]
            else:
                try:
                    files = os.listdir(head)
                except OSError:
                    # Unlistable directory: treat it as empty, and cache that.
                    files = []
                _ACTUAL_PATH_LIST_CACHE[head] = files
            normtail = os.path.normcase(tail)
            for f in files:
                if os.path.normcase(f) == normtail:
                    # Use the spelling reported by the directory listing.
                    tail = f
                    break
            actpath = os.path.join(head, tail)
        _ACTUAL_PATH_CACHE[path] = actpath
        return actpath

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms.

        Returns `filename` unchanged.

        """
        return filename
129
130
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text.

        A byte string is decoded with the file system encoding (or the
        default encoding if there is none), replacing undecodable bytes.
        Anything else is returned as it is.

        """
        if not isinstance(filename, str):
            return filename
        fs_encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(fs_encoding, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text: here it is returned as it is."""
        return filename
144
145
@contract(returns='unicode')
def abs_file(filename):
    """Return the absolute, normalized, Unicode form of `filename`.

    In order: `~` and environment variables are expanded, symbolic links
    are resolved and the path is made absolute, then the result is passed
    through `actual_path` and `unicode_filename`.

    """
    expanded = os.path.expandvars(os.path.expanduser(filename))
    resolved = os.path.abspath(os.path.realpath(expanded))
    return unicode_filename(actual_path(resolved))
154
155
# Module state managed by set_relative_directory().  The None values are
# placeholders only: the call below replaces both at import time.
RELATIVE_DIR = None
CANONICAL_FILENAME_CACHE = None
set_relative_directory()
159
160
def isabs_anywhere(filename):
    """Tell whether `filename` is absolute by Windows or by POSIX rules."""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
164
165
def prep_patterns(patterns):
    """Get file patterns ready to hand to a `FnmatchMatcher`.

    A pattern that begins with a wildcard (`*` or `?`) is kept exactly as
    given.  Every other pattern is made absolute with `abs_file`.

    `patterns` may be None, in which case the result is an empty list.

    """
    wildcards = ("*", "?")
    return [
        pattern if pattern.startswith(wildcards) else abs_file(pattern)
        for pattern in (patterns or [])
    ]
183
184
class TreeMatcher(object):
    """Matches file paths that lie in any of a set of directory trees."""

    def __init__(self, directories):
        # Keep our own list so later changes to the argument don't affect us.
        self.dirs = list(directories)

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """Return the tree roots, as strings to show when dumping state."""
        return self.dirs

    def match(self, fpath):
        """Is `fpath` one of our roots, or a path underneath one of them?"""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            rest = fpath[len(root):]
            # Either the very same path, or the root followed by a separator.
            # A mere string prefix ("/a/b" vs "/a/bc") does not count.
            if not rest or rest[0] == os.sep:
                return True
        return False
208
209
class ModuleMatcher(object):
    """Matches dotted module names that belong to any of a set of packages."""

    def __init__(self, module_names):
        # Keep our own list so later changes to the argument don't affect us.
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % self.modules

    def info(self):
        """Return the module names, as strings to show when dumping state."""
        return self.modules

    def match(self, module_name):
        """Is `module_name` one of our modules, or nested inside one?"""
        if not module_name:
            return False

        for package in self.modules:
            if not module_name.startswith(package):
                continue
            # Exactly the package itself ...
            if len(module_name) == len(package):
                return True
            # ... or something below it: "pkg.mod" counts, "pkgx" does not.
            if module_name[len(package)] == '.':
                return True

        return False
236
237
class FnmatchMatcher(object):
    """A matcher for files by file name pattern.

    `pats` is a sequence of `fnmatch`-style patterns.  A path matches if it
    matches any one of them in full.  A slash in a pattern matches either a
    slash or a backslash in the path, and matching ignores case when
    `env.WINDOWS` is true.

    """

    # One token of an `fnmatch.translate` result: an escaped character, a
    # complete bracket expression, or a bare slash.  Scanning token by token
    # lets us rewrite slashes without touching the inside of a bracket
    # expression or the second half of an escaped backslash.
    _TOKEN_RE = re.compile(r"\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|/", re.DOTALL)

    def __init__(self, pats):
        self.pats = pats[:]
        # fnmatch is platform-specific. On Windows, it does the Windows thing
        # of treating / and \ as equivalent. But on other platforms, we need to
        # take care of that ourselves.
        fnpats = (self._either_separator(fnmatch.translate(p)) for p in pats)
        # Windows is also case-insensitive.  Pass that as a compile flag: an
        # inline "(?i)" placed after the pattern is a global flag that is not
        # at the start of the expression, which newer Pythons reject.
        flags = re.IGNORECASE if env.WINDOWS else 0
        self.re = re.compile(join_regex(fnpats), flags)

    @classmethod
    def _either_separator(cls, regex):
        """Make each slash in a translated pattern match `/` or `\\`.

        Handles both forms `fnmatch.translate` can produce for a slash: the
        escaped form (backslash then slash) and the bare `/`.

        """
        def widen(token_match):
            token = token_match.group(0)
            if token in ("/", "\\/"):
                return r"[\\/]"
            return token
        return cls._TOKEN_RE.sub(widen, regex)

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our file name patterns?"""
        return self.re.match(fpath) is not None
264
265
def sep(s):
    """Return the first path separator (`/` or `\\`) that appears in `s`.

    If `s` contains neither, `os.sep` is returned instead.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
274
275
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """

    # One token of an `fnmatch.translate` result: an escaped character, a
    # complete bracket expression, or a bare slash.  Scanning token by token
    # lets us rewrite slashes without touching the inside of a bracket
    # expression or the second half of an escaped backslash.
    _TOKEN_RE = re.compile(r"\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|/", re.DOTALL)

    def __init__(self):
        # Tuples of (compiled regex, result, pattern_sep, result_sep), in the
        # order they were added.
        self.aliases = []

    @classmethod
    def _prefix_regex(cls, pattern):
        """Compile an fnmatch `pattern` into a case-insensitive prefix regex.

        The returned regex matches at the start of a path without having to
        consume all of it, and treats `/` in the pattern as either separator.

        """
        regex_pat = fnmatch.translate(pattern)

        # fnmatch anchors the regex at the end of the string, which we don't
        # want.  Depending on the Python version the anchor is either the
        # last thing in the regex or is followed by a flags group "(?...)";
        # drop it in both layouts.
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  fnmatch emits the slash escaped on some Python
        # versions and bare on others; handle both.
        def widen(token_match):
            token = token_match.group(0)
            if token in ("/", "\\/"):
                return r"[\\/]"
            return token
        regex_pat = cls._TOKEN_RE.sub(widen, regex_pat)

        # We want case-insensitive matching.  Use a compile flag rather than
        # an inline "(?i)" so its position in the pattern never matters.
        return re.compile(regex_pat, re.IGNORECASE)

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `CoverageException` if `pattern` ends with `*` (after any
        trailing separators are removed).

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        # End on a separator so that only whole directory names can match.
        pattern += pattern_sep

        regex = self._prefix_regex(pattern)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched leading part.  The same text showing
                # up again deeper in the path must be left alone.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                new = canonical_filename(new)
                return new
        return path
355
356
def find_python_files(dirname):
    """Yield the importable Python files under `dirname`, recursively.

    `dirname` itself is searched whether or not it holds a __init__.py: it
    was named directly, so the caller is trusted.  A sub-directory only
    counts as importable if it holds a __init__.py; one that doesn't is
    skipped together with everything beneath it.

    """
    # We're only interested in files that look like reasonable Python
    # files: Must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        importable = at_top or '__init__.py' in filenames
        at_top = False
        if not importable:
            # Emptying the list in place stops os.walk from descending.
            dirnames[:] = []
            continue
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)
379
380 #
381 # eflag: FileType = Python2

eric ide

mercurial