eric6/DebugClients/Python/coverage/files.py

changeset 6942
2602857055c5
parent 6649
f1b3a73831c9
child 7427
362cd1b6f81a
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3
4 """File wrangling."""
5
6 import hashlib
7 import fnmatch
8 import ntpath
9 import os
10 import os.path
11 import posixpath
12 import re
13 import sys
14
15 from coverage import env
16 from coverage.backward import unicode_class
17 from coverage.misc import contract, CoverageException, join_regex, isolate_module
18
19
20 os = isolate_module(os)
21
22
def set_relative_directory():
    """Reset the base directory used by `relative_filename`.

    Stores the absolute form of the current directory (with a trailing
    separator, passed through `os.path.normcase`) in `RELATIVE_DIR`, and
    replaces `CANONICAL_FILENAME_CACHE` with an empty dict.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute path of the current directory, ending with the separator.
    here = abs_file(os.curdir) + os.sep
    RELATIVE_DIR = os.path.normcase(here)

    # canonical_filename() memoizes its results in this dict; start afresh.
    CANONICAL_FILENAME_CACHE = dict()
33
34
def relative_directory():
    """Return the base directory that `relative_filename` strips from paths.

    This is the current value of the module-level `RELATIVE_DIR`.

    """
    base_dir = RELATIVE_DIR
    return base_dir
38
39
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` relative to the recorded base directory.

    If the case-normalized `filename` starts with `RELATIVE_DIR` (the
    directory recorded by the last `set_relative_directory` call), that many
    leading characters are dropped.  Otherwise the name is kept as it is.
    Either way the result is passed through `unicode_filename`.

    """
    base = RELATIVE_DIR
    if os.path.normcase(filename).startswith(base):
        filename = filename[len(base):]
    return unicode_filename(filename)
52
53
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    An absolute path with no redundant components and normalized case, as
    produced by `abs_file`.

    A relative `filename` is first looked for in the current directory and
    then under each entry of `sys.path`; the first candidate that exists is
    the one made absolute.  If no candidate exists, `filename` itself is
    made absolute.

    Results are memoized in `CANONICAL_FILENAME_CACHE`, keyed by the
    `filename` argument exactly as it was passed in.

    """
    if filename not in CANONICAL_FILENAME_CACHE:
        # Leave `filename` untouched so it stays the cache key.  Storing the
        # result under the resolved candidate instead would make every
        # relative name miss the cache and repeat the search on each call.
        candidate = filename
        if not os.path.isabs(filename):
            for path in [os.curdir] + sys.path:
                if path is None:
                    continue
                f = os.path.join(path, filename)
                try:
                    exists = os.path.exists(f)
                except UnicodeError:
                    # A path that can't be handled is treated as missing.
                    exists = False
                if exists:
                    candidate = f
                    break
        CANONICAL_FILENAME_CACHE[filename] = abs_file(candidate)
    return CANONICAL_FILENAME_CACHE[filename]
77
78
# Longest flat name that `flat_rootname` will return.
MAX_FLAT = 200

@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """Return a flat, directory-free base name derived from `filename`.

    Useful when files describing the code are all written into a single
    directory but same-named sources from different directories must stay
    distinguishable.

    The drive part (as split by `ntpath.splitdrive`) is dropped, and every
    backslash, slash, dot and colon becomes an underscore, so a/b/c.py
    gives 'a_b_c_py'.  A name longer than `MAX_FLAT` is cut down to its
    tail followed by '_' and the SHA-1 hex digest of the full flattened
    name, for a total of `MAX_FLAT` characters.

    """
    _drive, rest = ntpath.splitdrive(filename)
    flat = re.sub(r"[\\/.:]", "_", rest)
    if len(flat) <= MAX_FLAT:
        return flat

    digest = hashlib.sha1(flat.encode('UTF-8')).hexdigest()
    # Leave room for the digest and the joining underscore.
    keep = MAX_FLAT - len(digest) - 1
    return flat[-keep:] + '_' + digest
98
99
if env.WINDOWS:

    # Memo of actual_path() results, keyed by the path asked about.
    _ACTUAL_PATH_CACHE = {}
    # Memo of os.listdir() results, keyed by directory.
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` spelled with the case found in directory listings.

        Each component is compared, via `os.path.normcase`, against the
        entries of its parent directory and replaced by the matching entry
        when there is one.  A directory that cannot be listed is treated as
        empty, which leaves the component as given.  Results are memoized.

        """
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        try:
            return _ACTUAL_PATH_CACHE[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: `parent` is the drive spec,
            # so normalize it.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            entries = _ACTUAL_PATH_LIST_CACHE.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    entries = []
                _ACTUAL_PATH_LIST_CACHE[parent] = entries
            wanted = os.path.normcase(leaf)
            # First listing entry that matches; else keep the given name.
            leaf = next(
                (entry for entry in entries
                 if os.path.normcase(entry) == wanted),
                leaf,
            )
            resolved = os.path.join(parent, leaf)
        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(filename):
        """Return `filename` unchanged (the non-Windows implementation)."""
        return filename
141
142
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text.

        A `str` is decoded with the file system encoding, or with the
        default encoding when that is not set; undecodable bytes are
        replaced.  Any other value is returned as it is.

        """
        if not isinstance(filename, str):
            return filename
        codec = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(codec, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text; here it is returned as is."""
        return filename
156
157
@contract(returns='unicode')
def abs_file(filename):
    """Return the absolute, normalized form of `filename`.

    The name has `~` and environment variables expanded, is resolved with
    `os.path.realpath` (skipped if that raises `UnicodeError`), made
    absolute, then passed through `actual_path` and `unicode_filename`.

    """
    expanded = os.path.expandvars(os.path.expanduser(filename))
    try:
        expanded = os.path.realpath(expanded)
    except UnicodeError:
        # Carry on with the unresolved path.
        pass
    absolute = os.path.abspath(expanded)
    return unicode_filename(actual_path(absolute))
170
171
# Module state: the base directory for relative_filename() and the memo used
# by canonical_filename().  Both names are declared here and then given
# their real values by the set_relative_directory() call.
RELATIVE_DIR = CANONICAL_FILENAME_CACHE = None
set_relative_directory()
175
176
def isabs_anywhere(filename):
    """Tell whether `filename` is absolute under Windows or POSIX rules."""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
180
181
def prep_patterns(patterns):
    """Ready a list of file patterns for a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (`*` or `?`) is kept
    exactly as given.  Every other pattern is made absolute with
    `abs_file`.

    `patterns` may be None, in which case the result is an empty list.

    """
    return [
        pattern if pattern.startswith(("*", "?")) else abs_file(pattern)
        for pattern in patterns or []
    ]
199
200
class TreeMatcher(object):
    """Match file paths against a set of files and directory trees.

    Built from an iterable of paths, each naming a file or a directory.
    `match` accepts a path that equals one of them, or that lies somewhere
    below one of them when it is a directory.

    """
    def __init__(self, paths):
        self.paths = list(paths)

    def __repr__(self):
        return "<TreeMatcher %r>" % (self.paths,)

    def info(self):
        """Return the paths as a list of strings, for state dumps."""
        return self.paths

    def match(self, fpath):
        """Tell whether `fpath` is one of our paths or inside one of them."""
        for root in self.paths:
            if not fpath.startswith(root):
                continue
            remainder = fpath[len(root):]
            if not remainder:
                # Exactly the same path.
                return True
            if remainder[0] == os.sep:
                # A file somewhere under the directory `root`.
                return True
        return False
230
231
class ModuleMatcher(object):
    """Match dotted module names against a set of modules and packages."""
    def __init__(self, module_names):
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % (self.modules,)

    def info(self):
        """Return the module names as a list of strings, for state dumps."""
        return self.modules

    def match(self, module_name):
        """Tell whether `module_name` is one of our modules or inside one.

        An empty or missing `module_name` never matches.

        """
        if not module_name:
            return False

        # Either the very same name, or a name continuing with a dot, which
        # makes it a module inside the package.
        return any(
            module_name == known or module_name.startswith(known + '.')
            for known in self.modules
        )
258
259
class FnmatchMatcher(object):
    """Match file paths against a set of fnmatch-style patterns."""
    def __init__(self, pats):
        self.pats = list(pats)
        # One compiled regex covering every pattern; case is ignored when
        # env.WINDOWS is true.
        self.re = fnmatches_to_regex(
            self.pats,
            case_insensitive=env.WINDOWS,
        )

    def __repr__(self):
        return "<FnmatchMatcher %r>" % (self.pats,)

    def info(self):
        """Return the patterns as a list of strings, for state dumps."""
        return self.pats

    def match(self, fpath):
        """Tell whether `fpath` matches any of our file name patterns."""
        found = self.re.match(fpath)
        return found is not None
276
277
def sep(s):
    """Return the first path separator appearing in `s`.

    That is the first slash or backslash in the string; when it contains
    neither, `os.sep` is returned.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
286
287
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Compile fnmatch `patterns` into one regex matching any of them.

    A slash in a pattern always matches either a slash or a backslash, for
    Windows support, even when running elsewhere.

    With `partial` true, a target string only has to start with something
    the pattern matches.  Otherwise the whole string has to match.

    With `case_insensitive` true, the regex is compiled with
    `re.IGNORECASE`.

    Returns: a compiled regex object.  Compare target strings with its
    .match method.

    """
    def _converted():
        """Yield the regex source text for each pattern in turn."""
        for pattern in patterns:
            regex = fnmatch.translate(pattern)
            # Python 3.7 fnmatch translates "/" as "/".  Earlier versions
            # give "\/", so an optional backslash is consumed as well.
            regex = re.sub(r"\\?/", r"[\\\\/]", regex)
            if partial:
                # fnmatch always appends \Z to force a whole-string match,
                # which isn't wanted here.  Only drop a \Z that is followed
                # by a paren (introducing flags) or sits at the very end, so
                # that a literal \Z inside the pattern survives.
                regex = re.sub(r'\\Z(\(\?|$)', r'\1', regex)
            yield regex

    flags = re.IGNORECASE if case_insensitive else 0
    return re.compile(join_regex(_converted()), flags=flags)
319
320
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # List of (compiled regex, result prefix) pairs, in the order added.
        self.aliases = []

    def pprint(self):       # pragma: debugging
        """Dump the important parts of the PathAliases, for debugging."""
        for regex, result in self.aliases:
            print("{0!r} --> {1!r}".format(regex.pattern, result))

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # Drop trailing separators, but leave a one-character pattern
        # (such as a lone "/") alone.
        if len(pattern) > 1:
            pattern = pattern.rstrip(r"\/")

        # The pattern can't end with a wildcard component.
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        if not pattern.endswith(pattern_sep):
            pattern += pattern_sep

        # Make a regex from the pattern.
        regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result in self.aliases:
            m = regex.match(path)
            if m:
                # `match` anchors at the start of `path`, so the matched text
                # is its leading part.  Swap out only that leading part: a
                # plain str.replace would also rewrite any later occurrence
                # of the same text deeper inside the path.
                new = result + path[m.end():]
                new = new.replace(sep(path), sep(result))
                new = canonical_filename(new)
                return new
        return path
400
401
def find_python_files(dirname):
    """Yield every importable Python file under `dirname`, recursively.

    A sub-directory only counts as importable when it holds a __init__.py;
    one without it is skipped together with everything beneath it.
    `dirname` itself is exempt from that check: the assumption is that it
    was named directly, so the user knows best.

    """
    # Reasonable Python file names: they end with .py or .pyw, and contain
    # none of the funny characters that usually mean editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if not at_top and '__init__.py' not in filenames:
            # No __init__.py: the directory isn't importable and neither
            # are its files.  Emptying `dirnames` in place stops os.walk
            # from descending any further.
            del dirnames[:]
            continue
        at_top = False
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)

eric ide

mercurial