eric7/DebugClients/Python/coverage/files.py

branch
eric7
changeset 8312
800c432b34c8
parent 7427
362cd1b6f81a
child 8527
2bd1325d727e
equal deleted inserted replaced
8311:4e8b98454baa 8312:800c432b34c8
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4 """File wrangling."""
5
6 import hashlib
7 import fnmatch
8 import ntpath
9 import os
10 import os.path
11 import posixpath
12 import re
13 import sys
14
15 from coverage import env
16 from coverage.backward import unicode_class
17 from coverage.misc import contract, CoverageException, join_regex, isolate_module
18
19
20 os = isolate_module(os)
21
22
def set_relative_directory():
    """Record the current directory as the base for `relative_filename`.

    Rebinds the module globals `RELATIVE_DIR` and `CANONICAL_FILENAME_CACHE`;
    any previously cached canonical file names are discarded.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute current directory, with a trailing separator, case-normalized.
    current_dir = abs_file(os.curdir) + os.sep
    RELATIVE_DIR = os.path.normcase(current_dir)

    # Fresh memo for canonical_filename() results, so repeated lookups of
    # the same name don't redo the work.
    CANONICAL_FILENAME_CACHE = {}
33
34
def relative_directory():
    """Return the base directory currently stored in `RELATIVE_DIR`.

    This is the directory that `relative_filename` strips from file names.

    """
    return RELATIVE_DIR
38
39
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` relative to the recorded base directory.

    The base is the directory that was current when `set_relative_directory`
    was last called.  If the case-normalized `filename` does not start with
    that directory, it is returned without being shortened.

    """
    # The prefix test is done on the case-normalized name, but the slice is
    # taken from the name as given so its original casing is preserved.
    if os.path.normcase(filename).startswith(RELATIVE_DIR):
        filename = filename[len(RELATIVE_DIR):]
    return unicode_filename(filename)
52
53
@contract(returns='unicode')
def canonical_filename(filename):
    """Return the canonical form of `filename`.

    A relative name is looked for under the current directory and then each
    `sys.path` entry; the first existing candidate is used.  The chosen name
    is passed through `abs_file`, and the result is memoized in
    `CANONICAL_FILENAME_CACHE` keyed by the name as given.

    """
    if filename in CANONICAL_FILENAME_CACHE:
        return CANONICAL_FILENAME_CACHE[filename]

    candidate = filename
    if not os.path.isabs(filename):
        for base in [os.curdir] + sys.path:
            if base is None:
                continue
            joined = os.path.join(base, filename)
            try:
                found = os.path.exists(joined)
            except UnicodeError:
                # An un-encodable path is treated as not existing.
                found = False
            if found:
                candidate = joined
                break

    CANONICAL_FILENAME_CACHE[filename] = abs_file(candidate)
    return CANONICAL_FILENAME_CACHE[filename]
78
79
# Longest flat name that flat_rootname() will return.
MAX_FLAT = 200


@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """Return a flat, directory-free base name standing in for `filename`.

    Handy when reports for many source files are written into one directory
    and same-named files from different directories must stay distinct.

    For example, the file a/b/c.py will return 'a_b_c_py'

    Names longer than `MAX_FLAT` are shortened to their tail plus an
    underscore and the SHA-1 hex digest of the full flattened name.

    """
    _, without_drive = ntpath.splitdrive(filename)
    flat = re.sub(r"[\\/.:]", "_", without_drive)
    if len(flat) <= MAX_FLAT:
        return flat

    digest = hashlib.sha1(flat.encode('UTF-8')).hexdigest()
    # Leave room for the digest and the joining underscore.
    keep = MAX_FLAT - len(digest) - 1
    return flat[-keep:] + '_' + digest
99
100
if env.WINDOWS:

    # Memo of path -> path with on-disk casing.
    _ACTUAL_PATH_CACHE = {}
    # Memo of directory -> its os.listdir() result ([] if listing failed).
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` with each component spelled in its on-disk case.

        Works recursively from the last component back to the drive spec,
        matching each component case-insensitively against its parent's
        directory listing.  Components that can't be found are kept as given.

        """
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        if path in _ACTUAL_PATH_CACHE:
            return _ACTUAL_PATH_CACHE[path]

        parent, leaf = os.path.split(path)
        if not leaf:
            # This means parent is the drive spec: normalize it.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            entries = _ACTUAL_PATH_LIST_CACHE.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except Exception:
                    # This will raise OSError, or this bizarre TypeError:
                    # https://bugs.python.org/issue1776160
                    entries = []
                _ACTUAL_PATH_LIST_CACHE[parent] = entries
            wanted = os.path.normcase(leaf)
            # First directory entry equal to the leaf ignoring case, else
            # the leaf unchanged.
            leaf = next(
                (entry for entry in entries
                 if os.path.normcase(entry) == wanted),
                leaf
            )
            resolved = os.path.join(parent, leaf)

        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(filename):
        """Return `filename` unchanged: non-Windows paths need no fixing."""
        return filename
144
145
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text.

        Byte strings are decoded with the file system encoding (falling back
        to the default encoding), replacing undecodable bytes.  Anything else
        is returned as it is.

        """
        if not isinstance(filename, str):
            return filename
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(encoding, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return `filename` as Unicode text (it already is: no-op)."""
        return filename
159
160
@contract(returns='unicode')
def abs_file(path):
    """Return `path` as an absolute, normalized, Unicode file name.

    The path is resolved with `os.path.realpath` (skipped if that raises
    `UnicodeError`), made absolute, passed through `actual_path`, and
    finally converted with `unicode_filename`.

    """
    try:
        resolved = os.path.realpath(path)
    except UnicodeError:
        # Fall back to the path as given when it can't be resolved.
        resolved = path
    return unicode_filename(actual_path(os.path.abspath(resolved)))
172
173
def python_reported_file(filename):
    """Return `filename` in the form Python itself would report it.

    When `env.PYBEHAVIOR.report_absolute_files` is true the absolute path is
    returned; otherwise `filename` is returned unchanged.

    """
    if not env.PYBEHAVIOR.report_absolute_files:
        return filename
    return os.path.abspath(filename)
179
180
# Module-level state; both names are given their real values by the
# set_relative_directory() call that follows.
RELATIVE_DIR = CANONICAL_FILENAME_CACHE = None
set_relative_directory()
184
185
def isabs_anywhere(filename):
    """Return True if `filename` is absolute by Windows or POSIX rules."""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
189
190
def prep_patterns(patterns):
    """Return `patterns` readied for a `FnmatchMatcher`.

    A pattern beginning with a wildcard ("*" or "?") is kept exactly as
    written.  Any other pattern is made absolute with `abs_file`.

    A `patterns` of None (or any empty value) yields an empty list.

    """
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in (patterns or [])
    ]
208
209
class TreeMatcher(object):
    """Match file paths against a set of files and directory trees.

    Build it from an iterable of paths (files or directories).  `match`
    accepts a path that equals one of them, or that lies somewhere below
    one of them when it is a directory.

    """
    def __init__(self, paths):
        self.paths = list(paths)

    def __repr__(self):
        return "<TreeMatcher {!r}>".format(self.paths)

    def info(self):
        """Return the list of paths, for display when dumping state."""
        return self.paths

    def match(self, fpath):
        """Return True if `fpath` is one of our paths or inside one."""
        for root in self.paths:
            # Either the very same file, or root followed by a separator,
            # meaning a file somewhere in that directory.
            if fpath == root or fpath.startswith(root + os.sep):
                return True
        return False
239
240
class ModuleMatcher(object):
    """Match dotted module names against a set of modules and packages."""
    def __init__(self, module_names):
        self.modules = list(module_names)

    def __repr__(self):
        return "<ModuleMatcher {!r}>".format(self.modules)

    def info(self):
        """Return the list of module names, for display when dumping state."""
        return self.modules

    def match(self, module_name):
        """Return True if `module_name` is one of our modules or inside one.

        An empty or None `module_name` never matches.

        """
        if not module_name:
            return False

        for package in self.modules:
            # Either the module itself, or a dotted name below the package.
            if module_name == package or module_name.startswith(package + '.'):
                return True

        return False
267
268
class FnmatchMatcher(object):
    """Match file paths against a set of fnmatch-style patterns."""
    def __init__(self, pats):
        self.pats = list(pats)
        # One combined regex for all patterns; case is ignored on Windows.
        self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)

    def __repr__(self):
        return "<FnmatchMatcher {!r}>".format(self.pats)

    def info(self):
        """Return the list of patterns, for display when dumping state."""
        return self.pats

    def match(self, fpath):
        """Return True if `fpath` matches any of our file name patterns."""
        found = self.re.match(fpath)
        return found is not None
285
286
def sep(s):
    """Return the first path separator found in `s`, or os.sep if none."""
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
295
296
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Compile fnmatch `patterns` into one regex matching any of them.

    A slash in a pattern matches either a slash or a backslash, so Windows
    paths are supported even when running elsewhere.

    With `partial` true, a target matches when it merely starts with a
    pattern.  Otherwise the whole target string must match.

    Returns: a compiled regex object.  Compare target strings with its
    .match method.

    """
    translated = []
    for pattern in patterns:
        regex = fnmatch.translate(pattern)
        # Python3.7 fnmatch translates "/" as "/".  Before that, it
        # translates as "\/", so an optional backslash is consumed too.
        regex = re.sub(r"\\?/", r"[\\\\/]", regex)
        if partial:
            # fnmatch always adds a \Z to match the whole string, which we
            # don't want here.  Only a \Z followed by a paren (introducing
            # flags) or at the very end is dropped, so a literal \Z inside
            # the pattern survives.
            regex = re.sub(r'\\Z(\(\?|$)', r'\1', regex)
        translated.append(regex)

    flags = re.IGNORECASE if case_insensitive else 0
    return re.compile(join_regex(translated), flags=flags)
328
329
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self):
        # List of (compiled regex, result root) pairs, in the order added.
        self.aliases = []

    def pprint(self):       # pragma: debugging
        """Dump the important parts of the PathAliases, for debugging."""
        for regex, result in self.aliases:
            print("{!r} --> {!r}".format(regex.pattern, result))

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `CoverageException` if `pattern` ends with "*" once trailing
        separators have been stripped.

        """
        # Strip trailing separators, but leave a lone "/" or "\" intact.
        if len(pattern) > 1:
            pattern = pattern.rstrip(r"\/")

        # The pattern can't end with a wildcard component.
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        if not pattern.endswith(pattern_sep):
            pattern += pattern_sep

        # Make a regex from the pattern.
        regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result in self.aliases:
            m = regex.match(path)
            if m:
                # regex.match() is anchored at the start of `path`, so the
                # matched text is exactly path[:m.end()].  Splice the result
                # root onto the remainder instead of using str.replace(),
                # which would also rewrite any later occurrence of the
                # matched prefix text deeper in the path.
                new = result + path[m.end():]
                new = new.replace(sep(path), sep(result))
                new = canonical_filename(new)
                return new
        return path
409
410
def find_python_files(dirname):
    """Yield every importable Python file under `dirname`, recursively.

    A sub-directory only counts as importable when it contains an
    __init__.py; otherwise it and everything below it is skipped.  `dirname`
    itself is exempt from that check: it was named directly, so the caller
    is assumed to know best.

    """
    # We're only interested in files that look like reasonable Python
    # files: Must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if not at_top and '__init__.py' not in filenames:
            # If a directory doesn't have __init__.py, then it isn't
            # importable and neither are its files.  Emptying dirnames in
            # place stops os.walk from descending any further.
            del dirnames[:]
            continue
        at_top = False
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)

eric ide

mercurial