src/eric7/DebugClients/Python/coverage/files.py

branch
eric7
changeset 9209
b99e7fd55fd3
parent 8991
2fc945191992
child 9252
32dd11232e06
equal deleted inserted replaced
9208:3fc8dfeb6ebe 9209:b99e7fd55fd3
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt
3
4 """File wrangling."""
5
6 import hashlib
7 import fnmatch
8 import ntpath
9 import os
10 import os.path
11 import posixpath
12 import re
13 import sys
14
15 from coverage import env
16 from coverage.exceptions import ConfigError
17 from coverage.misc import contract, human_sorted, isolate_module, join_regex
18
19
# Rebind `os` to whatever isolate_module() returns for it, so every later
# `os.` reference in this file goes through that object.
# NOTE(review): presumably this shields the module from code under test that
# patches `os` -- confirm against coverage.misc.isolate_module.
os = isolate_module(os)
21
22
def set_relative_directory():
    """Make the current directory the base used by `relative_filename`.

    Rebinds the module globals `RELATIVE_DIR` and
    `CANONICAL_FILENAME_CACHE`; the cache is replaced with an empty dict.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute form of the current directory, with a trailing separator so
    # that prefix tests only succeed on a whole path component.
    here = abs_file(os.curdir)
    RELATIVE_DIR = os.path.normcase(here + os.sep)

    # Memo of canonical_filename() results, so the work isn't repeated.
    CANONICAL_FILENAME_CACHE = {}
33
34
def relative_directory():
    """Return the directory that `relative_filename` is relative to.

    This is the module-level `RELATIVE_DIR` value, which
    `set_relative_directory` assigns (case-normalized, ending with a path
    separator).

    """
    return RELATIVE_DIR
38
39
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` relative to the recorded base directory.

    The base is the directory that was current when
    `set_relative_directory` was last called.  The comparison is done on the
    case-normalized name, but the returned text is sliced from `filename`
    as given.  A name that is not under the base directory is returned
    unchanged.

    """
    base = RELATIVE_DIR
    if not os.path.normcase(filename).startswith(base):
        return filename
    return filename[len(base):]
52
53
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    The result is an absolute path with no redundant components and
    normalized case.  A relative `filename` is looked for in the current
    directory and then in each `sys.path` entry; the first location where it
    exists is used.  Results are memoized in `CANONICAL_FILENAME_CACHE`.

    """
    cache = CANONICAL_FILENAME_CACHE
    if filename in cache:
        return cache[filename]

    found = filename
    if not os.path.isabs(filename):
        search_dirs = [os.curdir] + sys.path
        for directory in search_dirs:
            if directory is None:
                continue
            candidate = os.path.join(directory, filename)
            try:
                present = os.path.exists(candidate)
            except UnicodeError:
                # An un-encodable candidate name is treated as not existing.
                present = False
            if present:
                found = candidate
                break

    cache[filename] = abs_file(found)
    return cache[filename]
78
79
MAX_FLAT = 100

@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """Return a flat (directory-free) base name standing in for `filename`.

    Useful when files describing the code are all written into a single
    directory, but same-named files from different directories still have
    to be told apart.  The directory part is reduced to 16 hex digits of
    its SHA3-256 hash, and dots in the base name become underscores.

    For example, the file a/b/c.py will return 'd_86bbcbe134d28fd2_c_py'

    """
    # ntpath.split understands both "/" and "\" as separators.
    dirname, basename = ntpath.split(filename)
    flat_base = basename.replace(".", "_")
    if not dirname:
        return flat_base
    digest = hashlib.new("sha3_256", dirname.encode("UTF-8")).hexdigest()
    return f"d_{digest[:16]}_" + flat_base
100
101
if env.WINDOWS:

    _ACTUAL_PATH_CACHE = {}
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Return `path` spelled with the case the file system reports.

        Works one component at a time: the parent is resolved recursively,
        then the last component is looked up in the parent's directory
        listing.  Both final answers and directory listings are memoized.

        """
        try:
            return _ACTUAL_PATH_CACHE[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Only the drive spec is left: normalize it.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            if parent not in _ACTUAL_PATH_LIST_CACHE:
                try:
                    listing = os.listdir(parent)
                except Exception:
                    # This will raise OSError, or this bizarre TypeError:
                    # https://bugs.python.org/issue1776160
                    listing = []
                _ACTUAL_PATH_LIST_CACHE[parent] = listing
            entries = _ACTUAL_PATH_LIST_CACHE[parent]
            wanted = os.path.normcase(leaf)
            # Take the first entry that matches once case is normalized;
            # keep the name as given when nothing matches.
            leaf = next(
                (entry for entry in entries if os.path.normcase(entry) == wanted),
                leaf,
            )
            resolved = os.path.join(parent, leaf)
        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(path):
        """Return `path` unchanged: no case fix-up on non-Windows platforms."""
        return path
143
144
@contract(returns='unicode')
def abs_file(path):
    """Return the absolute normalized form of `path`.

    The path goes through `os.path.realpath`, then `os.path.abspath`, and
    finally `actual_path`.

    """
    resolved = os.path.realpath(path)
    absolute = os.path.abspath(resolved)
    return actual_path(absolute)
149
150
def python_reported_file(filename):
    """Return `filename` the way Python itself would describe the file.

    When `env.PYBEHAVIOR.report_absolute_files` is true the name is made
    absolute; otherwise it is returned as given.

    """
    if not env.PYBEHAVIOR.report_absolute_files:
        return filename
    return os.path.abspath(filename)
156
157
# Module state used by relative_filename() and canonical_filename().  The
# names are bound to None first so they exist at module level, then
# set_relative_directory() fills them in from the directory that is current
# at import time.
RELATIVE_DIR = None
CANONICAL_FILENAME_CACHE = None
set_relative_directory()
161
162
def isabs_anywhere(filename):
    """Is `filename` an absolute path on any OS?

    True when either the Windows (`ntpath`) or the POSIX (`posixpath`)
    rules consider the name absolute.

    """
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
166
167
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern that begins with a wildcard ("*" or "?") is kept exactly as
    written.  Any other pattern is made absolute with `abs_file`, i.e.
    relative to the current directory.

    If `patterns` is None (or empty), an empty list is returned.

    """
    wildcards = ("*", "?")
    return [
        pat if pat.startswith(wildcards) else abs_file(pat)
        for pat in patterns or []
    ]
185
186
class TreeMatcher:
    """A matcher for files in a tree.

    Construct with a list of paths, either files or directories.  Paths match
    with the `match` method if they are one of the files, or if they are
    somewhere in a subtree rooted at one of the directories.

    """
    def __init__(self, paths, name="unknown"):
        # Kept only for display (`__repr__` and `info`).
        self.original_paths = human_sorted(paths)
        # Case-normalized forms, which `match` compares against.
        self.paths = list(map(os.path.normcase, paths))
        self.name = name

    def __repr__(self):
        return f"<TreeMatcher {self.name} {self.original_paths!r}>"

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.original_paths

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        `fpath` matches when, after case normalization, it equals one of the
        stored paths or lies below one of them.  A stored path that already
        ends with a separator (a file system root such as "/", or a
        directory written with a trailing slash) is handled too: its files
        are recognised as being inside the tree.

        """
        fpath = os.path.normcase(fpath)
        for root in self.paths:
            if fpath == root:
                # This is the same file!
                return True
            # Compare against the root plus exactly one separator, so that
            # "/a/b" does not claim "/a/bc", and a root that already ends
            # with a separator is not expected to be followed by a second.
            prefix = root if root.endswith(os.sep) else root + os.sep
            if fpath.startswith(prefix):
                # This is a file in the directory
                return True
        return False
219
220
class ModuleMatcher:
    """A matcher for modules in a tree."""
    def __init__(self, module_names, name="unknown"):
        self.name = name
        self.modules = list(module_names)

    def __repr__(self):
        return f"<ModuleMatcher {self.name} {self.modules!r}>"

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.modules

    def match(self, module_name):
        """Does `module_name` indicate a module in one of our packages?

        True when `module_name` is exactly one of the stored names, or is a
        dotted name below one of them.  An empty or None `module_name` never
        matches.

        """
        if not module_name:
            return False

        return any(
            # Either the package itself, or a module inside the package.
            module_name == pkg or module_name.startswith(pkg + '.')
            for pkg in self.modules
        )
248
249
class FnmatchMatcher:
    """A matcher for files by file name pattern."""
    def __init__(self, pats, name="unknown"):
        self.name = name
        self.pats = list(pats)
        # One compiled regex for all the patterns; case is ignored on Windows.
        self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)

    def __repr__(self):
        return f"<FnmatchMatcher {self.name} {self.pats!r}>"

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our file name patterns?"""
        found = self.re.match(fpath)
        return found is not None
267
268
def sep(s):
    """Return the first path separator appearing in `s`, or os.sep if none.

    Both "/" and "\\" count as separators.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
277
278
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
    """Convert fnmatch patterns to a compiled regex that matches any of them.

    Slashes are always converted to match either slash or backslash, for
    Windows support, even when running elsewhere.

    If `partial` is true, then the pattern will match if the target string
    starts with the pattern.  Otherwise, it must match the entire string.

    Returns: a compiled regex object.  Use the .match method to compare target
    strings.

    """
    translated = []
    for pattern in patterns:
        regex = fnmatch.translate(pattern)
        # Python3.7 fnmatch translates "/" as "/".  Before that, it
        # translates as "\/", so an optional backslash is consumed too.
        regex = re.sub(r"\\?/", r"[\\\\/]", regex)
        if partial:
            # fnmatch always adds a \Z to match the whole string, which is
            # not wanted for a prefix match.  The \Z is removed only when it
            # is followed by a paren (introducing flags) or is at the end,
            # to keep from destroying a literal \Z in the pattern.
            regex = re.sub(r'\\Z(\(\?|$)', r'\1', regex)
        translated.append(regex)

    flags = re.IGNORECASE if case_insensitive else 0
    return re.compile(join_regex(translated), flags=flags)
310
311
class PathAliases:
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """
    def __init__(self, relative=False):
        # List of (compiled prefix regex, replacement root) pairs, in the
        # order they were added.
        self.aliases = []
        # When true, `map` does not canonicalize the mapped path.
        self.relative = relative

    def pprint(self):       # pragma: debugging
        """Dump the important parts of the PathAliases, for debugging."""
        print(f"Aliases (relative={self.relative}):")
        for regex, result in self.aliases:
            print(f"{regex.pattern!r} --> {result!r}")

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.

        Raises `ConfigError` if `pattern` ends with a wildcard.

        """
        # Remember the separator style before trailing separators are removed.
        pattern_sep = sep(pattern)

        if len(pattern) > 1:
            pattern = pattern.rstrip(r"\/")

        # The pattern can't end with a wildcard component.
        if pattern.endswith("*"):
            raise ConfigError("Pattern must not end with wildcards.")

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern + pattern_sep):
            pattern = abs_file(pattern)
        if not pattern.endswith(pattern_sep):
            pattern += pattern_sep

        # Make a regex from the pattern.
        regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        Only the matched leading part of `path` is replaced; the same text
        appearing again deeper in the path is left alone.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path (unless this object was created with `relative` true,
        in which case it is not canonicalized).  If no mapping has happened,
        it is the original value of `path` unchanged.

        """
        for regex, result in self.aliases:
            m = regex.match(path)
            if m:
                # `match` anchors at the start, so everything up to m.end()
                # is the aliased root.  Splice in the new root rather than
                # using str.replace, which would also rewrite any later
                # occurrence of the same text.
                new = result + path[m.end():]
                new = new.replace(sep(path), sep(result))
                if not self.relative:
                    new = canonical_filename(new)
                return new
        return path
396
397
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    To be importable, the files have to be in a directory with a __init__.py,
    except for `dirname` itself, which isn't required to have one.  The
    assumption is that `dirname` was specified directly, so the user knows
    best, but sub-directories are checked for a __init__.py to be sure we only
    find the importable files.

    """
    # Reasonable-looking Python files: must end with .py or .pyw, and must
    # not have certain funny characters that probably mean editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if at_top:
            # `dirname` itself is accepted without a __init__.py.
            at_top = False
        elif '__init__.py' not in filenames:
            # A directory without __init__.py isn't importable, and neither
            # is anything below it: empty the list in place so os.walk does
            # not descend further.
            dirnames[:] = []
            continue
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)

eric ide

mercurial