11 import posixpath |
11 import posixpath |
12 import re |
12 import re |
13 import sys |
13 import sys |
14 |
14 |
15 from coverage import env |
15 from coverage import env |
16 from coverage.backward import unicode_class |
16 from coverage.exceptions import CoverageException |
17 from coverage.misc import contract, CoverageException, join_regex, isolate_module |
17 from coverage.misc import contract, human_sorted, isolate_module, join_regex |
18 |
18 |
19 |
19 |
20 os = isolate_module(os) |
20 os = isolate_module(os) |
21 |
21 |
22 |
22 |
46 |
46 |
47 """ |
47 """ |
48 fnorm = os.path.normcase(filename) |
48 fnorm = os.path.normcase(filename) |
49 if fnorm.startswith(RELATIVE_DIR): |
49 if fnorm.startswith(RELATIVE_DIR): |
50 filename = filename[len(RELATIVE_DIR):] |
50 filename = filename[len(RELATIVE_DIR):] |
51 return unicode_filename(filename) |
51 return filename |
52 |
52 |
53 |
53 |
54 @contract(returns='unicode') |
54 @contract(returns='unicode') |
55 def canonical_filename(filename): |
55 def canonical_filename(filename): |
56 """Return a canonical file name for `filename`. |
56 """Return a canonical file name for `filename`. |
75 cf = abs_file(cf) |
75 cf = abs_file(cf) |
76 CANONICAL_FILENAME_CACHE[filename] = cf |
76 CANONICAL_FILENAME_CACHE[filename] = cf |
77 return CANONICAL_FILENAME_CACHE[filename] |
77 return CANONICAL_FILENAME_CACHE[filename] |
78 |
78 |
79 |
79 |
# Not referenced by flat_rootname below; kept as a module-level constant.
MAX_FLAT = 100


@contract(filename='unicode', returns='unicode')
def flat_rootname(filename):
    """A base for a flat file name to correspond to this file.

    Useful for writing files about the code where you want all the files in
    the same directory, but need to differentiate same-named files from
    different directories.

    The directory part of `filename` (as split by `ntpath.split`) is replaced
    by "d_", the first 16 hex digits of its SHA3-256 digest, and "_".  Dots in
    the base name become underscores.  A name with no directory part gets no
    prefix at all.

    For example, the file a/b/c.py will return 'd_86bbcbe134d28fd2_c_py'

    """
    dirname, basename = ntpath.split(filename)
    if dirname:
        # A short fingerprint of the directory keeps same-named files from
        # different directories apart without embedding the whole path.
        fp = hashlib.new("sha3_256", dirname.encode("UTF-8")).hexdigest()[:16]
        prefix = f"d_{fp}_"
    else:
        prefix = ""
    return prefix + basename.replace(".", "_")
99 |
100 |
100 |
101 |
101 if env.WINDOWS: |
102 if env.WINDOWS: |
102 |
103 |
103 _ACTUAL_PATH_CACHE = {} |
104 _ACTUAL_PATH_CACHE = {} |
104 _ACTUAL_PATH_LIST_CACHE = {} |
105 _ACTUAL_PATH_LIST_CACHE = {} |
105 |
106 |
106 def actual_path(path): |
107 def actual_path(path): |
107 """Get the actual path of `path`, including the correct case.""" |
108 """Get the actual path of `path`, including the correct case.""" |
108 if env.PY2 and isinstance(path, unicode_class): |
|
109 path = path.encode(sys.getfilesystemencoding()) |
|
110 if path in _ACTUAL_PATH_CACHE: |
109 if path in _ACTUAL_PATH_CACHE: |
111 return _ACTUAL_PATH_CACHE[path] |
110 return _ACTUAL_PATH_CACHE[path] |
112 |
111 |
113 head, tail = os.path.split(path) |
112 head, tail = os.path.split(path) |
114 if not tail: |
113 if not tail: |
141 def actual_path(filename): |
140 def actual_path(filename): |
142 """The actual path for non-Windows platforms.""" |
141 """The actual path for non-Windows platforms.""" |
143 return filename |
142 return filename |
144 |
143 |
145 |
144 |
146 if env.PY2: |
|
147 @contract(returns='unicode') |
|
148 def unicode_filename(filename): |
|
149 """Return a Unicode version of `filename`.""" |
|
150 if isinstance(filename, str): |
|
151 encoding = sys.getfilesystemencoding() or sys.getdefaultencoding() |
|
152 filename = filename.decode(encoding, "replace") |
|
153 return filename |
|
154 else: |
|
155 @contract(filename='unicode', returns='unicode') |
|
156 def unicode_filename(filename): |
|
157 """Return a Unicode version of `filename`.""" |
|
158 return filename |
|
159 |
|
160 |
|
@contract(returns='unicode')
def abs_file(path):
    """Return the absolute normalized form of `path`.

    The path is resolved with `os.path.realpath` when possible, made absolute
    with `os.path.abspath`, and finally passed through `actual_path`.

    """
    try:
        path = os.path.realpath(path)
    except UnicodeError:
        # Best effort: if realpath can't handle the name, carry on with the
        # unresolved path rather than failing.
        pass
    path = os.path.abspath(path)
    return actual_path(path)
172 |
155 |
173 |
156 |
174 def python_reported_file(filename): |
157 def python_reported_file(filename): |
175 """Return the string as Python would describe this file name.""" |
158 """Return the string as Python would describe this file name.""" |
205 else: |
188 else: |
206 prepped.append(abs_file(p)) |
189 prepped.append(abs_file(p)) |
207 return prepped |
190 return prepped |
208 |
191 |
209 |
192 |
210 class TreeMatcher(object): |
193 class TreeMatcher: |
211 """A matcher for files in a tree. |
194 """A matcher for files in a tree. |
212 |
195 |
213 Construct with a list of paths, either files or directories. Paths match |
196 Construct with a list of paths, either files or directories. Paths match |
214 with the `match` method if they are one of the files, or if they are |
197 with the `match` method if they are one of the files, or if they are |
215 somewhere in a subtree rooted at one of the directories. |
198 somewhere in a subtree rooted at one of the directories. |
216 |
199 |
217 """ |
200 """ |
def __init__(self, paths, name="unknown"):
    """Remember `paths` for matching and for display.

    `paths` is an iterable of file or directory paths.  `name` is a label
    shown in the repr.

    """
    # Materialize once: `paths` is walked twice below, and a one-shot
    # iterable would otherwise leave the second attribute empty.
    paths = list(paths)
    # Kept as given (sorted for display) for `info` and the repr.
    self.original_paths = human_sorted(paths)
    # Case-normalized copies are what `match` compares against.
    self.paths = [os.path.normcase(p) for p in paths]
    self.name = name
220 |
205 |
def __repr__(self):
    """Show the matcher's name and the paths held in `original_paths`."""
    return f"<TreeMatcher {self.name} {self.original_paths!r}>"
223 |
208 |
def info(self):
    """A list of strings for displaying when dumping state."""
    # Report the paths as stored in `original_paths`, not the normcased ones.
    return self.original_paths
227 |
212 |
228 def match(self, fpath): |
213 def match(self, fpath): |
229 """Does `fpath` indicate a file in one of our trees?""" |
214 """Does `fpath` indicate a file in one of our trees?""" |
|
215 fpath = os.path.normcase(fpath) |
230 for p in self.paths: |
216 for p in self.paths: |
231 if fpath.startswith(p): |
217 if fpath.startswith(p): |
232 if fpath == p: |
218 if fpath == p: |
233 # This is the same file! |
219 # This is the same file! |
234 return True |
220 return True |
236 # This is a file in the directory |
222 # This is a file in the directory |
237 return True |
223 return True |
238 return False |
224 return False |
239 |
225 |
240 |
226 |
241 class ModuleMatcher(object): |
227 class ModuleMatcher: |
242 """A matcher for modules in a tree.""" |
228 """A matcher for modules in a tree.""" |
def __init__(self, module_names, name="unknown"):
    """Remember the module names to match.

    `module_names` is an iterable of module names; it is copied into a list.
    `name` is a label shown in the repr.

    """
    self.modules = list(module_names)
    self.name = name
245 |
232 |
def __repr__(self):
    """Show the matcher's name and its module names."""
    return f"<ModuleMatcher {self.name} {self.modules!r}>"
248 |
235 |
def info(self):
    """A list of strings for displaying when dumping state."""
    return self.modules
252 |
239 |
264 return True |
251 return True |
265 |
252 |
266 return False |
253 return False |
267 |
254 |
268 |
255 |
269 class FnmatchMatcher(object): |
256 class FnmatchMatcher: |
270 """A matcher for files by file name pattern.""" |
257 """A matcher for files by file name pattern.""" |
def __init__(self, pats, name="unknown"):
    """Remember the file name patterns and compile them.

    `pats` is an iterable of patterns; it is copied into a list.  `name` is a
    label shown in the repr.

    """
    self.pats = list(pats)
    # One compiled regex for all patterns; case-insensitive when
    # env.WINDOWS is true.
    self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)
    self.name = name
274 |
262 |
def __repr__(self):
    """Show the matcher's name and its patterns."""
    return f"<FnmatchMatcher {self.name} {self.pats!r}>"
277 |
265 |
def info(self):
    """A list of strings for displaying when dumping state."""
    return self.pats
281 |
269 |
325 compiled = re.compile(join_regex(regexes), flags=flags) |
313 compiled = re.compile(join_regex(regexes), flags=flags) |
326 |
314 |
327 return compiled |
315 return compiled |
328 |
316 |
329 |
317 |
330 class PathAliases(object): |
318 class PathAliases: |
331 """A collection of aliases for paths. |
319 """A collection of aliases for paths. |
332 |
320 |
333 When combining data files from remote machines, often the paths to source |
321 When combining data files from remote machines, often the paths to source |
334 code are different, for example, due to OS differences, or because of |
322 code are different, for example, due to OS differences, or because of |
335 serialized checkouts on continuous integration machines. |
323 serialized checkouts on continuous integration machines. |
336 |
324 |
337 A `PathAliases` object tracks a list of pattern/result pairs, and can |
325 A `PathAliases` object tracks a list of pattern/result pairs, and can |
338 map a path through those aliases to produce a unified path. |
326 map a path through those aliases to produce a unified path. |
339 |
327 |
340 """ |
328 """ |
def __init__(self, relative=False):
    """Start with no aliases.

    `relative` is stored on the instance as given.

    """
    # Pairs of (compiled regex, result), in the order they were added.
    self.aliases = []
    self.relative = relative
343 |
332 |
def pprint(self):       # pragma: debugging
    """Dump the important parts of the PathAliases, for debugging."""
    print(f"Aliases (relative={self.relative}):")
    # One line per alias: the regex's pattern text, then its result.
    for regex, result in self.aliases:
        print(f"{regex.pattern!r} --> {result!r}")
348 |
338 |
349 def add(self, pattern, result): |
339 def add(self, pattern, result): |
350 """Add the `pattern`/`result` pair to the list of aliases. |
340 """Add the `pattern`/`result` pair to the list of aliases. |
351 |
341 |
352 `pattern` is an `fnmatch`-style pattern. `result` is a simple |
342 `pattern` is an `fnmatch`-style pattern. `result` is a simple |
403 for regex, result in self.aliases: |
393 for regex, result in self.aliases: |
404 m = regex.match(path) |
394 m = regex.match(path) |
405 if m: |
395 if m: |
406 new = path.replace(m.group(0), result) |
396 new = path.replace(m.group(0), result) |
407 new = new.replace(sep(path), sep(result)) |
397 new = new.replace(sep(path), sep(result)) |
408 new = canonical_filename(new) |
398 if not self.relative: |
|
399 new = canonical_filename(new) |
409 return new |
400 return new |
410 return path |
401 return path |
411 |
402 |
412 |
403 |
413 def find_python_files(dirname): |
404 def find_python_files(dirname): |