DebugClients/Python/coverage/files.py

changeset 6219
d6c795b5ce33
parent 5178
878ce843ca9f
child 6649
f1b3a73831c9
equal deleted inserted replaced
6218:bedab77d0fa3 6219:d6c795b5ce33
1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 1 # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt 2 # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
3 3
4 """File wrangling.""" 4 """File wrangling."""
5 5
6 import hashlib
6 import fnmatch 7 import fnmatch
7 import ntpath 8 import ntpath
8 import os 9 import os
9 import os.path 10 import os.path
10 import posixpath 11 import posixpath
61 if not os.path.isabs(filename): 62 if not os.path.isabs(filename):
62 for path in [os.curdir] + sys.path: 63 for path in [os.curdir] + sys.path:
63 if path is None: 64 if path is None:
64 continue 65 continue
65 f = os.path.join(path, filename) 66 f = os.path.join(path, filename)
66 if os.path.exists(f): 67 try:
68 exists = os.path.exists(f)
69 except UnicodeError:
70 exists = False
71 if exists:
67 filename = f 72 filename = f
68 break 73 break
69 cf = abs_file(filename) 74 cf = abs_file(filename)
70 CANONICAL_FILENAME_CACHE[filename] = cf 75 CANONICAL_FILENAME_CACHE[filename] = cf
71 return CANONICAL_FILENAME_CACHE[filename] 76 return CANONICAL_FILENAME_CACHE[filename]
72 77
73 78
79 MAX_FLAT = 200
80
81 @contract(filename='unicode', returns='unicode')
74 def flat_rootname(filename): 82 def flat_rootname(filename):
75 """A base for a flat file name to correspond to this file. 83 """A base for a flat file name to correspond to this file.
76 84
77 Useful for writing files about the code where you want all the files in 85 Useful for writing files about the code where you want all the files in
78 the same directory, but need to differentiate same-named files from 86 the same directory, but need to differentiate same-named files from
80 88
81 For example, the file a/b/c.py will return 'a_b_c_py' 89 For example, the file a/b/c.py will return 'a_b_c_py'
82 90
83 """ 91 """
84 name = ntpath.splitdrive(filename)[1] 92 name = ntpath.splitdrive(filename)[1]
85 return re.sub(r"[\\/.:]", "_", name) 93 name = re.sub(r"[\\/.:]", "_", name)
94 if len(name) > MAX_FLAT:
95 h = hashlib.sha1(name.encode('UTF-8')).hexdigest()
96 name = name[-(MAX_FLAT-len(h)-1):] + '_' + h
97 return name
86 98
87 99
88 if env.WINDOWS: 100 if env.WINDOWS:
89 101
90 _ACTUAL_PATH_CACHE = {} 102 _ACTUAL_PATH_CACHE = {}
145 157
146 @contract(returns='unicode') 158 @contract(returns='unicode')
147 def abs_file(filename): 159 def abs_file(filename):
148 """Return the absolute normalized form of `filename`.""" 160 """Return the absolute normalized form of `filename`."""
149 path = os.path.expandvars(os.path.expanduser(filename)) 161 path = os.path.expandvars(os.path.expanduser(filename))
150 path = os.path.abspath(os.path.realpath(path)) 162 try:
163 path = os.path.realpath(path)
164 except UnicodeError:
165 pass
166 path = os.path.abspath(path)
151 path = actual_path(path) 167 path = actual_path(path)
152 path = unicode_filename(path) 168 path = unicode_filename(path)
153 return path 169 return path
154 170
155 171
181 prepped.append(abs_file(p)) 197 prepped.append(abs_file(p))
182 return prepped 198 return prepped
183 199
184 200
185 class TreeMatcher(object): 201 class TreeMatcher(object):
186 """A matcher for files in a tree.""" 202 """A matcher for files in a tree.
187 def __init__(self, directories): 203
188 self.dirs = list(directories) 204 Construct with a list of paths, either files or directories. Paths match
205 with the `match` method if they are one of the files, or if they are
206 somewhere in a subtree rooted at one of the directories.
207
208 """
209 def __init__(self, paths):
210 self.paths = list(paths)
189 211
190 def __repr__(self): 212 def __repr__(self):
191 return "<TreeMatcher %r>" % self.dirs 213 return "<TreeMatcher %r>" % self.paths
192 214
193 def info(self): 215 def info(self):
194 """A list of strings for displaying when dumping state.""" 216 """A list of strings for displaying when dumping state."""
195 return self.dirs 217 return self.paths
196 218
197 def match(self, fpath): 219 def match(self, fpath):
198 """Does `fpath` indicate a file in one of our trees?""" 220 """Does `fpath` indicate a file in one of our trees?"""
199 for d in self.dirs: 221 for p in self.paths:
200 if fpath.startswith(d): 222 if fpath.startswith(p):
201 if fpath == d: 223 if fpath == p:
202 # This is the same file! 224 # This is the same file!
203 return True 225 return True
204 if fpath[len(d)] == os.sep: 226 if fpath[len(p)] == os.sep:
205 # This is a file in the directory 227 # This is a file in the directory
206 return True 228 return True
207 return False 229 return False
208 230
209 231
241 self.pats = pats[:] 263 self.pats = pats[:]
242 # fnmatch is platform-specific. On Windows, it does the Windows thing 264 # fnmatch is platform-specific. On Windows, it does the Windows thing
243 # of treating / and \ as equivalent. But on other platforms, we need to 265 # of treating / and \ as equivalent. But on other platforms, we need to
244 # take care of that ourselves. 266 # take care of that ourselves.
245 fnpats = (fnmatch.translate(p) for p in pats) 267 fnpats = (fnmatch.translate(p) for p in pats)
246 fnpats = (p.replace(r"\/", r"[\\/]") for p in fnpats) 268 # Python3.7 fnmatch translates "/" as "/", before that, it translates as "\/",
269 # so we have to deal with maybe a backslash.
270 fnpats = (re.sub(r"\\?/", r"[\\\\/]", p) for p in fnpats)
271 flags = 0
247 if env.WINDOWS: 272 if env.WINDOWS:
248 # Windows is also case-insensitive. BTW: the regex docs say that 273 # Windows is also case-insensitive, so make the regex case-insensitive.
249 # flags like (?i) have to be at the beginning, but fnmatch puts 274 flags |= re.IGNORECASE
250 # them at the end, and having two there seems to work fine. 275 self.re = re.compile(join_regex(fnpats), flags=flags)
251 fnpats = (p + "(?i)" for p in fnpats)
252 self.re = re.compile(join_regex(fnpats))
253 276
254 def __repr__(self): 277 def __repr__(self):
255 return "<FnmatchMatcher %r>" % self.pats 278 return "<FnmatchMatcher %r>" % self.pats
256 279
257 def info(self): 280 def info(self):
285 308
286 """ 309 """
287 def __init__(self): 310 def __init__(self):
288 self.aliases = [] 311 self.aliases = []
289 312
313 def pprint(self): # pragma: debugging
314 """Dump the important parts of the PathAliases, for debugging."""
315 for regex, result in self.aliases:
316 print("{0!r} --> {1!r}".format(regex.pattern, result))
317
290 def add(self, pattern, result): 318 def add(self, pattern, result):
291 """Add the `pattern`/`result` pair to the list of aliases. 319 """Add the `pattern`/`result` pair to the list of aliases.
292 320
293 `pattern` is an `fnmatch`-style pattern. `result` is a simple 321 `pattern` is an `fnmatch`-style pattern. `result` is a simple
294 string. When mapping paths, if a path starts with a match against 322 string. When mapping paths, if a path starts with a match against
298 326
299 `pattern` can't end with a wildcard component, since that would 327 `pattern` can't end with a wildcard component, since that would
300 match an entire tree, and not just its root. 328 match an entire tree, and not just its root.
301 329
302 """ 330 """
331 if len(pattern) > 1:
332 pattern = pattern.rstrip(r"\/")
333
303 # The pattern can't end with a wildcard component. 334 # The pattern can't end with a wildcard component.
304 pattern = pattern.rstrip(r"\/")
305 if pattern.endswith("*"): 335 if pattern.endswith("*"):
306 raise CoverageException("Pattern must not end with wildcards.") 336 raise CoverageException("Pattern must not end with wildcards.")
307 pattern_sep = sep(pattern) 337 pattern_sep = sep(pattern)
308 338
309 # The pattern is meant to match a filepath. Let's make it absolute 339 # The pattern is meant to match a filepath. Let's make it absolute
310 # unless it already is, or is meant to match any prefix. 340 # unless it already is, or is meant to match any prefix.
311 if not pattern.startswith('*') and not isabs_anywhere(pattern): 341 if not pattern.startswith('*') and not isabs_anywhere(pattern):
312 pattern = abs_file(pattern) 342 pattern = abs_file(pattern)
313 pattern += pattern_sep 343 if not pattern.endswith(pattern_sep):
344 pattern += pattern_sep
314 345
315 # Make a regex from the pattern. fnmatch always adds a \Z to 346 # Make a regex from the pattern. fnmatch always adds a \Z to
316 # match the whole string, which we don't want. 347 # match the whole string, which we don't want, so we remove the \Z.
317 regex_pat = fnmatch.translate(pattern).replace(r'\Z(', '(') 348 # While removing it, we only replace \Z if followed by paren, or at
349 # end, to keep from destroying a literal \Z in the pattern.
350 regex_pat = fnmatch.translate(pattern)
351 regex_pat = re.sub(r'\\Z(\(|$)', r'\1', regex_pat)
318 352
319 # We want */a/b.py to match on Windows too, so change slash to match 353 # We want */a/b.py to match on Windows too, so change slash to match
320 # either separator. 354 # either separator.
321 regex_pat = regex_pat.replace(r"\/", r"[\\/]") 355 regex_pat = regex_pat.replace(r"\/", r"[\\/]")
322 # We want case-insensitive matching, so add that flag. 356 # We want case-insensitive matching, so add that flag.
323 regex = re.compile(r"(?i)" + regex_pat) 357 regex = re.compile(r"(?i)" + regex_pat)
324 358
325 # Normalize the result: it must end with a path separator. 359 # Normalize the result: it must end with a path separator.
326 result_sep = sep(result) 360 result_sep = sep(result)
327 result = result.rstrip(r"\/") + result_sep 361 result = result.rstrip(r"\/") + result_sep
328 self.aliases.append((regex, result, pattern_sep, result_sep)) 362 self.aliases.append((regex, result))
329 363
330 def map(self, path): 364 def map(self, path):
331 """Map `path` through the aliases. 365 """Map `path` through the aliases.
332 366
333 `path` is checked against all of the patterns. The first pattern to 367 `path` is checked against all of the patterns. The first pattern to
341 Returns the mapped path. If a mapping has happened, this is a 375 Returns the mapped path. If a mapping has happened, this is a
342 canonical path. If no mapping has happened, it is the original value 376 canonical path. If no mapping has happened, it is the original value
343 of `path` unchanged. 377 of `path` unchanged.
344 378
345 """ 379 """
346 for regex, result, pattern_sep, result_sep in self.aliases: 380 for regex, result in self.aliases:
347 m = regex.match(path) 381 m = regex.match(path)
348 if m: 382 if m:
349 new = path.replace(m.group(0), result) 383 new = path.replace(m.group(0), result)
350 if pattern_sep != result_sep: 384 new = new.replace(sep(path), sep(result))
351 new = new.replace(pattern_sep, result_sep)
352 new = canonical_filename(new) 385 new = canonical_filename(new)
353 return new 386 return new
354 return path 387 return path
355 388
356 389

eric ide

mercurial