diff -r bedab77d0fa3 -r d6c795b5ce33 DebugClients/Python/coverage/files.py --- a/DebugClients/Python/coverage/files.py Sat Apr 07 13:17:06 2018 +0200 +++ b/DebugClients/Python/coverage/files.py Sat Apr 07 13:35:10 2018 +0200 @@ -3,6 +3,7 @@ """File wrangling.""" +import hashlib import fnmatch import ntpath import os @@ -63,7 +64,11 @@ if path is None: continue f = os.path.join(path, filename) - if os.path.exists(f): + try: + exists = os.path.exists(f) + except UnicodeError: + exists = False + if exists: filename = f break cf = abs_file(filename) @@ -71,6 +76,9 @@ return CANONICAL_FILENAME_CACHE[filename] +MAX_FLAT = 200 + +@contract(filename='unicode', returns='unicode') def flat_rootname(filename): """A base for a flat file name to correspond to this file. @@ -82,7 +90,11 @@ """ name = ntpath.splitdrive(filename)[1] - return re.sub(r"[\\/.:]", "_", name) + name = re.sub(r"[\\/.:]", "_", name) + if len(name) > MAX_FLAT: + h = hashlib.sha1(name.encode('UTF-8')).hexdigest() + name = name[-(MAX_FLAT-len(h)-1):] + '_' + h + return name if env.WINDOWS: @@ -147,7 +159,11 @@ def abs_file(filename): """Return the absolute normalized form of `filename`.""" path = os.path.expandvars(os.path.expanduser(filename)) - path = os.path.abspath(os.path.realpath(path)) + try: + path = os.path.realpath(path) + except UnicodeError: + pass + path = os.path.abspath(path) path = actual_path(path) path = unicode_filename(path) return path @@ -183,25 +199,31 @@ class TreeMatcher(object): - """A matcher for files in a tree.""" - def __init__(self, directories): - self.dirs = list(directories) + """A matcher for files in a tree. + + Construct with a list of paths, either files or directories. Paths match + with the `match` method if they are one of the files, or if they are + somewhere in a subtree rooted at one of the directories. + + """ + def __init__(self, paths): + self.paths = list(paths) def __repr__(self): - return "<TreeMatcher %r>" % self.dirs + return "<TreeMatcher %r>" % self.paths def info(self): """A list of strings for displaying when dumping state.""" - return self.dirs + return self.paths def match(self, fpath): """Does `fpath` indicate a file in one of our trees?""" - for d in self.dirs: - if fpath.startswith(d): - if fpath == d: + for p in self.paths: + if fpath.startswith(p): + if fpath == p: # This is the same file! return True - if fpath[len(d)] == os.sep: + if fpath[len(p)] == os.sep: # This is a file in the directory return True return False @@ -243,13 +265,14 @@ # of treating / and \ as equivalent. But on other platforms, we need to # take care of that ourselves. fnpats = (fnmatch.translate(p) for p in pats) - fnpats = (p.replace(r"\/", r"[\\/]") for p in fnpats) + # Python3.7 fnmatch translates "/" as "/", before that, it translates as "\/", + # so we have to deal with maybe a backslash. + fnpats = (re.sub(r"\\?/", r"[\\\\/]", p) for p in fnpats) + flags = 0 if env.WINDOWS: - # Windows is also case-insensitive. BTW: the regex docs say that - # flags like (?i) have to be at the beginning, but fnmatch puts - # them at the end, and having two there seems to work fine. - fnpats = (p + "(?i)" for p in fnpats) - self.re = re.compile(join_regex(fnpats)) + # Windows is also case-insensitive, so make the regex case-insensitive. + flags |= re.IGNORECASE + self.re = re.compile(join_regex(fnpats), flags=flags) def __repr__(self): return "<FnmatchMatcher %r>" % self.pats @@ -287,6 +310,11 @@ def __init__(self): self.aliases = [] + def pprint(self): # pragma: debugging + """Dump the important parts of the PathAliases, for debugging.""" + for regex, result in self.aliases: + print("{0!r} --> {1!r}".format(regex.pattern, result)) + def add(self, pattern, result): """Add the `pattern`/`result` pair to the list of aliases. @@ -300,8 +328,10 @@ match an entire tree, and not just its root. """ + if len(pattern) > 1: + pattern = pattern.rstrip(r"\/") + # The pattern can't end with a wildcard component. - pattern = pattern.rstrip(r"\/") if pattern.endswith("*"): raise CoverageException("Pattern must not end with wildcards.") pattern_sep = sep(pattern) @@ -310,11 +340,15 @@ # unless it already is, or is meant to match any prefix. if not pattern.startswith('*') and not isabs_anywhere(pattern): pattern = abs_file(pattern) - pattern += pattern_sep + if not pattern.endswith(pattern_sep): + pattern += pattern_sep # Make a regex from the pattern. fnmatch always adds a \Z to - # match the whole string, which we don't want. - regex_pat = fnmatch.translate(pattern).replace(r'\Z(', '(') + # match the whole string, which we don't want, so we remove the \Z. + # While removing it, we only replace \Z if followed by paren, or at + # end, to keep from destroying a literal \Z in the pattern. + regex_pat = fnmatch.translate(pattern) + regex_pat = re.sub(r'\\Z(\(|$)', r'\1', regex_pat) # We want */a/b.py to match on Windows too, so change slash to match # either separator. @@ -325,7 +359,7 @@ # Normalize the result: it must end with a path separator. result_sep = sep(result) result = result.rstrip(r"\/") + result_sep - self.aliases.append((regex, result, pattern_sep, result_sep)) + self.aliases.append((regex, result)) def map(self, path): """Map `path` through the aliases. @@ -343,12 +377,11 @@ of `path` unchanged. """ - for regex, result, pattern_sep, result_sep in self.aliases: + for regex, result in self.aliases: m = regex.match(path) if m: new = path.replace(m.group(0), result) - if pattern_sep != result_sep: - new = new.replace(pattern_sep, result_sep) + new = new.replace(sep(path), sep(result)) new = canonical_filename(new) return new return path