--- a/DebugClients/Python/coverage/files.py Fri Apr 04 22:57:07 2014 +0200 +++ b/DebugClients/Python/coverage/files.py Thu Apr 10 23:02:20 2014 +0200 @@ -1,29 +1,32 @@ """File wrangling.""" -import os, sys +from .backward import to_string +from .misc import CoverageException +import fnmatch, os, os.path, re, sys +import ntpath, posixpath class FileLocator(object): """Understand how filenames work.""" def __init__(self): - self.relative_dir = self.abs_file(os.curdir) + os.sep + # The absolute path to our current directory. + self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep) # Cache of results of calling the canonical_filename() method, to # avoid duplicating work. self.canonical_filename_cache = {} - def abs_file(self, filename): - """Return the absolute normalized form of `filename`.""" - return os.path.normcase(os.path.abspath(os.path.realpath(filename))) - def relative_filename(self, filename): """Return the relative form of `filename`. The filename will be relative to the current directory when the - FileLocator was constructed. + `FileLocator` was constructed. """ - return filename.replace(self.relative_dir, "") + fnorm = os.path.normcase(filename) + if fnorm.startswith(self.relative_dir): + filename = filename[len(self.relative_dir):] + return filename def canonical_filename(self, filename): """Return a canonical filename for `filename`. @@ -32,17 +35,15 @@ """ if filename not in self.canonical_filename_cache: - f = filename - if os.path.isabs(f) and not os.path.exists(f): - if self.get_zip_data(f) is None: - f = os.path.basename(f) - if not os.path.isabs(f): + if not os.path.isabs(filename): for path in [os.curdir] + sys.path: - g = os.path.join(path, f) - if os.path.exists(g): - f = g + if path is None: + continue + f = os.path.join(path, filename) + if os.path.exists(f): + filename = f break - cf = self.abs_file(f) + cf = abs_file(filename) self.canonical_filename_cache[filename] = cf return self.canonical_filename_cache[filename] @@ -67,10 +68,242 @@ data = zi.get_data(parts[1]) except IOError: continue - if sys.hexversion > 0x03000000: - data = data.decode('utf8') # TODO: How to do this properly? - return data + return to_string(data) return None -# -# eflag: FileType = Python2 + +if sys.platform == 'win32': + + def actual_path(path): + """Get the actual path of `path`, including the correct case.""" + if path in actual_path.cache: + return actual_path.cache[path] + + head, tail = os.path.split(path) + if not tail: + actpath = head + elif not head: + actpath = tail + else: + head = actual_path(head) + if head in actual_path.list_cache: + files = actual_path.list_cache[head] + else: + try: + files = os.listdir(head) + except OSError: + files = [] + actual_path.list_cache[head] = files + normtail = os.path.normcase(tail) + for f in files: + if os.path.normcase(f) == normtail: + tail = f + break + actpath = os.path.join(head, tail) + actual_path.cache[path] = actpath + return actpath + + actual_path.cache = {} + actual_path.list_cache = {} + +else: + def actual_path(filename): + """The actual path for non-Windows platforms.""" + return filename + + +def abs_file(filename): + """Return the absolute normalized form of `filename`.""" + path = os.path.expandvars(os.path.expanduser(filename)) + path = os.path.abspath(os.path.realpath(path)) + path = actual_path(path) + return path + + +def isabs_anywhere(filename): + """Is `filename` an absolute path on any OS?""" + return ntpath.isabs(filename) or posixpath.isabs(filename) + + +def prep_patterns(patterns): + """Prepare the file patterns for use in a `FnmatchMatcher`. + + If a pattern starts with a wildcard, it is used as a pattern + as-is. If it does not start with a wildcard, then it is made + absolute with the current directory. + + If `patterns` is None, an empty list is returned. + + """ + prepped = [] + for p in patterns or []: + if p.startswith("*") or p.startswith("?"): + prepped.append(p) + else: + prepped.append(abs_file(p)) + return prepped + + +class TreeMatcher(object): + """A matcher for files in a tree.""" + def __init__(self, directories): + self.dirs = directories[:] + + def __repr__(self): + return "<TreeMatcher %r>" % self.dirs + + def info(self): + """A list of strings for displaying when dumping state.""" + return self.dirs + + def add(self, directory): + """Add another directory to the list we match for.""" + self.dirs.append(directory) + + def match(self, fpath): + """Does `fpath` indicate a file in one of our trees?""" + for d in self.dirs: + if fpath.startswith(d): + if fpath == d: + # This is the same file! + return True + if fpath[len(d)] == os.sep: + # This is a file in the directory + return True + return False + + +class FnmatchMatcher(object): + """A matcher for files by filename pattern.""" + def __init__(self, pats): + self.pats = pats[:] + + def __repr__(self): + return "<FnmatchMatcher %r>" % self.pats + + def info(self): + """A list of strings for displaying when dumping state.""" + return self.pats + + def match(self, fpath): + """Does `fpath` match one of our filename patterns?""" + for pat in self.pats: + if fnmatch.fnmatch(fpath, pat): + return True + return False + + +def sep(s): + """Find the path separator used in this string, or os.sep if none.""" + sep_match = re.search(r"[\\/]", s) + if sep_match: + the_sep = sep_match.group(0) + else: + the_sep = os.sep + return the_sep + + +class PathAliases(object): + """A collection of aliases for paths. + + When combining data files from remote machines, often the paths to source + code are different, for example, due to OS differences, or because of + serialized checkouts on continuous integration machines. + + A `PathAliases` object tracks a list of pattern/result pairs, and can + map a path through those aliases to produce a unified path. + + `locator` is a FileLocator that is used to canonicalize the results. + + """ + def __init__(self, locator=None): + self.aliases = [] + self.locator = locator + + def add(self, pattern, result): + """Add the `pattern`/`result` pair to the list of aliases. + + `pattern` is an `fnmatch`-style pattern. `result` is a simple + string. When mapping paths, if a path starts with a match against + `pattern`, then that match is replaced with `result`. This models + isomorphic source trees being rooted at different places on two + different machines. + + `pattern` can't end with a wildcard component, since that would + match an entire tree, and not just its root. + + """ + # The pattern can't end with a wildcard component. + pattern = pattern.rstrip(r"\/") + if pattern.endswith("*"): + raise CoverageException("Pattern must not end with wildcards.") + pattern_sep = sep(pattern) + + # The pattern is meant to match a filepath. Let's make it absolute + # unless it already is, or is meant to match any prefix. + if not pattern.startswith('*') and not isabs_anywhere(pattern): + pattern = abs_file(pattern) + pattern += pattern_sep + + # Make a regex from the pattern. fnmatch always adds a \Z or $ to + # match the whole string, which we don't want. + regex_pat = fnmatch.translate(pattern).replace(r'\Z(', '(') + if regex_pat.endswith("$"): + regex_pat = regex_pat[:-1] + # We want */a/b.py to match on Windows too, so change slash to match + # either separator. + regex_pat = regex_pat.replace(r"\/", r"[\\/]") + # We want case-insensitive matching, so add that flag. + regex = re.compile(r"(?i)" + regex_pat) + + # Normalize the result: it must end with a path separator. + result_sep = sep(result) + result = result.rstrip(r"\/") + result_sep + self.aliases.append((regex, result, pattern_sep, result_sep)) + + def map(self, path): + """Map `path` through the aliases. + + `path` is checked against all of the patterns. The first pattern to + match is used to replace the root of the path with the result root. + Only one pattern is ever used. If no patterns match, `path` is + returned unchanged. + + The separator style in the result is made to match that of the result + in the alias. + + """ + for regex, result, pattern_sep, result_sep in self.aliases: + m = regex.match(path) + if m: + new = path.replace(m.group(0), result) + if pattern_sep != result_sep: + new = new.replace(pattern_sep, result_sep) + if self.locator: + new = self.locator.canonical_filename(new) + return new + return path + + +def find_python_files(dirname): + """Yield all of the importable Python files in `dirname`, recursively. + + To be importable, the files have to be in a directory with a __init__.py, + except for `dirname` itself, which isn't required to have one. The + assumption is that `dirname` was specified directly, so the user knows + best, but subdirectories are checked for a __init__.py to be sure we only + find the importable files. + + """ + for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dirname)): + if i > 0 and '__init__.py' not in filenames: + # If a directory doesn't have __init__.py, then it isn't + # importable and neither are its files + del dirnames[:] + continue + for filename in filenames: + # We're only interested in files that look like reasonable Python + # files: Must end with .py or .pyw, and must not have certain funny + # characters that probably mean they are editor junk. + if re.match(r"^[^.#~!$@%^&*()+=,]+\.pyw?$", filename): + yield os.path.join(dirpath, filename)