1 """File wrangling.""" |
1 """File wrangling.""" |
2 |
2 |
3 import os, sys |
3 from .backward import to_string |
|
4 from .misc import CoverageException |
|
5 import fnmatch, os, os.path, re, sys |
|
6 import ntpath, posixpath |
4 |
7 |
5 class FileLocator(object): |
8 class FileLocator(object): |
6 """Understand how filenames work.""" |
9 """Understand how filenames work.""" |
7 |
10 |
def __init__(self):
    """Record the starting directory and set up the filename cache."""
    # Cache of results of calling the canonical_filename() method, to
    # avoid duplicating work.
    self.canonical_filename_cache = {}

    # The absolute path to our current directory, case-normalized and
    # ending with a separator so it can be used as a path prefix.
    here = abs_file(os.curdir)
    self.relative_dir = os.path.normcase(here + os.sep)
14 |
18 |
def abs_file(self, filename):
    """Return the absolute normalized form of `filename`.

    Symbolic links are resolved, the path is made absolute, and its case
    is normalized with `os.path.normcase`.

    """
    resolved = os.path.realpath(filename)
    absolute = os.path.abspath(resolved)
    return os.path.normcase(absolute)
|
18 |
|
19 def relative_filename(self, filename): |
19 def relative_filename(self, filename): |
20 """Return the relative form of `filename`. |
20 """Return the relative form of `filename`. |
21 |
21 |
22 The filename will be relative to the current directory when the |
22 The filename will be relative to the current directory when the |
23 FileLocator was constructed. |
23 `FileLocator` was constructed. |
24 |
24 |
25 """ |
25 """ |
26 return filename.replace(self.relative_dir, "") |
26 fnorm = os.path.normcase(filename) |
|
27 if fnorm.startswith(self.relative_dir): |
|
28 filename = filename[len(self.relative_dir):] |
|
29 return filename |
27 |
30 |
28 def canonical_filename(self, filename): |
31 def canonical_filename(self, filename): |
29 """Return a canonical filename for `filename`. |
32 """Return a canonical filename for `filename`. |
30 |
33 |
31 An absolute path with no redundant components and normalized case. |
34 An absolute path with no redundant components and normalized case. |
32 |
35 |
33 """ |
36 """ |
34 if filename not in self.canonical_filename_cache: |
37 if filename not in self.canonical_filename_cache: |
35 f = filename |
38 if not os.path.isabs(filename): |
36 if os.path.isabs(f) and not os.path.exists(f): |
|
37 if self.get_zip_data(f) is None: |
|
38 f = os.path.basename(f) |
|
39 if not os.path.isabs(f): |
|
40 for path in [os.curdir] + sys.path: |
39 for path in [os.curdir] + sys.path: |
41 g = os.path.join(path, f) |
40 if path is None: |
42 if os.path.exists(g): |
41 continue |
43 f = g |
42 f = os.path.join(path, filename) |
|
43 if os.path.exists(f): |
|
44 filename = f |
44 break |
45 break |
45 cf = self.abs_file(f) |
46 cf = abs_file(filename) |
46 self.canonical_filename_cache[filename] = cf |
47 self.canonical_filename_cache[filename] = cf |
47 return self.canonical_filename_cache[filename] |
48 return self.canonical_filename_cache[filename] |
48 |
49 |
49 def get_zip_data(self, filename): |
50 def get_zip_data(self, filename): |
50 """Get data from `filename` if it is a zip file path. |
51 """Get data from `filename` if it is a zip file path. |
65 continue |
66 continue |
66 try: |
67 try: |
67 data = zi.get_data(parts[1]) |
68 data = zi.get_data(parts[1]) |
68 except IOError: |
69 except IOError: |
69 continue |
70 continue |
70 if sys.hexversion > 0x03000000: |
71 return to_string(data) |
71 data = data.decode('utf8') # TODO: How to do this properly? |
|
72 return data |
|
73 return None |
72 return None |
74 |
73 |
75 # |
74 |
76 # eflag: FileType = Python2 |
if sys.platform == 'win32':

    def actual_path(path):
        """Get the actual path of `path`, including the correct case.

        The path is handled one component at a time: the parent is resolved
        recursively, its directory listing is read, and the final component
        is replaced by the listed entry that is equal to it under
        `os.path.normcase`.  If no entry matches, the component is kept as
        given.  A directory that cannot be listed is treated as empty.

        Results are memoized in `actual_path.cache`, and directory listings
        in `actual_path.list_cache`.

        """
        try:
            return actual_path.cache[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            resolved = parent
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            try:
                entries = actual_path.list_cache[parent]
            except KeyError:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    entries = []
                actual_path.list_cache[parent] = entries
            wanted = os.path.normcase(leaf)
            # Take the first listed entry whose normalized case matches.
            leaf = next(
                (name for name in entries if os.path.normcase(name) == wanted),
                leaf,
            )
            resolved = os.path.join(parent, leaf)
        actual_path.cache[path] = resolved
        return resolved

    actual_path.cache = {}
    actual_path.list_cache = {}

else:

    def actual_path(filename):
        """The actual path for non-Windows platforms."""
        # No case correction is done here; the name is returned as given.
        return filename
|
113 |
|
114 |
|
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    A leading ``~`` and any environment variables are expanded, symbolic
    links are resolved, the path is made absolute, and finally it is passed
    through `actual_path`.

    """
    expanded = os.path.expandvars(os.path.expanduser(filename))
    resolved = os.path.realpath(expanded)
    return actual_path(os.path.abspath(resolved))
|
121 |
|
122 |
|
def isabs_anywhere(filename):
    """Is `filename` an absolute path on any OS?

    The name is checked against both the Windows (`ntpath`) and the POSIX
    (`posixpath`) rules; either one accepting it is enough.

    """
    for flavor in (ntpath, posixpath):
        if flavor.isabs(filename):
            return True
    return False
|
126 |
|
127 |
|
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern whose first character is a wildcard (``*`` or ``?``) is kept
    exactly as given.  Every other pattern is made absolute by passing it
    through `abs_file`.

    If `patterns` is None (or empty), an empty list is returned.

    """
    if not patterns:
        return []
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns
    ]
|
145 |
|
146 |
|
class TreeMatcher(object):
    """A matcher for files in a tree.

    `directories` is an iterable of directory paths.  It is copied into a
    new list, so later changes to the caller's sequence have no effect and
    `add` works whatever sequence type was passed in.

    """
    def __init__(self, directories):
        self.dirs = list(directories)

    def __repr__(self):
        # Wrap in a tuple so the value is always formatted as one argument.
        return "<TreeMatcher %r>" % (self.dirs,)

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        Returns True if `fpath` is one of the directories itself, or if it
        starts with one of them followed by `os.sep`.  A bare string prefix
        is not enough: a directory ``foo`` does not match ``foobar/x.py``.

        """
        for d in self.dirs:
            if not fpath.startswith(d):
                continue
            if fpath == d:
                # This is the same file!
                return True
            # Here fpath is strictly longer than d, so the index is valid.
            # A directory that already ends with a separator (such as the
            # filesystem root) contains everything it prefixes.
            if d.endswith(os.sep) or fpath[len(d)] == os.sep:
                # This is a file in the directory
                return True
        return False
|
174 |
|
175 |
|
class FnmatchMatcher(object):
    """A matcher for files by filename pattern.

    `pats` is an iterable of `fnmatch`-style patterns.  It is copied into a
    new list, so later changes to the caller's sequence have no effect.

    """
    def __init__(self, pats):
        self.pats = list(pats)

    def __repr__(self):
        # Wrap in a tuple so the value is always formatted as one argument.
        return "<FnmatchMatcher %r>" % (self.pats,)

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pat) for pat in self.pats)
|
194 |
|
195 |
|
def sep(s):
    """Find the path separator used in this string, or os.sep if none.

    The first backslash or forward slash found in `s` is the answer.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
|
204 |
|
205 |
|
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # Each alias is a tuple: (compiled regex, result, pattern_sep,
        # result_sep).
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex
        # at the end of the string, which we don't want because we only
        # match a prefix.  Depending on the Python version the anchor is
        # "$", or "\Z" (or "\z") either at the very end or just before a
        # trailing "(?flags)" group.  Only an anchor in one of those
        # positions is removed, so a literal "\Z" elsewhere is untouched.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  The slash may or may not have been escaped by
        # fnmatch.  Other escaped characters and bracketed character classes
        # are matched first and copied through unchanged.
        regex_pat = re.sub(
            r"(\\[^/]|\[[^\]]*\])|\\?/",
            lambda found: found.group(1) or r"[\\/]",
            regex_pat,
        )
        # We want case-insensitive matching, so add that flag.
        regex = re.compile(r"(?i)" + regex_pat)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if not m:
                continue
            # Replace only the matched prefix.  The same text appearing
            # later in the path must be left alone.
            new = result + path[m.end():]
            if pattern_sep != result_sep:
                new = new.replace(pattern_sep, result_sep)
            if self.locator:
                new = self.locator.canonical_filename(new)
            return new
        return path
|
286 |
|
287 |
|
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    `dirname` itself is always searched, on the assumption that the caller
    named it on purpose.  A directory below it is searched only if it
    contains an ``__init__.py``; otherwise it and everything beneath it are
    skipped, since files there could not be imported.

    """
    # We're only interested in files that look like reasonable Python
    # files: Must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if at_top:
            at_top = False
        elif '__init__.py' not in filenames:
            # Not a package: empty the list in place so os.walk does not
            # descend any further from here.
            dirnames[:] = []
            continue
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)