|
1 """Filename matching with shell patterns. |
|
2 |
|
3 fnmatch(FILENAME, PATTERN) matches according to the local convention. |
|
4 fnmatchcase(FILENAME, PATTERN) always takes case into account. |
|
5 |
|
6 The functions operate by translating the pattern into a regular |
|
7 expression. They cache the compiled regular expressions for speed. |
|
8 |
|
9 The function translate(PATTERN) returns a regular expression |
|
10 corresponding to PATTERN. (It does not compile it.) |
|
11 |
|
12 Based on code from fnmatch.py file distributed with Python 2.6. |
|
13 |
|
14 Licensed under PSF License (see LICENSE.txt file). |
|
15 |
|
16 Changes to original fnmatch module: |
|
17 - translate function supports ``*`` and ``**`` similarly to fnmatch C library |
|
18 """ |
|
19 |
|
20 import os |
|
21 import re |
|
22 |
|
23 __all__ = ["fnmatch", "fnmatchcase", "translate"] |
|
24 |
|
# Maps a pattern string to its ``(compiled_regex, numeric_groups)`` pair so
# that each pattern is translated and compiled only once.
_cache = {}

# Matches a "{" that is not preceded by a backslash.  A lookbehind is used
# instead of consuming the preceding character so that adjacent braces such
# as "{{" are each found by findall().
LEFT_BRACE = re.compile(
    r"""
    (?<!\\)     # Not preceded by "\"
    \{          # "{"
    """, re.VERBOSE
)

# Matches a "}" that is not preceded by a backslash (same technique as
# LEFT_BRACE, so "}}" counts as two closing braces).
RIGHT_BRACE = re.compile(
    r"""
    (?<!\\)     # Not preceded by "\"
    \}          # "}"
    """, re.VERBOSE
)

# Matches the inside of a numeric range such as "3..120" or "-5..+7" and
# captures both bounds as strings.  The trailing anchor ensures the whole
# text is a range, so "1..5abc" is not mistaken for one.
NUMERIC_RANGE = re.compile(
    r"""
    (           # Capture a number
        [+-] ?  # Zero or one "+" or "-" characters
        \d +    # One or more digits
    )

    \.\.        # ".."

    (           # Capture a number
        [+-] ?  # Zero or one "+" or "-" characters
        \d +    # One or more digits
    )

    \Z          # Nothing may follow the second number
    """, re.VERBOSE
)
|
62 |
|
63 |
|
def fnmatch(name, pat):
    """Test whether FILENAME matches PATTERN after normalizing the path.

    Patterns are Unix shell style:

    - ``*``             matches everything except path separator
    - ``**``            matches everything
    - ``?``             matches any single character
    - ``[seq]``         matches any character in seq
    - ``[!seq]``        matches any char not in seq
    - ``{s1,s2,s3}``    matches any of the strings given (separated by commas)
    - ``{num1..num2}``  matches an integer between num1 and num2

    An initial period in FILENAME is not special.
    FILENAME is passed through ``os.path.normpath`` and its OS-specific
    path separators are replaced with ``/`` before matching.  PATTERN is
    used unchanged, and neither argument is case-normalized.
    To match FILENAME exactly as given, use fnmatchcase(FILENAME, PATTERN).
    """
    normalized = os.path.normpath(name)
    # Patterns always use "/" as the separator, whatever the platform uses.
    posix_style = normalized.replace(os.sep, "/")
    return fnmatchcase(posix_style, pat)
|
84 |
|
85 |
|
def cached_translate(pat):
    """Return ``(compiled_regex, numeric_groups)`` for PATTERN, using a cache.

    On the first request for a pattern it is translated with translate(),
    compiled, and stored in the module-level ``_cache``; later requests
    return the stored pair.

    Each numeric range is materialised as a list before it is cached.
    translate() may hand back one-shot iterators (``map`` objects on
    Python 3); caching those directly would leave them exhausted after the
    first match attempt.
    """
    try:
        return _cache[pat]
    except KeyError:
        pass
    res, num_groups = translate(pat)
    entry = (re.compile(res), [list(group) for group in num_groups])
    _cache[pat] = entry
    return entry
|
92 |
|
93 |
|
def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

    This is a version of fnmatch() which does not normalize FILENAME
    before matching.

    After the regular expression matches, every number captured for a
    ``{num1..num2}`` range must lie within that range (inclusive) and must
    not be zero-padded (``060`` is rejected, a lone ``0`` is accepted).
    Returns True or False.
    """
    regex, num_groups = cached_translate(pat)
    match = regex.match(name)
    if not match:
        return False
    for num, (min_num, max_num) in zip(match.groups(), num_groups):
        if num is None:
            # The range sits in a brace alternative that was not the one
            # matched, so there is no number to validate.
            continue
        if len(num) > 1 and num[0] == '0':
            return False
        if not min_num <= int(num) <= max_num:
            return False
    return True
|
111 |
|
112 |
|
def translate(pat, nested=False):
    """Translate a shell PATTERN to a regular expression.

    Returns a ``(regex, numeric_groups)`` pair.  ``regex`` is the regular
    expression source (it is not compiled).  ``numeric_groups`` is a list
    of ``[low, high]`` integer bounds, one for each ``{low..high}`` range
    in PATTERN; each range adds one capturing group to ``regex``, in the
    same order, and the caller is expected to check the captured number
    against the bounds.

    When ``nested`` is true the result is a fragment meant to be embedded
    in a larger expression, so the flag prefix and the end-of-string
    anchor are left out.

    A backslash is dropped from the output and marks the next character
    as escaped: an escaped ``,`` or ``}`` is taken literally and ``\\\\``
    produces a literal backslash.  ``*``, ``?`` and ``[`` are translated
    the same way whether or not a backslash precedes them.
    """

    index, length = 0, len(pat)  # Current index and length of pattern
    brace_level = 0
    in_brackets = False
    is_escaped = False
    pieces = []
    numeric_groups = []
    # Brace alternation is only honoured when the unescaped braces balance.
    matching_braces = (len(LEFT_BRACE.findall(pat)) ==
                       len(RIGHT_BRACE.findall(pat)))
    while index < length:
        current_char = pat[index]
        index += 1
        if current_char == '*':
            # The first star of "**" may cross "/"; a single star may not.
            if index < length and pat[index] == '*':
                pieces.append('.*')
            else:
                pieces.append('[^/]*')
        elif current_char == '?':
            pieces.append('.')
        elif current_char == '[':
            if in_brackets:
                pieces.append('\\[')
            else:
                # Look ahead for an unescaped "/" before the closing "]".
                pos = index
                has_slash = False
                while pos < length and pat[pos] != ']':
                    if pat[pos] == '/' and pat[pos - 1] != '\\':
                        has_slash = True
                        break
                    pos += 1
                if has_slash:
                    # Not a character class: the "[" is an ordinary
                    # character and the rest of the pattern is translated
                    # normally, without skipping any characters.
                    pieces.append('\\[')
                else:
                    if index < length and pat[index] in '!^':
                        index += 1
                        pieces.append('[^')
                    else:
                        pieces.append('[')
                    in_brackets = True
        elif current_char == '-':
            if in_brackets:
                pieces.append(current_char)
            else:
                pieces.append('\\' + current_char)
        elif current_char == ']':
            pieces.append(current_char)
            in_brackets = False
        elif current_char == '{':
            # Scan to the first unescaped "," or "}" to classify the brace.
            pos = index
            has_comma = False
            while pos < length and (pat[pos] != '}' or is_escaped):
                if pat[pos] == ',' and not is_escaped:
                    has_comma = True
                    break
                is_escaped = pat[pos] == '\\' and not is_escaped
                pos += 1
            if not has_comma and pos < length:
                # "{...}" without a comma: a numeric range or literal braces.
                num_range = NUMERIC_RANGE.match(pat[index:pos])
                if num_range:
                    # Store a real list; a map() object would be a one-shot
                    # iterator on Python 3.
                    numeric_groups.append(
                        [int(bound) for bound in num_range.groups()])
                    pieces.append(r"([+-]?\d+)")
                else:
                    inner_result, inner_groups = translate(pat[index:pos],
                                                           nested=True)
                    pieces.append('\\{%s\\}' % (inner_result,))
                    numeric_groups += inner_groups
                index = pos + 1
            elif matching_braces:
                pieces.append('(?:')
                brace_level += 1
            else:
                pieces.append('\\{')
        elif current_char == ',':
            if brace_level > 0 and not is_escaped:
                pieces.append('|')
            else:
                pieces.append('\\,')
        elif current_char == '}':
            if brace_level > 0 and not is_escaped:
                pieces.append(')')
                brace_level -= 1
            else:
                pieces.append('\\}')
        elif current_char == '/':
            # "/**/" matches a single "/" or any run of directories.
            if pat[index:(index + 3)] == "**/":
                pieces.append("(?:/|/.*/)")
                index += 3
            else:
                pieces.append('/')
        elif current_char != '\\':
            pieces.append(re.escape(current_char))
        if current_char == '\\':
            if is_escaped:
                pieces.append(re.escape(current_char))
            is_escaped = not is_escaped
        else:
            is_escaped = False
    result = ''.join(pieces)
    if not nested:
        # Global inline flags must come first; Python 3.11+ refuses to
        # compile an expression that has them anywhere else.
        result = '(?ms)' + result + r'\Z'
    return result, numeric_groups