56 string = options.get(optname, default) |
50 string = options.get(optname, default) |
57 if isinstance(string, bool): |
51 if isinstance(string, bool): |
58 return string |
52 return string |
59 elif isinstance(string, int): |
53 elif isinstance(string, int): |
60 return bool(string) |
54 return bool(string) |
61 elif not isinstance(string, str): |
55 elif not isinstance(string, string_types): |
62 raise OptionError('Invalid type %r for option %s; use ' |
56 raise OptionError('Invalid type %r for option %s; use ' |
63 '1/0, yes/no, true/false, on/off' % ( |
57 '1/0, yes/no, true/false, on/off' % ( |
64 string, optname)) |
58 string, optname)) |
65 elif string.lower() in ('1', 'yes', 'true', 'on'): |
59 elif string.lower() in ('1', 'yes', 'true', 'on'): |
66 return True |
60 return True |
67 elif string.lower() in ('0', 'no', 'false', 'off'): |
61 elif string.lower() in ('0', 'no', 'false', 'off'): |
68 return False |
62 return False |
69 else: |
63 else: |
70 raise OptionError('Invalid value %r for option %s; use ' |
64 raise OptionError('Invalid value %r for option %s; use ' |
71 '1/0, yes/no, true/false, on/off' % ( |
65 '1/0, yes/no, true/false, on/off' % ( |
72 string, optname)) |
66 string, optname)) |
73 |
67 |
74 |
68 |
def get_int_opt(options, optname, default=None):
    """Return option *optname* from *options* converted to an integer.

    The value is fetched with ``options.get(optname, default)`` and handed
    to ``int()``.

    Raises ``OptionError`` when ``int()`` rejects the value, either because
    of its type (``TypeError``) or because of its contents (``ValueError``).
    """
    string = options.get(optname, default)
    try:
        return int(string)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              string, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              string, optname))
87 |
81 |
88 |
82 |
def get_list_opt(options, optname, default=None):
    """Return option *optname* from *options* as a list.

    A string value is split on whitespace; a list or tuple is copied into a
    new list.

    Raises ``OptionError`` for any other type (including ``None``).
    """
    val = options.get(optname, default)
    if isinstance(val, string_types):
        return val.split()
    elif isinstance(val, (list, tuple)):
        return list(val)
    else:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give a list value' % (
                              val, optname))
99 |
93 |
100 |
94 |
101 def docstring_headline(obj): |
95 def docstring_headline(obj): |
102 if not obj.__doc__: |
96 if not obj.__doc__: |
103 return '' |
97 return '' |
175 return True |
165 return True |
176 return False |
166 return False |
177 |
167 |
178 |
168 |
def doctype_matches(text, regex):
    """Check if the doctype matches a regular expression (if present).

    Note that this method only checks the first part of a DOCTYPE.
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'

    Returns ``False`` when ``doctype_lookup_re`` does not match *text*.
    Otherwise *regex* is matched case-insensitively against the start of
    the whitespace-stripped doctype.
    """
    m = doctype_lookup_re.match(text)
    if m is None:
        return False
    # Group 2 of the lookup pattern holds the doctype text itself.
    doctype = m.group(2)
    return re.compile(regex, re.I).match(doctype.strip()) is not None
190 |
180 |
191 |
181 |
def html_doctype_matches(text):
    """Check if the file looks like it has a html doctype."""
    return doctype_matches(text, r'html')
|
197 |
185 |
198 |
186 |
# Results of the tag search in looks_like_xml(), keyed by hash(text).
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Check if a doctype exists or if we have some tags."""
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    try:
        return _looks_like_xml_cache[key]
    except KeyError:
        m = doctype_lookup_re.match(text)
        if m is not None:
            return True
        # Only the first 1000 characters are searched for a tag, and only
        # this result is stored in the cache.
        rv = tag_re.search(text[:1000]) is not None
        _looks_like_xml_cache[key] = rv
        return rv
214 |
204 |
|
205 |
215 # Python narrow build compatibility |
206 # Python narrow build compatibility |
216 |
207 |
def _surrogatepair(c):
    """Return the ``(high, low)`` 16 bit surrogate pair for code point *c*.

    *c* is a unicode character code with length greater than 16 bits.
    From example D28 of: http://www.unicode.org/book/ch03.pdf
    """
    # 0xd7c0 is 0xd800 - (0x10000 >> 10), so the 0x10000 offset is removed
    # as part of the addition; the low half keeps the bottom 10 bits.
    return (0xd7c0 + (c >> 10), (0xdc00 + (c & 0x3ff)))
219 |
215 |
|
216 |
def unirange(a, b):
    """Returns a regular expression string to match the given non-BMP range.

    *a* and *b* are the inclusive first and last code points of the range.

    Raises ``ValueError`` if ``b < a`` or if either bound is below 0x10000.
    """
    if b < a:
        raise ValueError("Bad character range")
    if a < 0x10000 or b < 0x10000:
        raise ValueError("unirange is only defined for non-BMP ranges")

    if sys.maxunicode > 0xffff:
        # wide build
        return u'[%s-%s]' % (unichr(a), unichr(b))

    # narrow build stores surrogates, and the 're' module handles them
    # (incorrectly) as characters.  Since there is still ordering among
    # these characters, expand the range to one that it understands.  Some
    # background in http://bugs.python.org/issue3665.
    #
    # The constants below go through unichr() rather than being written as
    # literals because jython [which uses the wide path] can't load this
    # file if they are literals.
    ah, al = _surrogatepair(a)
    bh, bl = _surrogatepair(b)
    if ah == bh:
        # Both ends share a high surrogate: one class over the low halves.
        return u'(?:%s[%s-%s])' % (unichr(ah), unichr(al), unichr(bl))

    # First high surrogate: from the starting low surrogate to the last one.
    buf = [u'%s[%s-%s]' % (unichr(ah), unichr(al), unichr(0xdfff))]
    if bh - ah > 1:
        # High surrogates strictly between the two ends cover every low
        # surrogate.
        buf.append(u'[%s-%s][%s-%s]' % (
            unichr(ah + 1), unichr(bh - 1),
            unichr(0xdc00), unichr(0xdfff)))
    # Last high surrogate: from the first low surrogate to the ending one.
    buf.append(u'%s[%s-%s]' % (unichr(bh), unichr(0xdc00), unichr(bl)))

    return u'(?:' + u'|'.join(buf) + u')'
|
254 |
|
255 |
|
def format_lines(var_name, seq, raw=False, indent_level=0):
    """Formats a sequence of strings for output.

    Produces the text of an assignment ``var_name = (...)`` with one item of
    *seq* per line, each followed by a comma.  *indent_level* counts units
    of four spaces applied to the whole statement.  With *raw* true the
    items are emitted verbatim; otherwise each item is written as a
    single-quoted string literal.
    """
    lines = []
    base_indent = ' ' * indent_level * 4
    inner_indent = ' ' * (indent_level + 1) * 4
    lines.append(base_indent + var_name + ' = (')
    if raw:
        # These should be preformatted reprs of, say, tuples.
        for i in seq:
            lines.append(inner_indent + i + ',')
    else:
        for i in seq:
            # Force use of single quotes: appending a double quote makes
            # repr() pick single quotes as the delimiter; the added
            # character is then cut out again, keeping the closing quote.
            r = repr(i + '"')
            lines.append(inner_indent + r[:-2] + r[-1] + ',')
    lines.append(base_indent + ')')
    return '\n'.join(lines)
|
273 |
|
274 |
|
def duplicates_removed(it, already_seen=()):
    """
    Returns a list with duplicates removed from the iterable `it`.

    Items that are contained in `already_seen` are left out as well.

    Order is preserved.
    """
    lst = []
    seen = set()
    for i in it:
        if i in seen or i in already_seen:
            continue
        lst.append(i)
        seen.add(i)
    return lst
|
289 |
|
290 |
|
class Future(object):
    """Generic class to defer some work.

    Handled specially in RegexLexerMeta, to support regex string construction at
    first use.
    """
    def get(self):
        # This base class provides no implementation of the deferred work.
        raise NotImplementedError
|
299 |
|
300 |
|
def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple, where the name is the
    encoding that was actually used for decoding.
    """
    try:
        text = text.decode('utf-8')
        return text, 'utf-8'
    except UnicodeDecodeError:
        try:
            import locale
            prefencoding = locale.getpreferredencoding()
            # Pass the locale encoding explicitly: a bare decode() would use
            # the interpreter's default codec and ignore prefencoding, while
            # still reporting prefencoding as the encoding used.
            text = text.decode(prefencoding)
            return text, prefencoding
        except (UnicodeDecodeError, LookupError):
            text = text.decode('latin1')
            return text, 'latin1'
|
320 |
|
321 |
|
def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            text = text.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # An unusable or unknown terminal encoding is not fatal; fall
            # through to the generic guessing below.
            pass
        else:
            return text, encoding
    return guess_decode(text)
|
337 |
|
338 |
|
def terminal_encoding(term):
    """Return our best guess of encoding for the given *term*.

    Uses ``term.encoding`` when that attribute exists and is truthy, and
    the preferred locale encoding otherwise.
    """
    if getattr(term, 'encoding', None):
        return term.encoding
    import locale
    return locale.getpreferredencoding()
|
345 |
259 |
346 |
# Python 2/3 compatibility
#
# Both branches bind the same set of names (unichr, xrange, string_types,
# text_type, u_prefix, iteritems, itervalues, StringIO, BytesIO) so the rest
# of the module can use them without checking the interpreter version.

if sys.version_info < (3, 0):
    unichr = unichr
    xrange = xrange
    string_types = (str, unicode)
    text_type = unicode
    u_prefix = 'u'
    iteritems = dict.iteritems
    itervalues = dict.itervalues
    import StringIO
    import cStringIO
    # unfortunately, io.StringIO in Python 2 doesn't accept str at all
    StringIO = StringIO.StringIO
    BytesIO = cStringIO.StringIO
else:
    unichr = chr
    xrange = range
    string_types = (str,)
    text_type = str
    u_prefix = ''
    iteritems = dict.items
    itervalues = dict.values
    from io import StringIO, BytesIO, TextIOWrapper

    class UnclosingTextIOWrapper(TextIOWrapper):
        # Don't close underlying buffer on destruction.
        def close(self):
            pass


def add_metaclass(metaclass):
    """Class decorator for creating a class with a metaclass.

    The decorated class is rebuilt by calling *metaclass* with the original
    name, bases and a copy of the class namespace; the rebuilt class is
    returned in its place.
    """
    def wrapper(cls):
        orig_vars = cls.__dict__.copy()
        slots = orig_vars.get('__slots__')
        if slots is not None:
            # A lone string is a valid ``__slots__`` naming a single slot;
            # iterating it would yield its characters instead of the name.
            if isinstance(slots, str):
                slots = [slots]
            # Drop the slot descriptors so the rebuilt class can create
            # its own from ``__slots__``.
            for slots_var in slots:
                orig_vars.pop(slots_var)
        # Removed after the slots loop so that a ``__slots__`` entry named
        # '__dict__' or '__weakref__' has not already been popped.
        orig_vars.pop('__dict__', None)
        orig_vars.pop('__weakref__', None)
        return metaclass(cls.__name__, cls.__bases__, orig_vars)
    return wrapper