eric6/ThirdParty/Pygments/pygments/util.py

changeset 6942
2602857055c5
parent 5713
6762afd9f963
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.util
4 ~~~~~~~~~~~~~
5
6 Utility functions.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 import sys
14
15
# Separators used to break a shebang line into pieces: a forward slash,
# a backslash, or a space.
split_path_re = re.compile(r'[/\\ ]')
# Matches an optional leading ``<? ... ?>`` processing instruction (group 1),
# then a ``<!DOCTYPE ...>`` declaration.  Group 2 captures the doctype name
# plus an optional ``KEYWORD "quoted string"`` pair that follows it.
# Compiled in VERBOSE mode, so whitespace inside the pattern is ignored.
doctype_lookup_re = re.compile(r'''
(<\?.*?\?>)?\s*
<!DOCTYPE\s+(
[a-zA-Z_][a-zA-Z0-9]*
(?: \s+ # optional in HTML5
[a-zA-Z_][a-zA-Z0-9]*\s+
"[^"]*")?
)
[^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)
# Matches something shaped like ``<name attrs>...</name>``: group 1 is the
# opening tag name, group 2 the optional attribute text.  The closing tag is
# not required to have the same name as the opening one.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.UNICODE | re.IGNORECASE | re.DOTALL | re.MULTILINE)
# Matches an ``<?xml ... ?>`` declaration, optionally preceded by whitespace
# (case-insensitive).
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
30
31
class ClassNotFound(ValueError):
    """Signals that a lookup function could not find a matching class."""
34
35
class OptionError(Exception):
    """Raised by the option helpers in this module for unusable values."""
38
39
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """Look up option `optname` and require it to be a member of `allowed`.

    The value is read from the `options` mapping, falling back to `default`.
    With `normcase` set, the value is lower-cased before the membership test
    and the lower-cased form is what gets returned.

    Raises `OptionError` if the (possibly lower-cased) value is not in
    `allowed`.
    """
    value = options.get(optname, default)
    value = value.lower() if normcase else value
    if value in allowed:
        return value
    choices = ', '.join(map(str, allowed))
    raise OptionError('Value for option %s must be one of %s' %
                      (optname, choices))
48
49
def get_bool_opt(options, optname, default=None):
    """Interpret option `optname` from `options` as a boolean.

    Booleans are returned unchanged and other integers are passed through
    ``bool()``.  Strings are compared case-insensitively against
    ``1/yes/true/on`` (true) and ``0/no/false/off`` (false).

    Raises `OptionError` for values of any other type and for strings that
    match none of the recognised words.
    """
    value = options.get(optname, default)
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, string_types):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))
68
69
def get_int_opt(options, optname, default=None):
    """Return option `optname` from `options` converted with ``int()``.

    Falls back to `default` when the option is absent.  Raises `OptionError`
    when ``int()`` rejects the value, with a message that distinguishes a
    wrong type (``TypeError``) from a wrong value (``ValueError``).
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    return number
82
83
def get_list_opt(options, optname, default=None):
    """Return option `optname` from `options` as a list.

    A string value is split on whitespace; a list or tuple is copied into a
    new list.  Any other type raises `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, string_types):
        return value.split()
    if not isinstance(value, (list, tuple)):
        raise OptionError('Invalid type %r for option %s; you '
                          'must give a list value' % (
                              value, optname))
    return list(value)
94
95
def docstring_headline(obj):
    """Return the first paragraph of ``obj.__doc__`` as a single line.

    Lines are stripped and joined with single spaces, stopping at the first
    blank line.  An empty string is returned when there is no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)
106
107
def make_analysator(f):
    """Wrap `f` as a static text-analysis function that returns a float.

    The wrapper calls ``f(text)`` and clamps the result to ``0.0 .. 1.0``.
    It returns ``0.0`` when `f` raises, returns a falsy value, or returns
    something ``float()`` cannot convert.  The docstring of `f` is copied
    onto the wrapper, which is returned as a ``staticmethod``.
    """
    def text_analyse(text):
        try:
            score = f(text)
        except Exception:
            return 0.0
        if score:
            try:
                return min(1.0, max(0.0, float(score)))
            except (ValueError, TypeError):
                pass
        return 0.0
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
123
124
def shebang_matches(text, regex):
    r"""Tell whether the interpreter named in a shebang line matches `regex`.

    Only the first line of `text` is examined, lower-cased.  If it starts
    with ``#!``, the rest is split on slashes, backslashes and spaces; the
    last piece that is non-empty and does not start with ``-`` is matched.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    Common Windows executable extensions are accepted as well::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'``) are skipped, so ``'perl'`` behaves
    the same as ``'perl -e'``.

    The expression is anchored: it is wrapped in ``'^'`` and ``'$'`` (plus
    the optional extension) before being applied.
    """
    first_line = text.split('\n', 1)[0].lower()
    if not first_line.startswith('#!'):
        return False
    candidates = [part
                  for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False
    pattern = re.compile(r'^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None
168
169
def doctype_matches(text, regex):
    """Tell whether the DOCTYPE at the start of `text` matches `regex`.

    Returns ``False`` when no DOCTYPE is found.  Only the first part of the
    declaration is compared, e.g.
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.  The match is
    case-insensitive and anchored at the start of that part.
    """
    found = doctype_lookup_re.match(text)
    if found is None:
        return False
    declared = found.group(2).strip()
    return re.match(regex, declared, re.I) is not None
181
182
def html_doctype_matches(text):
    """Tell whether `text` begins with a DOCTYPE whose name starts with html."""
    return doctype_matches(text, regex=r'html')
186
187
# Remembers tag-search results between calls, keyed by hash(text).
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Tell whether `text` has an XML declaration, a DOCTYPE, or some tags.

    The tag search looks only at the first 1000 characters and is the only
    result that gets cached; declaration and DOCTYPE hits return immediately.
    """
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    if key in _looks_like_xml_cache:
        return _looks_like_xml_cache[key]
    if doctype_lookup_re.match(text) is not None:
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[key] = has_tags
    return has_tags
205
206
207 # Python narrow build compatibility
208
def _surrogatepair(c):
    # Split a code point wider than 16 bits into its
    # (high, low) pair of 16-bit surrogates.
    # Follows example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)
216
217
def unirange(a, b):
    """Return a regular expression string matching the non-BMP range a..b.

    `a` and `b` are integer code points, both at or above 0x10000, with
    ``a <= b``.  Raises ``ValueError`` if either condition is violated.

    On wide builds the result is a plain character class.  On narrow builds
    it is a non-capturing group of alternatives expressed in terms of
    surrogate pairs.
    """
    if b < a:
        raise ValueError("Bad character range")
    if a < 0x10000 or b < 0x10000:
        raise ValueError("unirange is only defined for non-BMP ranges")

    if sys.maxunicode > 0xffff:
        # wide build
        return u'[%s-%s]' % (unichr(a), unichr(b))

    # narrow build stores surrogates, and the 're' module handles them
    # (incorrectly) as characters.  Since there is still ordering among
    # these characters, expand the range to one that it understands.  Some
    # background in http://bugs.python.org/issue3665 and
    # http://bugs.python.org/issue12749
    #
    # Additionally, the lower constants are using unichr rather than
    # literals because jython [which uses the wide path] can't load this
    # file if they are literals.
    ah, al = _surrogatepair(a)
    bh, bl = _surrogatepair(b)
    if ah == bh:
        # Both ends share a high surrogate: one low-surrogate class suffices.
        return u'(?:%s[%s-%s])' % (unichr(ah), unichr(al), unichr(bl))

    # From here on ah < bh.  First alternative: the first high surrogate
    # with every low surrogate from `al` up to the end of the low block.
    buf = [u'%s[%s-%s]' % (unichr(ah), unichr(al), unichr(0xdfff))]
    if bh - ah > 1:
        # High surrogates strictly between the endpoints pair with the
        # whole low-surrogate block.
        buf.append(u'[%s-%s][%s-%s]' %
                   (unichr(ah + 1), unichr(bh - 1),
                    unichr(0xdc00), unichr(0xdfff)))
    # Last alternative: the final high surrogate with low surrogates from
    # the start of the low block up to `bl`.
    buf.append(u'%s[%s-%s]' % (unichr(bh), unichr(0xdc00), unichr(bl)))

    return u'(?:' + u'|'.join(buf) + u')'
255
256
def format_lines(var_name, seq, raw=False, indent_level=0):
    """Render `seq` as source text assigning a tuple literal to `var_name`.

    Each element goes on its own line followed by a comma, indented one
    level (four spaces) deeper than the assignment, which itself sits at
    `indent_level`.  With `raw` set the elements are inserted verbatim;
    otherwise each is written as a single-quoted string literal.
    """
    base_indent = ' ' * indent_level * 4
    inner_indent = ' ' * (indent_level + 1) * 4
    lines = [base_indent + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        lines.extend(inner_indent + item + ',' for item in seq)
    else:
        for item in seq:
            # Appending a double quote makes repr() pick single quotes;
            # the extra character is cut out again below.
            quoted = repr(item + '"')
            lines.append(inner_indent + quoted[:-2] + quoted[-1] + ',')
    lines.append(base_indent + ')')
    return '\n'.join(lines)
274
275
def duplicates_removed(it, already_seen=()):
    """
    Return the items of the iterable `it` as a list, without repeats.

    Items found in `already_seen` are left out as well.  The order of first
    occurrence is preserved.
    """
    unique = []
    emitted = set()
    for item in it:
        if item not in emitted and item not in already_seen:
            unique.append(item)
            emitted.add(item)
    return unique
290
291
class Future(object):
    """Base class for work that is deferred until it is needed.

    RegexLexerMeta treats instances specially so that regex strings can be
    constructed at first use.
    """

    def get(self):
        """Produce the deferred value; this base version always raises."""
        raise NotImplementedError
300
301
def guess_decode(text):
    """Decode the byte string *text* with a guessed encoding.

    Returns a ``(decoded_text, encoding_name)`` tuple.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Decode with the locale encoding itself, so that the encoding
        # reported to the caller is the one that produced the text.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding name unknown to the codecs
        # registry.
        return text.decode('latin1'), 'latin1'
321
322
def guess_decode_from_terminal(text, term):
    """Decode the byte string *text* that was read from terminal *term*.

    If *term* has a non-empty ``encoding`` attribute, that encoding is tried
    first.  When it is missing or fails to decode, the result comes from
    `guess_decode` (UTF-8, then the preferred locale encoding, then latin-1).

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            return text.decode(encoding), encoding
        except UnicodeDecodeError:
            # Fall through to the generic guessing below.
            pass
    return guess_decode(text)
338
339
def terminal_encoding(term):
    """Return the best available guess at the encoding used by *term*.

    A non-empty ``encoding`` attribute on *term* is used if present;
    otherwise the preferred locale encoding is returned.
    """
    encoding = getattr(term, 'encoding', None)
    if not encoding:
        import locale
        encoding = locale.getpreferredencoding()
    return encoding
346
347
348 # Python 2/3 compatibility
349
# Bind one set of names whose values depend on the running major version,
# so other code can use the same spelling on Python 2 and Python 3.
if sys.version_info < (3, 0):
    # Python 2: expose the builtins as module attributes.
    unichr = unichr
    xrange = xrange
    string_types = (str, unicode)
    text_type = unicode
    # Prefix that marks a unicode string literal on this version.
    u_prefix = 'u'
    iteritems = dict.iteritems
    itervalues = dict.itervalues
    import StringIO
    import cStringIO
    # unfortunately, io.StringIO in Python 2 doesn't accept str at all
    StringIO = StringIO.StringIO
    BytesIO = cStringIO.StringIO
else:
    # Python 3: map the Python 2 names onto their Python 3 equivalents.
    unichr = chr
    xrange = range
    string_types = (str,)
    text_type = str
    u_prefix = ''
    iteritems = dict.items
    itervalues = dict.values
    from io import StringIO, BytesIO, TextIOWrapper

    # Defined only on this branch: it needs TextIOWrapper, which is imported
    # just above.
    class UnclosingTextIOWrapper(TextIOWrapper):
        # Don't close underlying buffer on destruction.
        def close(self):
            # Only flush; the inherited close() is deliberately not called.
            self.flush()
377
378
def add_metaclass(metaclass):
    """Return a class decorator that rebuilds a class using `metaclass`.

    The decorated class is re-created by calling `metaclass` with the same
    name and bases and a copy of the class namespace.  The ``__dict__`` and
    ``__weakref__`` entries and any names listed in ``__slots__`` are left
    out of that copy.
    """
    def wrapper(cls):
        namespace = cls.__dict__.copy()
        for implicit in ('__dict__', '__weakref__'):
            namespace.pop(implicit, None)
        for slot_name in namespace.get('__slots__', ()):
            namespace.pop(slot_name)
        return metaclass(cls.__name__, cls.__bases__, namespace)
    return wrapper

eric ide

mercurial