ThirdParty/Pygments/pygments/util.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.util
4 ~~~~~~~~~~~~~
5
6 Utility functions.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11 import re
12 import sys
13
14
# Splits a shebang line into components on '/', '\' and ' '.
split_path_re = re.compile(r'[/\\ ]')

# Matches an optional leading ``<?...?>`` processing instruction followed by
# a DOCTYPE declaration.  Group 2 captures two identifiers and the
# double-quoted string that follows them.
doctype_lookup_re = re.compile(r'''(?smx)
    (<\?.*?\?>)?\s*
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*\s+
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")
     [^>]*>
''')

# Matches an opening tag, arbitrary content and a closing tag.  The inline
# flags have to lead the pattern: global flags anywhere else are deprecated
# since Python 3.6 and rejected with ``re.error`` from Python 3.11 on.
tag_re = re.compile(r'(?uism)<(.+?)(\s.*?)?>.*?</.+?>')
25
26
class ClassNotFound(ValueError):
    """
    Raised when one of the get_*_by_* functions cannot find a matching
    class.
    """
31
32
class OptionError(Exception):
    """
    Raised by the ``get_*_opt`` helpers when an option value has an
    unsupported type or an invalid value.
    """
35
36
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of option `optname`, which must be one of `allowed`.

    `options` is a mapping of option names to values; `default` is used
    when `optname` is missing.  If `normcase` is true, a string value is
    lower-cased before it is compared against `allowed`.

    Raises `OptionError` if the resulting value is not in `allowed`.
    """
    string = options.get(optname, default)
    # Only values that can be lower-cased are normalised; anything else
    # (such as the ``None`` default) goes straight to the membership check
    # instead of failing with an AttributeError.
    if normcase and hasattr(string, 'lower'):
        string = string.lower()
    if string not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return string
45
46
def get_bool_opt(options, optname, default=None):
    """
    Return option `optname` from `options` as a boolean.

    Booleans are returned unchanged and other integers are converted with
    ``bool()``.  Strings are compared case-insensitively: ``1``, ``yes``,
    ``true`` and ``on`` give ``True``; ``0``, ``no``, ``false`` and ``off``
    give ``False``.  `default` is used when the option is missing.

    Raises `OptionError` for any other type or any other string value.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string check does not fail with a NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    string = options.get(optname, default)
    if isinstance(string, bool):
        return string
    elif isinstance(string, int):
        return bool(string)
    elif not isinstance(string, string_types):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                          string, optname))

    lowered = string.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    elif lowered in ('0', 'no', 'false', 'off'):
        return False
    else:
        raise OptionError('Invalid value %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                          string, optname))
65
66
def get_int_opt(options, optname, default=None):
    """
    Return option `optname` from `options` converted with ``int()``.

    `default` is converted instead when the option is missing.  Raises
    `OptionError` if the conversion fails with a ``TypeError`` or a
    ``ValueError``.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                          raw, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                          raw, optname))
    else:
        return number
79
80
def get_list_opt(options, optname, default=None):
    """
    Return option `optname` from `options` as a list.

    A string value is split on whitespace; a list or tuple is copied into
    a new list.  `default` is used when the option is missing.

    Raises `OptionError` for any other type.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string check does not fail with a NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    val = options.get(optname, default)
    if isinstance(val, string_types):
        return val.split()
    elif isinstance(val, (list, tuple)):
        return list(val)
    else:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give a list value' % (
                          val, optname))
91
92
def docstring_headline(obj):
    """
    Return the first paragraph of `obj`'s docstring as a single line.

    The lines up to the first blank line are stripped and joined with
    single spaces.  An empty string is returned if there is no docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    pieces = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        pieces.append(stripped)
    return ' '.join(pieces)
103
104
def make_analysator(f):
    """
    Return a static text analysation function that
    returns float values.

    The returned ``staticmethod`` wraps `f`: a false result becomes
    ``0.0``, any other result is converted to ``float`` and clamped to the
    range ``0.0`` to ``1.0``.  A result that cannot be converted to a
    float counts as ``0.0``.  The wrapper carries the docstring of `f`.
    """
    def text_analyse(text):
        rv = f(text)
        if not rv:
            return 0.0
        try:
            score = float(rv)
        except (TypeError, ValueError):
            # A non-numeric result carries no usable score; treat it like
            # "no match" instead of letting the conversion error escape.
            return 0.0
        return min(1.0, max(0.0, score))
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
117
118
def shebang_matches(text, regex):
    r"""
    Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``)
    """
    # Only the first line of the text can hold a shebang.
    newline = text.find('\n')
    if newline >= 0:
        first_line = text[:newline].lower()
    else:
        first_line = text.lower()
    if not first_line.startswith('#!'):
        return False

    # Split on path separators and spaces, dropping empty pieces and
    # command line switches; the last remaining piece is the interpreter.
    parts = [x for x in split_path_re.split(first_line[2:].strip())
             if x and not x.startswith('-')]
    if not parts:
        return False

    # The caller's regex is wrapped in a non-capturing group so that a
    # top-level alternation stays inside the ``^...$`` anchors.  The
    # template is a raw string to avoid the invalid ``\.`` escape.
    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
                         re.IGNORECASE)
    return pattern.search(parts[-1]) is not None
163
164
def doctype_matches(text, regex):
    """
    Check whether the DOCTYPE at the start of `text` matches `regex`.

    Only the first part of the DOCTYPE is compared, for example
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.  Returns ``False``
    when `text` does not start with a DOCTYPE.
    """
    found = doctype_lookup_re.match(text)
    if found is not None:
        # Group 2 holds the part of the declaration that is compared.
        declared = found.group(2).strip()
        return re.match(regex, declared) is not None
    return False
176
177
def html_doctype_matches(text):
    """
    Check whether `text` starts with what looks like an HTML or XHTML
    doctype.
    """
    html_pattern = r'html\s+PUBLIC\s+"-//W3C//DTD X?HTML.*'
    return doctype_matches(text, html_pattern)
183
184
# Results of the tag search in looks_like_xml(), keyed by hash(text).
_looks_like_xml_cache = {}
def looks_like_xml(text):
    """
    Check whether `text` starts with a doctype or contains some tags.

    The tag search only looks at the first 1000 characters and its result
    is remembered in a module-level cache.
    """
    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]
    # A doctype hit is returned directly and is not stored in the cache.
    if doctype_lookup_re.match(text) is not None:
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags
200
# Python 2/3 compatibility

if sys.version_info >= (3, 0):
    import builtins
    import io
    bytes = builtins.bytes
    u_prefix = ''
    BytesIO = io.BytesIO
    StringIO = io.StringIO

    def b(s):
        """
        Return `s` as a bytes object.

        Bytes are returned unchanged; a str is converted character by
        character using each character's code point.  Any other type
        raises ``TypeError``.
        """
        if isinstance(s, bytes):
            return s
        if isinstance(s, str):
            return bytes(map(ord, s))
        raise TypeError("Invalid argument %r for b()" % (s,))
else:
    import StringIO
    import cStringIO
    # On Python 2 the native str type already is the byte string type.
    bytes = str
    b = bytes
    u_prefix = 'u'
    BytesIO = cStringIO.StringIO
    StringIO = StringIO.StringIO

eric ide

mercurial