|
# -*- coding: utf-8 -*-
"""
    pygments.util
    ~~~~~~~~~~~~~

    Utility functions.

    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
|
11 import re |
|
12 import sys |
|
13 |
|
14 |
|
# Separators used to split a shebang line into path components and arguments.
split_path_re = re.compile(r'[/\\ ]')

# Optional processing instruction (group 1) followed by a DOCTYPE declaration.
# Group 2 captures the first part of the declaration: two identifiers and a
# double-quoted string, e.g. ``html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"``.
doctype_lookup_re = re.compile(r'''(?smx)
    (<\?.*?\?>)?\s*
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*\s+
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")
     [^>]*>
''')

# An opening tag followed, anywhere later, by a closing tag.
# The flags are passed as an argument rather than as a trailing ``(?uism)``:
# inline global flags that are not at the very start of a pattern are
# deprecated since Python 3.6 and rejected outright by Python 3.11+.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.UNICODE | re.IGNORECASE | re.DOTALL | re.MULTILINE)
|
25 |
|
26 |
|
class ClassNotFound(ValueError):
    """
    Raised when one of the get_*_by_* lookup functions cannot find a
    matching class.
    """
|
31 |
|
32 |
|
class OptionError(Exception):
    """
    Raised by the option helper functions in this module when an option
    has a value of an unsupported type or an unacceptable value.
    """
|
35 |
|
36 |
|
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of option `optname`, restricted to the choices in
    `allowed`.

    The value is looked up in the `options` mapping and falls back to
    `default` when the option is absent.  If `normcase` is true, values
    that have a ``lower`` method are lowercased before the check.

    Raises `OptionError` if the resulting value is not in `allowed`.
    """
    value = options.get(optname, default)
    # Only case-normalize values that support it.  Anything else (for
    # example a missing option whose default is None) is validated as-is
    # instead of failing with an AttributeError on ``.lower()``.
    if normcase and hasattr(value, 'lower'):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value
|
45 |
|
46 |
|
def get_bool_opt(options, optname, default=None):
    """
    Return the value of option `optname` as a boolean.

    The value is looked up in the `options` mapping and falls back to
    `default` when the option is absent.  Booleans are returned unchanged,
    integers are converted with ``bool()``, and strings are interpreted
    case-insensitively: ``1``, ``yes``, ``true`` and ``on`` give True;
    ``0``, ``no``, ``false`` and ``off`` give False.

    Raises `OptionError` for any other type or string value.
    """
    # ``basestring`` only exists on Python 2 (where it covers str and
    # unicode); on Python 3 plain ``str`` is the text type.
    try:
        text_types = basestring
    except NameError:
        text_types = str

    value = options.get(optname, default)
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, text_types):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))
|
65 |
|
66 |
|
def get_int_opt(options, optname, default=None):
    """
    Return the value of option `optname` converted with ``int()``.

    The value is looked up in the `options` mapping and falls back to
    `default` when the option is absent.

    Raises `OptionError` if ``int()`` rejects the value, with a message
    that distinguishes an unsupported type from an unparsable value.
    """
    raw = options.get(optname, default)
    try:
        number = int(raw)
    except TypeError:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    except ValueError:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              raw, optname))
    else:
        return number
|
79 |
|
80 |
|
def get_list_opt(options, optname, default=None):
    """
    Return the value of option `optname` as a list.

    The value is looked up in the `options` mapping and falls back to
    `default` when the option is absent.  A string is split on
    whitespace; a list or tuple is copied into a new list.

    Raises `OptionError` for any other type.
    """
    # ``basestring`` only exists on Python 2 (where it covers str and
    # unicode); on Python 3 plain ``str`` is the text type.
    try:
        text_types = basestring
    except NameError:
        text_types = str

    val = options.get(optname, default)
    if isinstance(val, text_types):
        return val.split()
    if isinstance(val, (list, tuple)):
        return list(val)
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          val, optname))
|
91 |
|
92 |
|
def docstring_headline(obj):
    """
    Return the first paragraph of `obj`'s docstring as a single line.

    The lines up to the first blank line are stripped and joined with
    single spaces.  An empty string is returned when `obj` has no
    docstring.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    headline = []
    for raw_line in doc.strip().splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # A blank line ends the first paragraph.
            break
        headline.append(stripped)
    return ' '.join(headline)
|
103 |
|
104 |
|
def make_analysator(f):
    """
    Wrap `f` as a static text analysis function that returns floats.

    The wrapper calls `f` with the text.  A false result gives ``0.0``;
    any other result is converted with ``float()`` and limited to the
    range ``0.0`` .. ``1.0``.  The wrapper carries `f`'s docstring and is
    returned wrapped in ``staticmethod``.
    """
    def text_analyse(text):
        score = f(text)
        if score:
            floored = max(0.0, float(score))
            return min(1.0, floored)
        return 0.0
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
|
117 |
|
118 |
|
def shebang_matches(text, regex):
    r"""
    Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``).  The expression is
    grouped before it is anchored, so a top-level alternation such as
    ``'perl|python'`` is anchored as a whole.

    Returns True or False; False is also returned when `text` does not
    start with a shebang line.
    """
    # Only the first line can hold a shebang; compare case-insensitively.
    first_line = text.split('\n', 1)[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Split the command into path components and arguments, dropping empty
    # pieces and option flags; the last remaining piece is what is matched.
    parts = [x for x in split_path_re.split(first_line[2:].strip())
             if x and not x.startswith('-')]
    if not parts:
        return False
    # Raw template (``\.`` is not a valid string escape) and a non-capturing
    # group so that the anchors apply to the entire caller-supplied regex.
    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
                         re.IGNORECASE)
    return pattern.search(parts[-1]) is not None
|
163 |
|
164 |
|
def doctype_matches(text, regex):
    """
    Check whether the doctype at the start of `text` matches `regex`.

    Only the first part of the DOCTYPE declaration is compared, eg:
    'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'.  The expression is
    matched from the beginning of that part.  Returns False when `text`
    does not begin with a doctype declaration.
    """
    found = doctype_lookup_re.match(text)
    if found is not None:
        declared = found.group(2).strip()
        return re.match(regex, declared) is not None
    return False
|
176 |
|
177 |
|
def html_doctype_matches(text):
    """
    Check whether `text` begins with a W3C HTML or XHTML public doctype.
    """
    html_doctype = r'html\s+PUBLIC\s+"-//W3C//DTD X?HTML.*'
    return doctype_matches(text, html_doctype)
|
183 |
|
184 |
|
# Results of the tag search in looks_like_xml, keyed by hash(text).
_looks_like_xml_cache = {}
def looks_like_xml(text):
    """
    Check whether `text` looks like XML.

    This is true if `text` begins with a doctype declaration, or if an
    opening tag followed by a closing tag is found within its first 1000
    characters.  Tag-search results are cached under ``hash(text)``;
    doctype hits are returned without being cached.
    """
    cache_key = hash(text)
    if cache_key in _looks_like_xml_cache:
        return _looks_like_xml_cache[cache_key]
    if doctype_lookup_re.match(text) is not None:
        return True
    has_tags = tag_re.search(text[:1000]) is not None
    _looks_like_xml_cache[cache_key] = has_tags
    return has_tags
|
200 |
|
# Python 2/3 compatibility: expose ``b``, ``bytes``, ``u_prefix``,
# ``BytesIO`` and ``StringIO`` under the same names on both major versions.

if sys.version_info >= (3, 0):
    import builtins
    import io

    bytes = builtins.bytes
    u_prefix = ''
    BytesIO = io.BytesIO
    StringIO = io.StringIO

    def b(s):
        """
        Return `s` as a bytes object.

        A ``str`` is converted code point by code point, a ``bytes``
        object is returned unchanged, and anything else raises TypeError.
        """
        if isinstance(s, str):
            return bytes([ord(char) for char in s])
        if isinstance(s, bytes):
            return s
        raise TypeError("Invalid argument %r for b()" % (s,))
else:
    import StringIO
    import cStringIO

    b = bytes = str
    u_prefix = 'u'
    BytesIO = cStringIO.StringIO
    StringIO = StringIO.StringIO