|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers |
|
4 ~~~~~~~~~~~~~~~ |
|
5 |
|
6 Pygments lexers. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 import sys |
|
14 import types |
|
15 import fnmatch |
|
16 from os.path import basename |
|
17 |
|
18 from pygments.lexers._mapping import LEXERS |
|
19 from pygments.modeline import get_filetype_from_buffer |
|
20 from pygments.plugin import find_plugin_lexers |
|
21 from pygments.util import ClassNotFound, itervalues, guess_decode |
|
22 |
|
23 |
|
# Names exported by ``from pygments.lexers import *``: the lookup helpers
# listed here plus every key of the LEXERS mapping.
__all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
           'guess_lexer', 'load_lexer_from_file'] + list(LEXERS)

# Lexer classes that have already been imported, keyed by their ``name``
# attribute (filled by _load_lexers).
_lexer_cache = {}
# Compiled regular expressions for filename globs, keyed by the glob string
# (filled by _fn_matches).
_pattern_cache = {}
|
29 |
|
30 |
|
def _fn_matches(fn, glob):
    """Match the file name `fn` against the shell-style pattern `glob`.

    The glob is translated to a regular expression and compiled the first
    time it is seen; the compiled pattern is kept in ``_pattern_cache``.
    Returns the match object on success and ``None`` otherwise.
    """
    compiled = _pattern_cache.get(glob)
    if compiled is None:
        # first time this glob is seen: translate, compile and remember it
        compiled = re.compile(fnmatch.translate(glob))
        _pattern_cache[glob] = compiled
    return compiled.match(fn)
|
37 |
|
38 |
|
def _load_lexers(module_name):
    """Import `module_name` and register every lexer it exports.

    Each object named in the module's ``__all__`` is stored in
    ``_lexer_cache`` under its ``name`` attribute, so loading one lexer
    also registers the others defined in the same module.
    """
    module = __import__(module_name, None, None, ['__all__'])
    for exported in module.__all__:
        lexer_cls = getattr(module, exported)
        _lexer_cache[lexer_cls.name] = lexer_cls
|
45 |
|
46 |
|
def get_all_lexers():
    """Yield ``(name, aliases, filenames, mimetypes)`` for all known lexers.

    Entries of the builtin ``LEXERS`` mapping come first, followed by the
    lexers returned by ``find_plugin_lexers()``.
    """
    for entry in itervalues(LEXERS):
        # skip the leading module name, keep the remaining fields
        yield entry[1:]
    for plugin_cls in find_plugin_lexers():
        yield (plugin_cls.name, plugin_cls.aliases,
               plugin_cls.filenames, plugin_cls.mimetypes)
|
55 |
|
56 |
|
def find_lexer_class(name):
    """Look up a lexer class by its name.

    The cache of already loaded lexers is consulted first, then the builtin
    ``LEXERS`` mapping, and finally the plugin lexers.

    Returns None if no lexer carries that name.
    """
    try:
        return _lexer_cache[name]
    except KeyError:
        pass
    # builtin lexers: importing the module fills the cache
    for module_name, lexer_name, _, _, _ in itervalues(LEXERS):
        if name == lexer_name:
            _load_lexers(module_name)
            return _lexer_cache[name]
    # lexers registered through setuptools entrypoints
    for plugin_cls in find_plugin_lexers():
        if plugin_cls.name == name:
            return plugin_cls
    return None
|
73 |
|
74 |
|
def find_lexer_class_by_name(_alias):
    """Return the lexer class registered under the alias `_alias`.

    Works like `get_lexer_by_name`, but hands back the class itself instead
    of an instance.  The alias is lower-cased before it is compared.

    Raises ClassNotFound if `_alias` is empty or no lexer uses it.

    .. versionadded:: 2.2
    """
    if not _alias:
        raise ClassNotFound('no lexer for alias %r found' % _alias)
    # builtin lexers first
    for module_name, lexer_name, known_aliases, _, _ in itervalues(LEXERS):
        if _alias.lower() not in known_aliases:
            continue
        if lexer_name not in _lexer_cache:
            _load_lexers(module_name)
        return _lexer_cache[lexer_name]
    # then lexers from setuptools entrypoints
    for plugin_cls in find_plugin_lexers():
        if _alias.lower() in plugin_cls.aliases:
            return plugin_cls
    raise ClassNotFound('no lexer for alias %r found' % _alias)
|
95 |
|
96 |
|
def get_lexer_by_name(_alias, **options):
    """Return a lexer instance for the alias `_alias`.

    The class lookup is delegated to `find_lexer_class_by_name`, so both
    functions resolve aliases in exactly the same way (builtin lexers
    first, then plugin lexers).  The class found is instantiated with
    `options` as keyword arguments.

    Raises ClassNotFound if `_alias` is empty or no lexer uses it.
    """
    return find_lexer_class_by_name(_alias)(**options)
|
116 |
|
117 |
|
def load_lexer_from_file(filename, lexername="CustomLexer", **options):
    """Load a lexer from a file.

    This method expects a file located relative to the current working
    directory, which contains a Lexer class.  By default, it expects the
    Lexer to be named CustomLexer; you can specify your own class name
    as the second argument to this function.  The class is instantiated
    with `options` as keyword arguments and the instance is returned.

    Users should be very careful with the input, because this method
    is equivalent to running eval on the input file.

    Raises ClassNotFound if there are any problems importing the Lexer:
    the file cannot be read, it does not define `lexername`, or executing
    it / instantiating the class raises an exception.

    .. versionadded:: 2.2
    """
    try:
        # This empty dict will contain the namespace for the exec'd file
        custom_namespace = {}
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(filename, 'rb') as lexer_file:
            source = lexer_file.read()
        # SECURITY: this runs arbitrary code taken from `filename`; only
        # call this function with files you trust.
        exec(source, custom_namespace)
        # Retrieve the class `lexername` from that namespace
        if lexername not in custom_namespace:
            raise ClassNotFound('no valid %s class found in %s' %
                                (lexername, filename))
        lexer_class = custom_namespace[lexername]
        # And finally instantiate it with the options
        return lexer_class(**options)
    except IOError:
        raise ClassNotFound('cannot read %s' % filename)
    except ClassNotFound:
        # already the right exception type, pass it through unchanged
        raise
    except Exception as err:
        raise ClassNotFound('error when loading custom lexer: %s' % err)
|
150 |
|
151 |
|
def find_lexer_class_for_filename(_fn, code=None):
    """Return the lexer class best suited for the file name `_fn`.

    Only the base name of `_fn` is compared against the filename patterns
    of the builtin and plugin lexers.  If several lexers match, each one is
    rated -- with ``analyse_text(code)`` when `code` is given, with its
    ``priority`` otherwise -- and the highest rated class is returned.

    Returns None if not found.
    """
    fn = basename(_fn)
    candidates = []
    for modname, name, _, patterns, _ in itervalues(LEXERS):
        for pattern in patterns:
            if not _fn_matches(fn, pattern):
                continue
            if name not in _lexer_cache:
                _load_lexers(modname)
            candidates.append((_lexer_cache[name], pattern))
    for plugin_cls in find_plugin_lexers():
        for pattern in plugin_cls.filenames:
            if _fn_matches(fn, pattern):
                candidates.append((plugin_cls, pattern))

    if sys.version_info > (3,) and isinstance(code, bytes):
        # decode it, since all analyse_text functions expect unicode
        # NOTE(review): guess_decode is defined elsewhere -- confirm that it
        # returns the decoded text alone and not a (text, encoding) pair.
        code = guess_decode(code)

    def get_rating(info):
        cls, pattern = info
        # patterns without a wildcard are explicit and earn a bonus
        bonus = 0.5 if '*' not in pattern else 0
        # Every class has an analyse_text because the Lexer base class
        # provides one; the default implementation returns None, which
        # gets turned into 0.0.  scripts/detect_missing_analyse_text.py
        # lists the lexers that should override it.
        if code:
            return cls.analyse_text(code) + bonus, cls.__name__
        return cls.priority + bonus, cls.__name__

    if not candidates:
        return None
    # ascending sort: the best rated candidate ends up last
    candidates.sort(key=get_rating)
    return candidates[-1][0]
|
193 |
|
194 |
|
def get_lexer_for_filename(_fn, code=None, **options):
    """Return a lexer instance for the file name `_fn`.

    The class is chosen by `find_lexer_class_for_filename`; if multiple
    lexers match the filename pattern, ``analyse_text()`` is run on `code`
    to figure out which one is more appropriate.  The class is then
    instantiated with `options`.

    Raises ClassNotFound if not found.
    """
    lexer_cls = find_lexer_class_for_filename(_fn, code)
    if lexer_cls:
        return lexer_cls(**options)
    raise ClassNotFound('no lexer for filename %r found' % _fn)
|
207 |
|
208 |
|
def get_lexer_for_mimetype(_mime, **options):
    """Return a lexer instance for the mimetype `_mime`.

    Builtin lexers are searched first, then plugin lexers; the first class
    listing `_mime` among its mimetypes is instantiated with `options`.

    Raises ClassNotFound if not found.
    """
    for modname, name, _, _, known_mimetypes in itervalues(LEXERS):
        if _mime not in known_mimetypes:
            continue
        if name not in _lexer_cache:
            _load_lexers(modname)
        return _lexer_cache[name](**options)
    for plugin_cls in find_plugin_lexers():
        if _mime in plugin_cls.mimetypes:
            return plugin_cls(**options)
    raise ClassNotFound('no lexer for mimetype %r found' % _mime)
|
223 |
|
224 |
|
def _iter_lexerclasses(plugins=True):
    """Yield all lexer classes.

    Builtin lexers are produced in sorted order of their ``LEXERS`` keys
    and are imported on demand.  Plugin lexers follow afterwards unless
    `plugins` is false.
    """
    for key in sorted(LEXERS):
        entry = LEXERS[key]
        module_name, name = entry[0], entry[1]
        if name not in _lexer_cache:
            _load_lexers(module_name)
        yield _lexer_cache[name]
    if not plugins:
        return
    for plugin_cls in find_plugin_lexers():
        yield plugin_cls
|
235 |
|
236 |
|
def guess_lexer_for_filename(_fn, _text, **options):
    """
    Collect all lexers whose primary (``filenames``) or secondary
    (``alias_filenames``) patterns match the base name of `_fn`, analyse
    `_text` with each of them and return an instance of the best one.

    A single matching lexer is used without analysis, and a lexer whose
    analysis yields ``1.0`` is returned immediately.  Raises ClassNotFound
    if no pattern matches.

    usage::

        >>> from pygments.lexers import guess_lexer_for_filename
        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
    """
    fn = basename(_fn)
    # lexer class -> whether the most recent matching pattern was a primary one
    primary = {}
    candidates = set()
    for lexer_cls in _iter_lexerclasses():
        for pattern in lexer_cls.filenames:
            if _fn_matches(fn, pattern):
                candidates.add(lexer_cls)
                primary[lexer_cls] = True
        # checked second, so an alias match overrides a primary match
        for pattern in lexer_cls.alias_filenames:
            if _fn_matches(fn, pattern):
                candidates.add(lexer_cls)
                primary[lexer_cls] = False
    if not candidates:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(candidates) == 1:
        return candidates.pop()(**options)

    scored = []
    for lexer_cls in candidates:
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            # a perfect score ends the search right away
            return lexer_cls(**options)
        scored.append((score, lexer_cls))

    def sort_key(scored_entry):
        # sort by:
        #  - analyse score
        #  - is primary filename pattern?
        #  - priority
        #  - last resort: class name
        score, lexer_cls = scored_entry
        return (score, primary[lexer_cls], lexer_cls.priority,
                lexer_cls.__name__)

    # ascending sort: the winner is the last element
    scored.sort(key=sort_key)
    return scored[-1][1](**options)
|
286 |
|
287 |
|
def guess_lexer(_text, **options):
    """Guess a lexer by strong distinctions in the text (eg, shebang).

    A vim modeline naming a known lexer alias takes precedence.  Otherwise
    every lexer analyses `_text`; a score of ``1.0`` wins immediately, else
    the highest positive score wins.

    Raises ClassNotFound if no lexer rates the text above zero.
    """
    # try to get a vim modeline first
    filetype = get_filetype_from_buffer(_text)
    if filetype is not None:
        try:
            return get_lexer_by_name(filetype, **options)
        except ClassNotFound:
            # unknown filetype in the modeline: fall back to text analysis
            pass

    best_score, best_cls = 0.0, None
    for lexer_cls in _iter_lexerclasses():
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            return lexer_cls(**options)
        if score > best_score:
            best_score, best_cls = score, lexer_cls
    if not best_score or best_cls is None:
        raise ClassNotFound('no lexer matching the text found')
    return best_cls(**options)
|
310 |
|
311 |
|
class _automodule(types.ModuleType):
    """Module type that imports lexer classes on first attribute access."""

    def __getattr__(self, name):
        """Resolve `name` through the ``LEXERS`` mapping.

        Python only calls this for attributes that normal lookup did not
        find.  If `name` is a key of ``LEXERS``, the lexer's module is
        loaded and the class is stored on this module object, so later
        accesses find it directly.  Raises AttributeError otherwise.
        """
        info = LEXERS.get(name)
        if not info:
            raise AttributeError(name)
        # info[0] is the module to import, info[1] the lexer's name
        _load_lexers(info[0])
        cls = _lexer_cache[info[1]]
        setattr(self, name, cls)
        return cls
|
323 |
|
324 |
|
# Swap this module in ``sys.modules`` for an _automodule instance, so that
# lexer classes are imported lazily on attribute access.
oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
# carry every name defined above over to the replacement module
vars(newmod).update(vars(oldmod))
sys.modules[__name__] = newmod
# remove the bootstrap names from the replacement module's namespace
del newmod.newmod
del newmod.oldmod
del newmod.sys
del newmod.types