1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers |
|
4 ~~~~~~~~~~~~~~~ |
|
5 |
|
6 Pygments lexers. |
|
7 |
|
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 import sys |
|
14 import types |
|
15 import fnmatch |
|
16 from os.path import basename |
|
17 |
|
18 from pygments.lexers._mapping import LEXERS |
|
19 from pygments.modeline import get_filetype_from_buffer |
|
20 from pygments.plugin import find_plugin_lexers |
|
21 from pygments.util import ClassNotFound, guess_decode |
|
22 |
|
# Extra attribute names served by this module, each mapped to the attribute
# name it is resolved through (see ``_automodule.__getattr__``).
COMPAT = {
    'Python3Lexer': 'PythonLexer',
    'Python3TracebackLexer': 'PythonTracebackLexer',
}

# Exported names: a handful of lookup helpers, then every key of LEXERS and
# every key of COMPAT.
__all__ = [
    'get_lexer_by_name',
    'get_lexer_for_filename',
    'find_lexer_class',
    'guess_lexer',
    'load_lexer_from_file',
] + list(LEXERS) + list(COMPAT)

# Lexer classes that have already been imported, keyed by their ``name``.
_lexer_cache = {}
# Compiled regular expressions, keyed by the glob they were translated from.
_pattern_cache = {}
|
33 |
|
34 |
|
def _fn_matches(fn, glob):
    """Match the file name ``fn`` against the shell-style pattern ``glob``.

    The pattern is translated with ``fnmatch.translate`` and compiled once;
    the compiled form is kept in ``_pattern_cache`` for later calls.

    Returns the match object on success and ``None`` otherwise.
    """
    compiled = _pattern_cache.get(glob)
    if compiled is None:
        compiled = re.compile(fnmatch.translate(glob))
        _pattern_cache[glob] = compiled
    return compiled.match(fn)
|
41 |
|
42 |
|
def _load_lexers(module_name):
    """Import ``module_name`` and cache every lexer class it exports.

    Each name listed in the module's ``__all__`` is fetched from the module
    and stored in ``_lexer_cache`` under the class's ``name`` attribute.
    """
    module = __import__(module_name, None, None, ['__all__'])
    exported = (getattr(module, attr) for attr in module.__all__)
    _lexer_cache.update((lexer_cls.name, lexer_cls) for lexer_cls in exported)
|
49 |
|
50 |
|
def get_all_lexers():
    """Yield ``(name, aliases, filenames, mimetypes)`` for all known lexers.

    Entries from ``LEXERS`` are produced first (everything after the module
    name), followed by one tuple per lexer class returned by
    ``find_plugin_lexers()``.
    """
    yield from (entry[1:] for entry in LEXERS.values())
    for plugin_cls in find_plugin_lexers():
        yield (
            plugin_cls.name,
            plugin_cls.aliases,
            plugin_cls.filenames,
            plugin_cls.mimetypes,
        )
|
59 |
|
60 |
|
def find_lexer_class(name):
    """Look up a lexer class by its ``name``.

    The cache of already loaded lexers is consulted first, then the builtin
    lexers listed in ``LEXERS`` (loading the defining module on a hit), and
    finally the lexers returned by ``find_plugin_lexers()``.

    Returns None if no lexer carries that name.
    """
    if name in _lexer_cache:
        return _lexer_cache[name]

    # builtin lexers: importing the module fills the cache
    for module_name, builtin_name, _, _, _ in LEXERS.values():
        if name == builtin_name:
            _load_lexers(module_name)
            return _lexer_cache[name]

    # lexers from setuptools entrypoints
    return next(
        (plugin_cls for plugin_cls in find_plugin_lexers()
         if plugin_cls.name == name),
        None,
    )
|
77 |
|
78 |
|
def find_lexer_class_by_name(_alias):
    """Lookup a lexer class by alias.

    Like `get_lexer_by_name`, but does not instantiate the class.

    The alias is compared in lowercased form, first against the builtin
    lexers in ``LEXERS`` and then against the lexers returned by
    ``find_plugin_lexers()``.

    Raises ClassNotFound if `_alias` is empty or no lexer declares it.

    .. versionadded:: 2.2
    """
    if not _alias:
        raise ClassNotFound('no lexer for alias %r found' % _alias)
    # Lowercase once instead of once per candidate lexer.
    wanted = _alias.lower()
    # lookup builtin lexers
    for module_name, name, aliases, _, _ in LEXERS.values():
        if wanted in aliases:
            if name not in _lexer_cache:
                _load_lexers(module_name)
            return _lexer_cache[name]
    # continue with lexers from setuptools entrypoints
    for cls in find_plugin_lexers():
        if wanted in cls.aliases:
            return cls
    raise ClassNotFound('no lexer for alias %r found' % _alias)
|
99 |
|
100 |
|
def get_lexer_by_name(_alias, **options):
    """Get a lexer instance by one of its aliases.

    The class lookup is delegated to `find_lexer_class_by_name`, which
    checks the builtin lexers first and plugin lexers afterwards; the class
    it returns is instantiated with `options`.

    Raises ClassNotFound if `_alias` is empty or no lexer declares it.
    """
    # Single source of truth for alias resolution: the lookup used to be
    # duplicated here line for line.
    lexer_class = find_lexer_class_by_name(_alias)
    return lexer_class(**options)
|
120 |
|
121 |
|
def load_lexer_from_file(filename, lexername="CustomLexer", **options):
    """Load a lexer from a file.

    `filename` is passed to ``open()`` as is and must contain a Lexer
    class. By default, the class is expected to be named CustomLexer; you
    can specify your own class name as the second argument to this
    function. The class is instantiated with `options`.

    Users should be very careful with the input, because this method
    is equivalent to running eval on the input file.

    Raises ClassNotFound if there are any problems importing the Lexer;
    the underlying exception is attached as ``__cause__``.

    .. versionadded:: 2.2
    """
    try:
        # This empty dict will contain the namespace for the exec'd file
        custom_namespace = {}
        with open(filename, 'rb') as f:
            # NOTE: runs arbitrary code from `filename`; never call this
            # with a path that comes from an untrusted source.
            exec(f.read(), custom_namespace)
        # Retrieve the class `lexername` from that namespace
        if lexername not in custom_namespace:
            raise ClassNotFound('no valid %s class found in %s' %
                                (lexername, filename))
        lexer_class = custom_namespace[lexername]
        # And finally instantiate it with the options
        return lexer_class(**options)
    except OSError as err:
        raise ClassNotFound('cannot read %s: %s' % (filename, err)) from err
    except ClassNotFound:
        # Already the documented exception type: pass it through untouched.
        raise
    except Exception as err:
        raise ClassNotFound(
            'error when loading custom lexer: %s' % err) from err
|
155 |
|
156 |
|
def find_lexer_class_for_filename(_fn, code=None):
    """Get a lexer class for a filename.

    Only the base name of `_fn` is matched against the filename patterns of
    the builtin lexers and of the plugin lexers.

    If multiple lexers match the filename pattern, use ``analyse_text()`` to
    figure out which one is more appropriate. Without `code`, the lexers'
    ``priority`` is used instead. `code` may be given as bytes, in which
    case it is decoded first.

    Returns None if not found.
    """
    matches = []
    fn = basename(_fn)
    for modname, name, _, filenames, _ in LEXERS.values():
        for filename in filenames:
            if _fn_matches(fn, filename):
                if name not in _lexer_cache:
                    _load_lexers(modname)
                matches.append((_lexer_cache[name], filename))
    for cls in find_plugin_lexers():
        for filename in cls.filenames:
            if _fn_matches(fn, filename):
                matches.append((cls, filename))

    if isinstance(code, bytes):
        # decode it, since all analyse_text functions expect unicode.
        # guess_decode() returns a pair whose first item is the decoded
        # text; binding the whole pair would hand analyse_text a tuple.
        code, _ = guess_decode(code)

    def get_rating(info):
        cls, filename = info
        # explicit patterns get a bonus
        bonus = 0.5 if '*' not in filename else 0
        # The class _always_ defines analyse_text because it's included in
        # the Lexer class.  The default implementation returns None which
        # gets turned into 0.0.  Run scripts/detect_missing_analyse_text.py
        # to find lexers which need it overridden.
        if code:
            return cls.analyse_text(code) + bonus, cls.__name__
        return cls.priority + bonus, cls.__name__

    if matches:
        # Ascending sort: the best rated candidate ends up last.
        matches.sort(key=get_rating)
        return matches[-1][0]
    return None
|
198 |
|
199 |
|
def get_lexer_for_filename(_fn, code=None, **options):
    """Get a lexer instance for a filename.

    The class is chosen by ``find_lexer_class_for_filename(_fn, code)`` and
    instantiated with `options`.

    Raises ClassNotFound if no lexer class is found.
    """
    lexer_cls = find_lexer_class_for_filename(_fn, code)
    if lexer_cls:
        return lexer_cls(**options)
    raise ClassNotFound('no lexer for filename %r found' % _fn)
|
212 |
|
213 |
|
def get_lexer_for_mimetype(_mime, **options):
    """Get a lexer instance for a mimetype.

    Builtin lexers are searched first, then the lexers returned by
    ``find_plugin_lexers()``; the first class listing `_mime` among its
    mimetypes is instantiated with `options`.

    Raises ClassNotFound if not found.
    """
    for modname, name, _, _, mimetypes in LEXERS.values():
        if _mime not in mimetypes:
            continue
        if name not in _lexer_cache:
            _load_lexers(modname)
        builtin_cls = _lexer_cache[name]
        return builtin_cls(**options)

    for plugin_cls in find_plugin_lexers():
        if _mime not in plugin_cls.mimetypes:
            continue
        return plugin_cls(**options)

    raise ClassNotFound('no lexer for mimetype %r found' % _mime)
|
228 |
|
229 |
|
def _iter_lexerclasses(plugins=True):
    """Yield every lexer class, builtin ones first.

    Builtin lexers are visited in sorted order of their ``LEXERS`` key and
    their modules are imported on demand.  Unless `plugins` is false, the
    lexers returned by ``find_plugin_lexers()`` follow.
    """
    for key in sorted(LEXERS):
        entry = LEXERS[key]
        module_name, name = entry[:2]
        if name not in _lexer_cache:
            _load_lexers(module_name)
        yield _lexer_cache[name]
    if not plugins:
        return
    yield from find_plugin_lexers()
|
239 |
|
240 |
|
def guess_lexer_for_filename(_fn, _text, **options):
    """
    Collect all lexers whose primary (``filenames``) or secondary
    (``alias_filenames``) patterns match the base name of `_fn`, then let
    a text analysis of `_text` pick the best one.

    A single candidate is returned without analysing the text; a candidate
    whose analysis yields 1.0 is returned right away.  Raises ClassNotFound
    if no pattern matches.

    usage::

        >>> from pygments.lexers import guess_lexer_for_filename
        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
    """
    fn = basename(_fn)
    is_primary = {}
    candidates = set()
    for lexer_cls in _iter_lexerclasses():
        # Primary patterns are checked first; a later alias match overrides
        # the recorded flag.
        pattern_groups = (
            (lexer_cls.filenames, True),
            (lexer_cls.alias_filenames, False),
        )
        for patterns, flag in pattern_groups:
            for pattern in patterns:
                if _fn_matches(fn, pattern):
                    candidates.add(lexer_cls)
                    is_primary[lexer_cls] = flag

    if not candidates:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(candidates) == 1:
        only_cls = candidates.pop()
        return only_cls(**options)

    scored = []
    for lexer_cls in candidates:
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            return lexer_cls(**options)
        scored.append((score, lexer_cls))

    def ranking(item):
        # sort by:
        #  - analyse score
        #  - is primary filename pattern?
        #  - priority
        #  - last resort: class name
        score, lexer_cls = item
        return (score, is_primary[lexer_cls], lexer_cls.priority,
                lexer_cls.__name__)

    scored.sort(key=ranking)
    best_cls = scored[-1][1]
    return best_cls(**options)
|
290 |
|
291 |
|
def guess_lexer(_text, **options):
    """Guess a lexer by strong distinctions in the text (eg, shebang).

    Non-``str`` input is decoded first, using the ``inencoding`` or
    ``encoding`` option when one is set and ``guess_decode()`` otherwise.
    A file type reported by ``get_filetype_from_buffer()`` is tried before
    the lexers' ``analyse_text()`` results are compared.

    Raises ClassNotFound if no lexer gives the text a non-zero rating.
    """
    if not isinstance(_text, str):
        encoding = options.get('inencoding', options.get('encoding'))
        if encoding:
            _text = _text.decode(encoding)
        else:
            _text, _guessed = guess_decode(_text)

    # try to get a vim modeline first
    filetype = get_filetype_from_buffer(_text)
    if filetype is not None:
        try:
            return get_lexer_by_name(filetype, **options)
        except ClassNotFound:
            # unknown file type: fall back to text analysis
            pass

    best_score = 0.0
    best_cls = None
    for lexer_cls in _iter_lexerclasses():
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            return lexer_cls(**options)
        if score > best_score:
            best_score, best_cls = score, lexer_cls

    if not best_score or best_cls is None:
        raise ClassNotFound('no lexer matching the text found')
    return best_cls(**options)
|
321 |
|
322 |
|
class _automodule(types.ModuleType):
    """Module type that imports lexers on first attribute access."""

    def __getattr__(self, name):
        """Resolve `name` to a lexer class.

        A key of ``LEXERS`` triggers loading of the defining module; the
        class is then stored on this module object under `name`.  A key of
        ``COMPAT`` is resolved through the attribute name it maps to.

        Raises AttributeError for any other name.
        """
        entry = LEXERS.get(name)
        if not entry:
            if name not in COMPAT:
                raise AttributeError(name)
            return getattr(self, COMPAT[name])

        _load_lexers(entry[0])
        lexer_cls = _lexer_cache[entry[1]]
        setattr(self, name, lexer_cls)
        return lexer_cls
|
336 |
|
337 |
|
# Replace this module's entry in ``sys.modules`` with an ``_automodule``
# instance that carries the same namespace, so that attribute access on the
# module goes through ``_automodule.__getattr__``.
oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
vars(newmod).update(vars(oldmod))
sys.modules[__name__] = newmod
# Drop the names that were only needed for the swap from the new module.
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types
|