# -*- coding: utf-8 -*-
"""
    pygments.lexers
    ~~~~~~~~~~~~~~~

    Pygments lexers.

    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import sys
import fnmatch
import types
from os.path import basename

try:
    set
except NameError:
    from sets import Set as set

from pygments.lexers._mapping import LEXERS
from pygments.plugin import find_plugin_lexers
from pygments.util import ClassNotFound, bytes


__all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
           'guess_lexer'] + LEXERS.keys()

_lexer_cache = {}


def _load_lexers(module_name):
    """
    Load a lexer (and all others in the module too).
    """
    mod = __import__(module_name, None, None, ['__all__'])
    for lexer_name in mod.__all__:
        cls = getattr(mod, lexer_name)
        _lexer_cache[cls.name] = cls


def get_all_lexers():
    """
    Return a generator of tuples in the form ``(name, aliases,
    filenames, mimetypes)`` of all known lexers.
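
    A rough usage sketch (illustrative; the exact set of lexers depends on
    the installation)::

        >>> from pygments.lexers import get_all_lexers
        >>> names = [name for name, aliases, filenames, mimetypes in get_all_lexers()]
        >>> 'Python' in names
        True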
    """
    for item in LEXERS.itervalues():
        yield item[1:]
    for lexer in find_plugin_lexers():
        yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes


def find_lexer_class(name):
    """
    Look up a lexer class by name. Return None if not found.
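
    A rough usage sketch (illustrative; assumes the builtin Python lexer is
    available)::

        >>> from pygments.lexers import find_lexer_class
        >>> find_lexer_class('Python').name
        'Python'
        >>> find_lexer_class('no such language') is None
        True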
    """
    if name in _lexer_cache:
        return _lexer_cache[name]
    # lookup builtin lexers
    for module_name, lname, aliases, _, _ in LEXERS.itervalues():
        if name == lname:
            _load_lexers(module_name)
            return _lexer_cache[name]
    # continue with lexers from setuptools entrypoints
    for cls in find_plugin_lexers():
        if cls.name == name:
            return cls


def get_lexer_by_name(_alias, **options):
    """
    Get a lexer by an alias.
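
    A rough usage sketch (illustrative; ``'python'`` is one of the aliases of
    the builtin Python lexer, and keyword arguments are passed on to the
    lexer as options)::

        >>> from pygments.lexers import get_lexer_by_name
        >>> lexer = get_lexer_by_name('python', stripall=True)
        >>> lexer.name
        'Python'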
    """
    # lookup builtin lexers
    for module_name, name, aliases, _, _ in LEXERS.itervalues():
        if _alias in aliases:
            if name not in _lexer_cache:
                _load_lexers(module_name)
            return _lexer_cache[name](**options)
    # continue with lexers from setuptools entrypoints
    for cls in find_plugin_lexers():
        if _alias in cls.aliases:
            return cls(**options)
    raise ClassNotFound('no lexer for alias %r found' % _alias)


def get_lexer_for_filename(_fn, code=None, **options):
    """
    Get a lexer for a filename. If multiple lexers match the filename
    pattern, ``analyse_text()`` is run on the ``code`` argument (if given)
    to figure out which one is more appropriate.
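
    A rough usage sketch (illustrative; ``*.py`` is one of the filename
    patterns of the builtin Python lexer)::

        >>> from pygments.lexers import get_lexer_for_filename
        >>> get_lexer_for_filename('example.py').name
        'Python'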
    """
    matches = []
    fn = basename(_fn)
    for modname, name, _, filenames, _ in LEXERS.itervalues():
        for filename in filenames:
            if fnmatch.fnmatch(fn, filename):
                if name not in _lexer_cache:
                    _load_lexers(modname)
                matches.append(_lexer_cache[name])
    for cls in find_plugin_lexers():
        for filename in cls.filenames:
            if fnmatch.fnmatch(fn, filename):
                matches.append(cls)

    if sys.version_info > (3,) and isinstance(code, bytes):
        # decode it, since all analyse_text functions expect unicode
        code = code.decode('latin1')

    def get_rating(cls):
        # The class _always_ defines analyse_text because it's included in
        # the Lexer class. The default implementation returns None which
        # gets turned into 0.0. Run scripts/detect_missing_analyse_text.py
        # to find lexers which need it overridden.
        d = cls.analyse_text(code)
        #print "Got %r from %r" % (d, cls)
        return d

    if code:
        matches.sort(key=get_rating)
    if matches:
        #print "Possible lexers, after sort:", matches
        return matches[-1](**options)
    raise ClassNotFound('no lexer for filename %r found' % _fn)


def get_lexer_for_mimetype(_mime, **options):
    """
    Get a lexer for a mimetype.
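
    A rough usage sketch (illustrative; ``text/x-python`` is one of the
    mimetypes registered for the builtin Python lexer)::

        >>> from pygments.lexers import get_lexer_for_mimetype
        >>> get_lexer_for_mimetype('text/x-python').name
        'Python'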
    """
    for modname, name, _, _, mimetypes in LEXERS.itervalues():
        if _mime in mimetypes:
            if name not in _lexer_cache:
                _load_lexers(modname)
            return _lexer_cache[name](**options)
    for cls in find_plugin_lexers():
        if _mime in cls.mimetypes:
            return cls(**options)
    raise ClassNotFound('no lexer for mimetype %r found' % _mime)


def _iter_lexerclasses():
    """
    Return an iterator over all lexer classes.
    """
    for module_name, name, _, _, _ in LEXERS.itervalues():
        if name not in _lexer_cache:
            _load_lexers(module_name)
        yield _lexer_cache[name]
    for lexer in find_plugin_lexers():
        yield lexer


def guess_lexer_for_filename(_fn, _text, **options):
    """
    Look up all lexers that handle the given filename as a primary match
    (``filenames``) or as a secondary one (``alias_filenames``), then run
    a text analysis on those lexers and choose the best result.

    usage::

        >>> from pygments.lexers import guess_lexer_for_filename
        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
        <pygments.lexers.templates.RhtmlLexer object at 0xb7d2f32c>
        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
        <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
        <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
    """
    fn = basename(_fn)
    primary = None
    matching_lexers = set()
    for lexer in _iter_lexerclasses():
        for filename in lexer.filenames:
            if fnmatch.fnmatch(fn, filename):
                matching_lexers.add(lexer)
                primary = lexer
        for filename in lexer.alias_filenames:
            if fnmatch.fnmatch(fn, filename):
                matching_lexers.add(lexer)
    if not matching_lexers:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(matching_lexers) == 1:
        return matching_lexers.pop()(**options)
    result = []
    for lexer in matching_lexers:
        rv = lexer.analyse_text(_text)
        if rv == 1.0:
            return lexer(**options)
        result.append((rv, lexer))
    result.sort()
    if not result[-1][0] and primary is not None:
        return primary(**options)
    return result[-1][1](**options)


def guess_lexer(_text, **options):
    """
    Guess a lexer by strong distinctions in the text (e.g. a shebang line).
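
    A rough usage sketch (illustrative; a Python shebang is one of the
    strong hints that ``analyse_text()`` implementations look for)::

        >>> from pygments.lexers import guess_lexer
        >>> guess_lexer('#!/usr/bin/python').name
        'Python'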
    """
    best_lexer = [0.0, None]
    for lexer in _iter_lexerclasses():
        rv = lexer.analyse_text(_text)
        if rv == 1.0:
            return lexer(**options)
        if rv > best_lexer[0]:
            best_lexer[:] = (rv, lexer)
    if not best_lexer[0] or best_lexer[1] is None:
        raise ClassNotFound('no lexer matching the text found')
    return best_lexer[1](**options)


class _automodule(types.ModuleType):
    """Automatically import lexers."""

    def __getattr__(self, name):
        info = LEXERS.get(name)
        if info:
            _load_lexers(info[0])
            cls = _lexer_cache[info[1]]
            setattr(self, name, cls)
            return cls
        raise AttributeError(name)

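# Replace this module in sys.modules with an _automodule instance, so that a
# lexer class attribute (e.g. ``pygments.lexers.PythonLexer``) is imported
# lazily on first access via __getattr__ above, instead of every lexer module
# being imported up front.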
import sys
oldmod = sys.modules['pygments.lexers']
newmod = _automodule('pygments.lexers')
newmod.__dict__.update(oldmod.__dict__)
sys.modules['pygments.lexers'] = newmod
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types