3 pygments.lexers |
3 pygments.lexers |
4 ~~~~~~~~~~~~~~~ |
4 ~~~~~~~~~~~~~~~ |
5 |
5 |
6 Pygments lexers. |
6 Pygments lexers. |
7 |
7 |
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. |
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
9 :license: BSD, see LICENSE for details. |
9 :license: BSD, see LICENSE for details. |
10 """ |
10 """ |
11 |
11 |
|
12 import re |
12 import sys |
13 import sys |
13 import types |
14 import types |
14 import fnmatch |
15 import fnmatch |
15 from os.path import basename |
16 from os.path import basename |
16 |
17 |
17 from pygments.lexers._mapping import LEXERS |
18 from pygments.lexers._mapping import LEXERS |
|
19 from pygments.modeline import get_filetype_from_buffer |
18 from pygments.plugin import find_plugin_lexers |
20 from pygments.plugin import find_plugin_lexers |
19 from pygments.util import ClassNotFound, bytes |
21 from pygments.util import ClassNotFound, itervalues, guess_decode |
20 |
22 |
21 |
23 |
22 __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class', |
24 __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class', |
23 'guess_lexer'] + list(LEXERS.keys()) |
25 'guess_lexer'] + list(LEXERS) |
24 |
26 |
25 _lexer_cache = {} |
27 _lexer_cache = {} |
|
28 _pattern_cache = {} |
|
29 |
|
30 |
|
31 def _fn_matches(fn, glob): |
|
32 """Return whether the supplied file name fn matches pattern filename.""" |
|
33 if glob not in _pattern_cache: |
|
34 pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob)) |
|
35 return pattern.match(fn) |
|
36 return _pattern_cache[glob].match(fn) |
26 |
37 |
27 |
38 |
28 def _load_lexers(module_name): |
39 def _load_lexers(module_name): |
29 """ |
40 """Load a lexer (and all others in the module too).""" |
30 Load a lexer (and all others in the module too). |
|
31 """ |
|
32 mod = __import__(module_name, None, None, ['__all__']) |
41 mod = __import__(module_name, None, None, ['__all__']) |
33 for lexer_name in mod.__all__: |
42 for lexer_name in mod.__all__: |
34 cls = getattr(mod, lexer_name) |
43 cls = getattr(mod, lexer_name) |
35 _lexer_cache[cls.name] = cls |
44 _lexer_cache[cls.name] = cls |
36 |
45 |
37 |
46 |
38 def get_all_lexers(): |
47 def get_all_lexers(): |
39 """ |
48 """Return a generator of tuples in the form ``(name, aliases, |
40 Return a generator of tuples in the form ``(name, aliases, |
|
41 filenames, mimetypes)`` of all know lexers. |
49 filenames, mimetypes)`` of all know lexers. |
42 """ |
50 """ |
43 for item in LEXERS.values(): |
51 for item in itervalues(LEXERS): |
44 yield item[1:] |
52 yield item[1:] |
45 for lexer in find_plugin_lexers(): |
53 for lexer in find_plugin_lexers(): |
46 yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes |
54 yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes |
47 |
55 |
48 |
56 |
49 def find_lexer_class(name): |
57 def find_lexer_class(name): |
50 """ |
58 """Lookup a lexer class by name. |
51 Lookup a lexer class by name. Return None if not found. |
59 |
|
60 Return None if not found. |
52 """ |
61 """ |
53 if name in _lexer_cache: |
62 if name in _lexer_cache: |
54 return _lexer_cache[name] |
63 return _lexer_cache[name] |
55 # lookup builtin lexers |
64 # lookup builtin lexers |
56 for module_name, lname, aliases, _, _ in LEXERS.values(): |
65 for module_name, lname, aliases, _, _ in itervalues(LEXERS): |
57 if name == lname: |
66 if name == lname: |
58 _load_lexers(module_name) |
67 _load_lexers(module_name) |
59 return _lexer_cache[name] |
68 return _lexer_cache[name] |
60 # continue with lexers from setuptools entrypoints |
69 # continue with lexers from setuptools entrypoints |
61 for cls in find_plugin_lexers(): |
70 for cls in find_plugin_lexers(): |
62 if cls.name == name: |
71 if cls.name == name: |
63 return cls |
72 return cls |
64 |
73 |
65 |
74 |
66 def get_lexer_by_name(_alias, **options): |
75 def get_lexer_by_name(_alias, **options): |
67 """ |
76 """Get a lexer by an alias. |
68 Get a lexer by an alias. |
77 |
69 """ |
78 Raises ClassNotFound if not found. |
|
79 """ |
|
80 if not _alias: |
|
81 raise ClassNotFound('no lexer for alias %r found' % _alias) |
|
82 |
70 # lookup builtin lexers |
83 # lookup builtin lexers |
71 for module_name, name, aliases, _, _ in LEXERS.values(): |
84 for module_name, name, aliases, _, _ in itervalues(LEXERS): |
72 if _alias in aliases: |
85 if _alias.lower() in aliases: |
73 if name not in _lexer_cache: |
86 if name not in _lexer_cache: |
74 _load_lexers(module_name) |
87 _load_lexers(module_name) |
75 return _lexer_cache[name](**options) |
88 return _lexer_cache[name](**options) |
76 # continue with lexers from setuptools entrypoints |
89 # continue with lexers from setuptools entrypoints |
77 for cls in find_plugin_lexers(): |
90 for cls in find_plugin_lexers(): |
78 if _alias in cls.aliases: |
91 if _alias in cls.aliases: |
79 return cls(**options) |
92 return cls(**options) |
80 raise ClassNotFound('no lexer for alias %r found' % _alias) |
93 raise ClassNotFound('no lexer for alias %r found' % _alias) |
81 |
94 |
82 |
95 |
83 def get_lexer_for_filename(_fn, code=None, **options): |
96 def find_lexer_class_for_filename(_fn, code=None): |
84 """ |
97 """Get a lexer for a filename. |
85 Get a lexer for a filename. If multiple lexers match the filename |
98 |
86 pattern, use ``analyze_text()`` to figure out which one is more |
99 If multiple lexers match the filename pattern, use ``analyse_text()`` to |
87 appropriate. |
100 figure out which one is more appropriate. |
|
101 |
|
102 Returns None if not found. |
88 """ |
103 """ |
89 matches = [] |
104 matches = [] |
90 fn = basename(_fn) |
105 fn = basename(_fn) |
91 for modname, name, _, filenames, _ in LEXERS.values(): |
106 for modname, name, _, filenames, _ in itervalues(LEXERS): |
92 for filename in filenames: |
107 for filename in filenames: |
93 if fnmatch.fnmatch(fn, filename): |
108 if _fn_matches(fn, filename): |
94 if name not in _lexer_cache: |
109 if name not in _lexer_cache: |
95 _load_lexers(modname) |
110 _load_lexers(modname) |
96 matches.append((_lexer_cache[name], filename)) |
111 matches.append((_lexer_cache[name], filename)) |
97 for cls in find_plugin_lexers(): |
112 for cls in find_plugin_lexers(): |
98 for filename in cls.filenames: |
113 for filename in cls.filenames: |
99 if fnmatch.fnmatch(fn, filename): |
114 if _fn_matches(fn, filename): |
100 matches.append((cls, filename)) |
115 matches.append((cls, filename)) |
101 |
116 |
102 if sys.version_info > (3,) and isinstance(code, bytes): |
117 if sys.version_info > (3,) and isinstance(code, bytes): |
103 # decode it, since all analyse_text functions expect unicode |
118 # decode it, since all analyse_text functions expect unicode |
104 code = code.decode('latin1') |
119 code = guess_decode(code) |
105 |
120 |
106 def get_rating(info): |
121 def get_rating(info): |
107 cls, filename = info |
122 cls, filename = info |
108 # explicit patterns get a bonus |
123 # explicit patterns get a bonus |
109 bonus = '*' not in filename and 0.5 or 0 |
124 bonus = '*' not in filename and 0.5 or 0 |
115 return cls.analyse_text(code) + bonus |
130 return cls.analyse_text(code) + bonus |
116 return cls.priority + bonus |
131 return cls.priority + bonus |
117 |
132 |
118 if matches: |
133 if matches: |
119 matches.sort(key=get_rating) |
134 matches.sort(key=get_rating) |
120 #print "Possible lexers, after sort:", matches |
135 # print "Possible lexers, after sort:", matches |
121 return matches[-1][0](**options) |
136 return matches[-1][0] |
122 raise ClassNotFound('no lexer for filename %r found' % _fn) |
137 |
|
138 |
|
139 def get_lexer_for_filename(_fn, code=None, **options): |
|
140 """Get a lexer for a filename. |
|
141 |
|
142 If multiple lexers match the filename pattern, use ``analyse_text()`` to |
|
143 figure out which one is more appropriate. |
|
144 |
|
145 Raises ClassNotFound if not found. |
|
146 """ |
|
147 res = find_lexer_class_for_filename(_fn, code) |
|
148 if not res: |
|
149 raise ClassNotFound('no lexer for filename %r found' % _fn) |
|
150 return res(**options) |
123 |
151 |
124 |
152 |
125 def get_lexer_for_mimetype(_mime, **options): |
153 def get_lexer_for_mimetype(_mime, **options): |
126 """ |
154 """Get a lexer for a mimetype. |
127 Get a lexer for a mimetype. |
155 |
128 """ |
156 Raises ClassNotFound if not found. |
129 for modname, name, _, _, mimetypes in LEXERS.values(): |
157 """ |
|
158 for modname, name, _, _, mimetypes in itervalues(LEXERS): |
130 if _mime in mimetypes: |
159 if _mime in mimetypes: |
131 if name not in _lexer_cache: |
160 if name not in _lexer_cache: |
132 _load_lexers(modname) |
161 _load_lexers(modname) |
133 return _lexer_cache[name](**options) |
162 return _lexer_cache[name](**options) |
134 for cls in find_plugin_lexers(): |
163 for cls in find_plugin_lexers(): |
135 if _mime in cls.mimetypes: |
164 if _mime in cls.mimetypes: |
136 return cls(**options) |
165 return cls(**options) |
137 raise ClassNotFound('no lexer for mimetype %r found' % _mime) |
166 raise ClassNotFound('no lexer for mimetype %r found' % _mime) |
138 |
167 |
139 |
168 |
140 def _iter_lexerclasses(): |
169 def _iter_lexerclasses(plugins=True): |
141 """ |
170 """Return an iterator over all lexer classes.""" |
142 Return an iterator over all lexer classes. |
|
143 """ |
|
144 for key in sorted(LEXERS): |
171 for key in sorted(LEXERS): |
145 module_name, name = LEXERS[key][:2] |
172 module_name, name = LEXERS[key][:2] |
146 if name not in _lexer_cache: |
173 if name not in _lexer_cache: |
147 _load_lexers(module_name) |
174 _load_lexers(module_name) |
148 yield _lexer_cache[name] |
175 yield _lexer_cache[name] |
149 for lexer in find_plugin_lexers(): |
176 if plugins: |
150 yield lexer |
177 for lexer in find_plugin_lexers(): |
|
178 yield lexer |
151 |
179 |
152 |
180 |
153 def guess_lexer_for_filename(_fn, _text, **options): |
181 def guess_lexer_for_filename(_fn, _text, **options): |
154 """ |
182 """ |
155 Lookup all lexers that handle those filenames primary (``filenames``) |
183 Lookup all lexers that handle those filenames primary (``filenames``) |
165 <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac> |
193 <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac> |
166 >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }') |
194 >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }') |
167 <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c> |
195 <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c> |
168 """ |
196 """ |
169 fn = basename(_fn) |
197 fn = basename(_fn) |
170 primary = None |
198 primary = {} |
171 matching_lexers = set() |
199 matching_lexers = set() |
172 for lexer in _iter_lexerclasses(): |
200 for lexer in _iter_lexerclasses(): |
173 for filename in lexer.filenames: |
201 for filename in lexer.filenames: |
174 if fnmatch.fnmatch(fn, filename): |
202 if _fn_matches(fn, filename): |
175 matching_lexers.add(lexer) |
203 matching_lexers.add(lexer) |
176 primary = lexer |
204 primary[lexer] = True |
177 for filename in lexer.alias_filenames: |
205 for filename in lexer.alias_filenames: |
178 if fnmatch.fnmatch(fn, filename): |
206 if _fn_matches(fn, filename): |
179 matching_lexers.add(lexer) |
207 matching_lexers.add(lexer) |
|
208 primary[lexer] = False |
180 if not matching_lexers: |
209 if not matching_lexers: |
181 raise ClassNotFound('no lexer for filename %r found' % fn) |
210 raise ClassNotFound('no lexer for filename %r found' % fn) |
182 if len(matching_lexers) == 1: |
211 if len(matching_lexers) == 1: |
183 return matching_lexers.pop()(**options) |
212 return matching_lexers.pop()(**options) |
184 result = [] |
213 result = [] |
185 for lexer in matching_lexers: |
214 for lexer in matching_lexers: |
186 rv = lexer.analyse_text(_text) |
215 rv = lexer.analyse_text(_text) |
187 if rv == 1.0: |
216 if rv == 1.0: |
188 return lexer(**options) |
217 return lexer(**options) |
189 result.append((rv, lexer)) |
218 result.append((rv, lexer)) |
190 result.sort(key=lambda k: k[0]) |
219 |
191 if not result[-1][0] and primary is not None: |
220 def type_sort(t): |
192 return primary(**options) |
221 # sort by: |
|
222 # - analyse score |
|
223 # - is primary filename pattern? |
|
224 # - priority |
|
225 # - last resort: class name |
|
226 return (t[0], primary[t[1]], t[1].priority, t[1].__name__) |
|
227 result.sort(key=type_sort) |
|
228 |
193 return result[-1][1](**options) |
229 return result[-1][1](**options) |
194 |
230 |
195 |
231 |
196 def guess_lexer(_text, **options): |
232 def guess_lexer(_text, **options): |
197 """ |
233 """Guess a lexer by strong distinctions in the text (eg, shebang).""" |
198 Guess a lexer by strong distinctions in the text (eg, shebang). |
234 |
199 """ |
235 # try to get a vim modeline first |
|
236 ft = get_filetype_from_buffer(_text) |
|
237 |
|
238 if ft is not None: |
|
239 try: |
|
240 return get_lexer_by_name(ft, **options) |
|
241 except ClassNotFound: |
|
242 pass |
|
243 |
200 best_lexer = [0.0, None] |
244 best_lexer = [0.0, None] |
201 for lexer in _iter_lexerclasses(): |
245 for lexer in _iter_lexerclasses(): |
202 rv = lexer.analyse_text(_text) |
246 rv = lexer.analyse_text(_text) |
203 if rv == 1.0: |
247 if rv == 1.0: |
204 return lexer(**options) |
248 return lexer(**options) |