ThirdParty/Pygments/pygments/lexers/__init__.py

changeset 4172
4f20dba37ab6
parent 2591
b1c918293219
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
3 pygments.lexers 3 pygments.lexers
4 ~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~
5 5
6 Pygments lexers. 6 Pygments lexers.
7 7
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 import re
12 import sys 13 import sys
13 import types 14 import types
14 import fnmatch 15 import fnmatch
15 from os.path import basename 16 from os.path import basename
16 17
17 from pygments.lexers._mapping import LEXERS 18 from pygments.lexers._mapping import LEXERS
19 from pygments.modeline import get_filetype_from_buffer
18 from pygments.plugin import find_plugin_lexers 20 from pygments.plugin import find_plugin_lexers
19 from pygments.util import ClassNotFound, bytes 21 from pygments.util import ClassNotFound, itervalues, guess_decode
20 22
21 23
22 __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class', 24 __all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
23 'guess_lexer'] + list(LEXERS.keys()) 25 'guess_lexer'] + list(LEXERS)
24 26
25 _lexer_cache = {} 27 _lexer_cache = {}
28 _pattern_cache = {}
29
30
31 def _fn_matches(fn, glob):
32 """Return whether the supplied file name fn matches the glob pattern glob."""
33 if glob not in _pattern_cache:
34 pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob))
35 return pattern.match(fn)
36 return _pattern_cache[glob].match(fn)
26 37
27 38
28 def _load_lexers(module_name): 39 def _load_lexers(module_name):
29 """ 40 """Load a lexer (and all others in the module too)."""
30 Load a lexer (and all others in the module too).
31 """
32 mod = __import__(module_name, None, None, ['__all__']) 41 mod = __import__(module_name, None, None, ['__all__'])
33 for lexer_name in mod.__all__: 42 for lexer_name in mod.__all__:
34 cls = getattr(mod, lexer_name) 43 cls = getattr(mod, lexer_name)
35 _lexer_cache[cls.name] = cls 44 _lexer_cache[cls.name] = cls
36 45
37 46
38 def get_all_lexers(): 47 def get_all_lexers():
39 """ 48 """Return a generator of tuples in the form ``(name, aliases,
40 Return a generator of tuples in the form ``(name, aliases,
41 filenames, mimetypes)`` of all known lexers. 49 filenames, mimetypes)`` of all known lexers.
42 """ 50 """
43 for item in LEXERS.values(): 51 for item in itervalues(LEXERS):
44 yield item[1:] 52 yield item[1:]
45 for lexer in find_plugin_lexers(): 53 for lexer in find_plugin_lexers():
46 yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes 54 yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes
47 55
48 56
49 def find_lexer_class(name): 57 def find_lexer_class(name):
50 """ 58 """Lookup a lexer class by name.
51 Lookup a lexer class by name. Return None if not found. 59
60 Return None if not found.
52 """ 61 """
53 if name in _lexer_cache: 62 if name in _lexer_cache:
54 return _lexer_cache[name] 63 return _lexer_cache[name]
55 # lookup builtin lexers 64 # lookup builtin lexers
56 for module_name, lname, aliases, _, _ in LEXERS.values(): 65 for module_name, lname, aliases, _, _ in itervalues(LEXERS):
57 if name == lname: 66 if name == lname:
58 _load_lexers(module_name) 67 _load_lexers(module_name)
59 return _lexer_cache[name] 68 return _lexer_cache[name]
60 # continue with lexers from setuptools entrypoints 69 # continue with lexers from setuptools entrypoints
61 for cls in find_plugin_lexers(): 70 for cls in find_plugin_lexers():
62 if cls.name == name: 71 if cls.name == name:
63 return cls 72 return cls
64 73
65 74
66 def get_lexer_by_name(_alias, **options): 75 def get_lexer_by_name(_alias, **options):
67 """ 76 """Get a lexer by an alias.
68 Get a lexer by an alias. 77
69 """ 78 Raises ClassNotFound if not found.
79 """
80 if not _alias:
81 raise ClassNotFound('no lexer for alias %r found' % _alias)
82
70 # lookup builtin lexers 83 # lookup builtin lexers
71 for module_name, name, aliases, _, _ in LEXERS.values(): 84 for module_name, name, aliases, _, _ in itervalues(LEXERS):
72 if _alias in aliases: 85 if _alias.lower() in aliases:
73 if name not in _lexer_cache: 86 if name not in _lexer_cache:
74 _load_lexers(module_name) 87 _load_lexers(module_name)
75 return _lexer_cache[name](**options) 88 return _lexer_cache[name](**options)
76 # continue with lexers from setuptools entrypoints 89 # continue with lexers from setuptools entrypoints
77 for cls in find_plugin_lexers(): 90 for cls in find_plugin_lexers():
78 if _alias in cls.aliases: 91 if _alias in cls.aliases:
79 return cls(**options) 92 return cls(**options)
80 raise ClassNotFound('no lexer for alias %r found' % _alias) 93 raise ClassNotFound('no lexer for alias %r found' % _alias)
81 94
82 95
83 def get_lexer_for_filename(_fn, code=None, **options): 96 def find_lexer_class_for_filename(_fn, code=None):
84 """ 97 """Get a lexer for a filename.
85 Get a lexer for a filename. If multiple lexers match the filename 98
86 pattern, use ``analyze_text()`` to figure out which one is more 99 If multiple lexers match the filename pattern, use ``analyse_text()`` to
87 appropriate. 100 figure out which one is more appropriate.
101
102 Returns None if not found.
88 """ 103 """
89 matches = [] 104 matches = []
90 fn = basename(_fn) 105 fn = basename(_fn)
91 for modname, name, _, filenames, _ in LEXERS.values(): 106 for modname, name, _, filenames, _ in itervalues(LEXERS):
92 for filename in filenames: 107 for filename in filenames:
93 if fnmatch.fnmatch(fn, filename): 108 if _fn_matches(fn, filename):
94 if name not in _lexer_cache: 109 if name not in _lexer_cache:
95 _load_lexers(modname) 110 _load_lexers(modname)
96 matches.append((_lexer_cache[name], filename)) 111 matches.append((_lexer_cache[name], filename))
97 for cls in find_plugin_lexers(): 112 for cls in find_plugin_lexers():
98 for filename in cls.filenames: 113 for filename in cls.filenames:
99 if fnmatch.fnmatch(fn, filename): 114 if _fn_matches(fn, filename):
100 matches.append((cls, filename)) 115 matches.append((cls, filename))
101 116
102 if sys.version_info > (3,) and isinstance(code, bytes): 117 if sys.version_info > (3,) and isinstance(code, bytes):
103 # decode it, since all analyse_text functions expect unicode 118 # decode it, since all analyse_text functions expect unicode
104 code = code.decode('latin1') 119 code = guess_decode(code)
105 120
106 def get_rating(info): 121 def get_rating(info):
107 cls, filename = info 122 cls, filename = info
108 # explicit patterns get a bonus 123 # explicit patterns get a bonus
109 bonus = '*' not in filename and 0.5 or 0 124 bonus = '*' not in filename and 0.5 or 0
115 return cls.analyse_text(code) + bonus 130 return cls.analyse_text(code) + bonus
116 return cls.priority + bonus 131 return cls.priority + bonus
117 132
118 if matches: 133 if matches:
119 matches.sort(key=get_rating) 134 matches.sort(key=get_rating)
120 #print "Possible lexers, after sort:", matches 135 # print "Possible lexers, after sort:", matches
121 return matches[-1][0](**options) 136 return matches[-1][0]
122 raise ClassNotFound('no lexer for filename %r found' % _fn) 137
138
139 def get_lexer_for_filename(_fn, code=None, **options):
140 """Get a lexer for a filename.
141
142 If multiple lexers match the filename pattern, use ``analyse_text()`` to
143 figure out which one is more appropriate.
144
145 Raises ClassNotFound if not found.
146 """
147 res = find_lexer_class_for_filename(_fn, code)
148 if not res:
149 raise ClassNotFound('no lexer for filename %r found' % _fn)
150 return res(**options)
123 151
124 152
125 def get_lexer_for_mimetype(_mime, **options): 153 def get_lexer_for_mimetype(_mime, **options):
126 """ 154 """Get a lexer for a mimetype.
127 Get a lexer for a mimetype. 155
128 """ 156 Raises ClassNotFound if not found.
129 for modname, name, _, _, mimetypes in LEXERS.values(): 157 """
158 for modname, name, _, _, mimetypes in itervalues(LEXERS):
130 if _mime in mimetypes: 159 if _mime in mimetypes:
131 if name not in _lexer_cache: 160 if name not in _lexer_cache:
132 _load_lexers(modname) 161 _load_lexers(modname)
133 return _lexer_cache[name](**options) 162 return _lexer_cache[name](**options)
134 for cls in find_plugin_lexers(): 163 for cls in find_plugin_lexers():
135 if _mime in cls.mimetypes: 164 if _mime in cls.mimetypes:
136 return cls(**options) 165 return cls(**options)
137 raise ClassNotFound('no lexer for mimetype %r found' % _mime) 166 raise ClassNotFound('no lexer for mimetype %r found' % _mime)
138 167
139 168
140 def _iter_lexerclasses(): 169 def _iter_lexerclasses(plugins=True):
141 """ 170 """Return an iterator over all lexer classes."""
142 Return an iterator over all lexer classes.
143 """
144 for key in sorted(LEXERS): 171 for key in sorted(LEXERS):
145 module_name, name = LEXERS[key][:2] 172 module_name, name = LEXERS[key][:2]
146 if name not in _lexer_cache: 173 if name not in _lexer_cache:
147 _load_lexers(module_name) 174 _load_lexers(module_name)
148 yield _lexer_cache[name] 175 yield _lexer_cache[name]
149 for lexer in find_plugin_lexers(): 176 if plugins:
150 yield lexer 177 for lexer in find_plugin_lexers():
178 yield lexer
151 179
152 180
153 def guess_lexer_for_filename(_fn, _text, **options): 181 def guess_lexer_for_filename(_fn, _text, **options):
154 """ 182 """
155 Lookup all lexers that handle those filenames primary (``filenames``) 183 Lookup all lexers that handle those filenames primary (``filenames``)
165 <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac> 193 <pygments.lexers.templates.HtmlDjangoLexer object at 0xb7d2f2ac>
166 >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }') 194 >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
167 <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c> 195 <pygments.lexers.templates.CssPhpLexer object at 0xb7ba518c>
168 """ 196 """
169 fn = basename(_fn) 197 fn = basename(_fn)
170 primary = None 198 primary = {}
171 matching_lexers = set() 199 matching_lexers = set()
172 for lexer in _iter_lexerclasses(): 200 for lexer in _iter_lexerclasses():
173 for filename in lexer.filenames: 201 for filename in lexer.filenames:
174 if fnmatch.fnmatch(fn, filename): 202 if _fn_matches(fn, filename):
175 matching_lexers.add(lexer) 203 matching_lexers.add(lexer)
176 primary = lexer 204 primary[lexer] = True
177 for filename in lexer.alias_filenames: 205 for filename in lexer.alias_filenames:
178 if fnmatch.fnmatch(fn, filename): 206 if _fn_matches(fn, filename):
179 matching_lexers.add(lexer) 207 matching_lexers.add(lexer)
208 primary[lexer] = False
180 if not matching_lexers: 209 if not matching_lexers:
181 raise ClassNotFound('no lexer for filename %r found' % fn) 210 raise ClassNotFound('no lexer for filename %r found' % fn)
182 if len(matching_lexers) == 1: 211 if len(matching_lexers) == 1:
183 return matching_lexers.pop()(**options) 212 return matching_lexers.pop()(**options)
184 result = [] 213 result = []
185 for lexer in matching_lexers: 214 for lexer in matching_lexers:
186 rv = lexer.analyse_text(_text) 215 rv = lexer.analyse_text(_text)
187 if rv == 1.0: 216 if rv == 1.0:
188 return lexer(**options) 217 return lexer(**options)
189 result.append((rv, lexer)) 218 result.append((rv, lexer))
190 result.sort(key=lambda k: k[0]) 219
191 if not result[-1][0] and primary is not None: 220 def type_sort(t):
192 return primary(**options) 221 # sort by:
222 # - analyse score
223 # - is primary filename pattern?
224 # - priority
225 # - last resort: class name
226 return (t[0], primary[t[1]], t[1].priority, t[1].__name__)
227 result.sort(key=type_sort)
228
193 return result[-1][1](**options) 229 return result[-1][1](**options)
194 230
195 231
196 def guess_lexer(_text, **options): 232 def guess_lexer(_text, **options):
197 """ 233 """Guess a lexer by strong distinctions in the text (eg, shebang)."""
198 Guess a lexer by strong distinctions in the text (eg, shebang). 234
199 """ 235 # try to get a vim modeline first
236 ft = get_filetype_from_buffer(_text)
237
238 if ft is not None:
239 try:
240 return get_lexer_by_name(ft, **options)
241 except ClassNotFound:
242 pass
243
200 best_lexer = [0.0, None] 244 best_lexer = [0.0, None]
201 for lexer in _iter_lexerclasses(): 245 for lexer in _iter_lexerclasses():
202 rv = lexer.analyse_text(_text) 246 rv = lexer.analyse_text(_text)
203 if rv == 1.0: 247 if rv == 1.0:
204 return lexer(**options) 248 return lexer(**options)
220 setattr(self, name, cls) 264 setattr(self, name, cls)
221 return cls 265 return cls
222 raise AttributeError(name) 266 raise AttributeError(name)
223 267
224 268
225 oldmod = sys.modules['pygments.lexers'] 269 oldmod = sys.modules[__name__]
226 newmod = _automodule('pygments.lexers') 270 newmod = _automodule(__name__)
227 newmod.__dict__.update(oldmod.__dict__) 271 newmod.__dict__.update(oldmod.__dict__)
228 sys.modules['pygments.lexers'] = newmod 272 sys.modules[__name__] = newmod
229 del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types 273 del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types

eric ide

mercurial