ThirdParty/Pygments/pygments/lexers/math.py

changeset 0
de9c2efb9d02
child 684
2f29a0b6e1c7
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.math
4 ~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for math languages.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 try:
14 set
15 except NameError:
16 from sets import Set as set
17
18 from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
19 from pygments.token import Comment, String, Punctuation, Keyword, Name, \
20 Operator, Number, Text, Generic
21
22 from pygments.lexers.agile import PythonLexer
23
24 __all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'NumPyLexer',
25 'SLexer']
26
27
class MuPADLexer(RegexLexer):
    """
    A `MuPAD <http://www.mupad.com>`_ lexer.
    Contributed by Christopher Creutzig <christopher@creutzig.de>.

    The ``root`` state recognises comments, strings, keywords, the
    ``DOM_*`` kernel domains, constants, operators, identifiers (optionally
    ``::``-qualified or backtick-quoted) and numbers.  Block comments are
    handled in a separate ``comment`` state that pushes itself for every
    nested ``/*``.

    *New in Pygments 0.8.*
    """
    name = 'MuPAD'
    aliases = ['mupad']
    filenames = ['*.mu']

    tokens = {
        'root': [
            (r'//.*?$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'"(?:[^"\\]|\\.)*"', String),
            (r'\(|\)|\[|\]|\{|\}', Punctuation),
            # control-flow and declaration keywords
            (r'''(?x)\b(?:
                next|break|end|
                axiom|end_axiom|category|end_category|domain|end_domain|inherits|
                if|%if|then|elif|else|end_if|
                case|of|do|otherwise|end_case|
                while|end_while|
                repeat|until|end_repeat|
                for|from|to|downto|step|end_for|
                proc|local|option|save|begin|end_proc|
                delete|frame
              )\b''', Keyword),
            # kernel domain names
            (r'''(?x)\b(?:
                DOM_ARRAY|DOM_BOOL|DOM_COMPLEX|DOM_DOMAIN|DOM_EXEC|DOM_EXPR|
                DOM_FAIL|DOM_FLOAT|DOM_FRAME|DOM_FUNC_ENV|DOM_HFARRAY|DOM_IDENT|
                DOM_INT|DOM_INTERVAL|DOM_LIST|DOM_NIL|DOM_NULL|DOM_POLY|DOM_PROC|
                DOM_PROC_ENV|DOM_RAT|DOM_SET|DOM_STRING|DOM_TABLE|DOM_VAR
              )\b''', Name.Class),
            (r'''(?x)\b(?:
                PI|EULER|E|CATALAN|
                NIL|FAIL|undefined|infinity|
                TRUE|FALSE|UNKNOWN
              )\b''',
             Name.Constant),
            (r'\b(?:dom|procname)\b', Name.Builtin.Pseudo),
            (r'\.|,|:|;|=|\+|-|\*|/|\^|@|>|<|\$|\||!|\'|%|~=', Operator),
            (r'''(?x)\b(?:
                and|or|not|xor|
                assuming|
                div|mod|
                union|minus|intersect|in|subset
              )\b''',
             Operator.Word),
            # NOTE(review): RDN_INF may be a typo for RD_INF -- confirm
            # against the MuPAD documentation before changing it.
            (r'\b(?:I|RDN_INF|RD_NINF|RD_NAN)\b', Number),
            #(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin),
            # An identifier directly followed by "(" is a function call.
            # The optional whitespace before the parenthesis is captured in
            # its own group: bygroups() only emits text that belongs to a
            # group, so uncaptured whitespace would vanish from the output.
            (r'''(?x)
              ((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
              (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)(\s*)([(])''',
             bygroups(Name.Function, Text, Punctuation)),
            # any other identifier
            (r'''(?x)
              (?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
              (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable),
            (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),
            (r'\.[0-9]+(?:e[0-9]+)?', Number),
            # fallback: consume a single character so lexing always advances
            (r'.', Text)
        ],
        'comment': [
            (r'[^*/]', Comment.Multiline),
            # a nested opener pushes this state again; each closer pops once
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ]
    }
97
98
class MatlabLexer(RegexLexer):
    """
    For Matlab (or GNU Octave) source code.
    Contributed by Ken Schutte <kschutte@csail.mit.edu>.

    Builtin function names come from the ``elfun``, ``specfun`` and
    ``elmat`` lists below and are highlighted as `Name.Builtin`.

    *New in Pygments 0.10.*
    """
    name = 'Matlab'
    aliases = ['matlab', 'octave']
    filenames = ['*.m']
    mimetypes = ['text/matlab']

    #
    # These lists are generated automatically.
    # Run the following in bash shell:
    #
    # for f in elfun specfun elmat; do
    #   echo -n "$f = "
    #   matlab -nojvm -r "help $f;exit;" | perl -ne \
    #   'push(@c,$1) if /^    (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}'
    # done
    #
    # elfun: Elementary math functions
    # specfun: Special Math functions
    # elmat: Elementary matrices and matrix manipulation
    #
    # taken from Matlab version 7.4.0.336 (R2007a)
    #
    elfun = ["sin","sind","sinh","asin","asind","asinh","cos","cosd","cosh",
             "acos","acosd","acosh","tan","tand","tanh","atan","atand","atan2",
             "atanh","sec","secd","sech","asec","asecd","asech","csc","cscd",
             "csch","acsc","acscd","acsch","cot","cotd","coth","acot","acotd",
             "acoth","hypot","exp","expm1","log","log1p","log10","log2","pow2",
             "realpow","reallog","realsqrt","sqrt","nthroot","nextpow2","abs",
             "angle","complex","conj","imag","real","unwrap","isreal","cplxpair",
             "fix","floor","ceil","round","mod","rem","sign"]
    specfun = ["airy","besselj","bessely","besselh","besseli","besselk","beta",
               "betainc","betaln","ellipj","ellipke","erf","erfc","erfcx",
               "erfinv","expint","gamma","gammainc","gammaln","psi","legendre",
               "cross","dot","factor","isprime","primes","gcd","lcm","rat",
               "rats","perms","nchoosek","factorial","cart2sph","cart2pol",
               "pol2cart","sph2cart","hsv2rgb","rgb2hsv"]
    elmat = ["zeros","ones","eye","repmat","rand","randn","linspace","logspace",
             "freqspace","meshgrid","accumarray","size","length","ndims","numel",
             "disp","isempty","isequal","isequalwithequalnans","cat","reshape",
             "diag","blkdiag","tril","triu","fliplr","flipud","flipdim","rot90",
             "find","end","sub2ind","ind2sub","bsxfun","ndgrid","permute",
             "ipermute","shiftdim","circshift","squeeze","isscalar","isvector",
             "ans","eps","realmax","realmin","pi","i","inf","nan","isnan",
             "isinf","isfinite","j","why","compan","gallery","hadamard","hankel",
             "hilb","invhilb","magic","pascal","rosser","toeplitz","vander",
             "wilkinson"]

    tokens = {
        'root': [
            # line starting with '!' is sent as a system command.  not sure what
            # label to use...
            (r'^!.*', String.Other),
            (r'%.*$', Comment),
            (r'^\s*function', Keyword, 'deffunc'),

            # from 'iskeyword' on version 7.4.0.336 (R2007a):
            (r'(break|case|catch|classdef|continue|else|elseif|end|for|function|'
             r'global|if|otherwise|parfor|persistent|return|switch|try|while)\b',
             Keyword),

            ("(" + "|".join(elfun+specfun+elmat) + r')\b', Name.Builtin),

            # operators: the two-character comparisons must precede their
            # one-character prefixes, otherwise "<=" is lexed as "<" then "="
            (r'-|==|~=|<=|>=|<|>|&&|&|~|\|\|?', Operator),
            # operators requiring escape for re:
            (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator),

            # punctuation:
            (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
            # ':' is already consumed by the rule above
            (r'=|;', Punctuation),

            # quote can be transpose, instead of string:
            # (not great, but handles common cases...)
            (r'(?<=[\w\)\]])\'', Operator),

            (r'(?<![\w\)\]])\'', String, 'string'),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            (r'.', Text),
        ],
        'string': [
            # everything up to and including the closing quote
            (r'[^\']*\'', String, '#pop')
        ],
        'deffunc': [
            # "[out =] name(args)" following the "function" keyword
            (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
             bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation,
                      Text.Whitespace, Name.Function, Punctuation, Text,
                      Punctuation, Text.Whitespace), '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Score how likely *text* is Matlab source.

        Returns 0.9 when any line starts with a ``%`` comment (optionally
        preceded by whitespace) or with a ``!`` system command followed by
        a word character, and 0.1 otherwise.

        ``re.search`` is required here: ``re.match`` only tests the very
        beginning of the string, even when ``re.M`` is given.
        """
        if re.search(r'^\s*%', text, re.M):  # comment
            return 0.9
        elif re.search(r'^!\w+', text, re.M):  # system cmd
            return 0.9
        return 0.1
201
# Matches one newline-terminated line at a time; a trailing fragment that
# has no final newline is not matched.
line_re = re.compile('.*?\n')


class MatlabSessionLexer(Lexer):
    """
    For Matlab (or GNU Octave) sessions.  Modeled after PythonConsoleLexer.
    Contributed by Ken Schutte <kschutte@csail.mit.edu>.

    *New in Pygments 0.10.*
    """
    name = 'Matlab session'
    aliases = ['matlabsession']

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for a session transcript.

        Lines starting with ``>>`` are split into a three-character prompt
        and code; the code is accumulated and later lexed by `MatlabLexer`.
        Lines starting with ``???`` are recorded as `Generic.Traceback`
        insertions.  Every other line is emitted as `Generic.Output`, after
        first flushing any accumulated code.
        """
        code_lexer = MatlabLexer(**self.options)

        pending_code = ''
        pending_insertions = []

        for line_match in line_re.finditer(text):
            line = line_match.group()

            if line.startswith('>>'):
                prompt, code = line[:3], line[3:]
                pending_insertions.append(
                    (len(pending_code), [(0, Generic.Prompt, prompt)]))
                pending_code += code
                continue

            if line.startswith('???'):
                # without the leading newline the error shows up on the
                # same line as the preceding text
                error_token = (0, Generic.Traceback, "\n" + line)
                pending_insertions.append((len(pending_code), [error_token]))
                continue

            # plain output line: flush the code collected so far
            # NOTE(review): insertions recorded while no code is pending
            # are not flushed here -- confirm whether that is intended.
            if pending_code:
                code_tokens = code_lexer.get_tokens_unprocessed(pending_code)
                for item in do_insertions(pending_insertions, code_tokens):
                    yield item
                pending_code = ''
                pending_insertions = []

            yield line_match.start(), Generic.Output, line

        # flush code left over at the end of the transcript
        if pending_code:
            code_tokens = code_lexer.get_tokens_unprocessed(pending_code)
            for item in do_insertions(pending_insertions, code_tokens):
                yield item
251
252
class NumPyLexer(PythonLexer):
    """
    A Python lexer recognizing Numerical Python builtins.

    Tokens are produced by `PythonLexer`; plain `Name` tokens whose value is
    listed in ``EXTRA_KEYWORDS`` are re-tagged as `Keyword.Pseudo`.

    *New in Pygments 0.10.*
    """

    name = 'NumPy'
    aliases = ['numpy']

    # override the mimetypes to not inherit them from python
    mimetypes = []
    filenames = []

    # names that get re-tagged by get_tokens_unprocessed()
    EXTRA_KEYWORDS = set([
        'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
        'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
        'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
        'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
        'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
        'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
        'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
        'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
        'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
        'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
        'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
        'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
        'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
        'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
        'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
        'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
        'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
        'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
        'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
        'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
        'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
        'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
        'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
        'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
        'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
        'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
        'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
        'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
        'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
        'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
        'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
        'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
        'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
        'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
        'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
        'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
        'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
        'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
        'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
        'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
        'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
        'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
        'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
        'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
        'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
        'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
        'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
        'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
        'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
        'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
        'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
        'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
        'set_numeric_ops', 'set_printoptions', 'set_string_function',
        'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
        'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
        'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
        'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
        'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
        'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
        'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
        'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
        'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
        'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
        'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
    ])

    def get_tokens_unprocessed(self, text):
        """
        Yield the `PythonLexer` token stream for *text*, replacing the type
        of `Name` tokens found in ``EXTRA_KEYWORDS`` with `Keyword.Pseudo`.
        """
        numpy_names = self.EXTRA_KEYWORDS
        python_tokens = PythonLexer.get_tokens_unprocessed(self, text)
        for position, ttype, value in python_tokens:
            # identity test: only the bare Name type is re-tagged
            if ttype is Name and value in numpy_names:
                ttype = Keyword.Pseudo
            yield position, ttype, value
341
342
class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    The ``statements`` state combines the smaller rule groups (comments,
    strings, builtin symbols, numbers, keywords, punctuation, operators and
    names); ``root`` adds braces and a single-character fallback.

    *New in Pygments 0.10.*
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R']
    mimetypes = ['text/S-plus', 'text/S', 'text/R']

    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (r'[a-zA-Z][0-9a-zA-Z\._]+', Text),
            (r'`.+`', String.Backtick),
        ],
        'punctuation': [
            # double brackets come first so "[[" / "]]" are single tokens
            # instead of being split by the single-bracket alternatives
            (r'\[\[|\]\]|\[|\]|\$|\(|\)|@|:::?|;|,', Punctuation),
        ],
        'keywords': [
            (r'for(?=\s*\()|while(?=\s*\()|if(?=\s*\()|(?<=\s)else|'
             r'(?<=\s)break(?=;|$)|return(?=\s*\()|function(?=\s*\()',
             Keyword.Reserved)
        ],
        'operators': [
            (r'<-|-|==|<=|>=|<|>|&&|&|!=|\|\|?', Operator),
            (r'\*|\+|\^|/|%%|%/%|=', Operator),
            # the "*" must be escaped: an unescaped "%*%" means "zero or
            # more '%' followed by '%'" and never matches the operator
            (r'%in%|%\*%', Operator)
        ],
        'builtin_symbols': [
            (r'(NULL|NA|TRUE|FALSE|NaN)\b', Keyword.Constant),
            (r'(T|F)\b', Keyword.Variable),
        ],
        'numbers': [
            (r'(?<![0-9a-zA-Z\)\}\]`\"])(?=\s*)[-\+]?[0-9]+'
             r'(\.[0-9]*)?(E[0-9][-\+]?(\.[0-9]*)?)?', Number),
            (r'\.[0-9]*(E[0-9][-\+]?(\.[0-9]*)?)?', Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
            include('valid_name'),
        ],
        'root': [
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            #(r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        #'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        #],
        'string_squote': [
            # everything up to and including the closing single quote
            (r'[^\']*\'', String, '#pop'),
        ],
        'string_dquote': [
            # everything up to and including the closing double quote
            (r'[^\"]*\"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        """
        Return whether the assignment arrow ``<-`` occurs anywhere in *text*.

        NOTE(review): this returns a bool rather than a float score;
        presumably the Pygments framework converts it -- confirm.
        """
        return '<-' in text

eric ide

mercurial