# -*- coding: utf-8 -*-
"""
    pygments.lexers.math
    ~~~~~~~~~~~~~~~~~~~~

    Lexers for math languages.

    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
try:
    set
except NameError:
    from sets import Set as set

from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
from pygments.token import Comment, String, Punctuation, Keyword, Name, \
    Operator, Number, Text, Generic

from pygments.lexers.agile import PythonLexer

__all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'NumPyLexer',
           'SLexer']


class MuPADLexer(RegexLexer):
    """
    A `MuPAD <http://www.mupad.com>`_ lexer.
    Contributed by Christopher Creutzig <christopher@creutzig.de>.

    *New in Pygments 0.8.*
    """
    name = 'MuPAD'
    aliases = ['mupad']
    filenames = ['*.mu']

    tokens = {
        'root': [
            (r'//.*?$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'"(?:[^"\\]|\\.)*"', String),
            (r'\(|\)|\[|\]|\{|\}', Punctuation),
            (r'''(?x)\b(?:
                next|break|end|
                axiom|end_axiom|category|end_category|domain|end_domain|inherits|
                if|%if|then|elif|else|end_if|
                case|of|do|otherwise|end_case|
                while|end_while|
                repeat|until|end_repeat|
                for|from|to|downto|step|end_for|
                proc|local|option|save|begin|end_proc|
                delete|frame
            )\b''', Keyword),
            (r'''(?x)\b(?:
                DOM_ARRAY|DOM_BOOL|DOM_COMPLEX|DOM_DOMAIN|DOM_EXEC|DOM_EXPR|
                DOM_FAIL|DOM_FLOAT|DOM_FRAME|DOM_FUNC_ENV|DOM_HFARRAY|DOM_IDENT|
                DOM_INT|DOM_INTERVAL|DOM_LIST|DOM_NIL|DOM_NULL|DOM_POLY|DOM_PROC|
                DOM_PROC_ENV|DOM_RAT|DOM_SET|DOM_STRING|DOM_TABLE|DOM_VAR
            )\b''', Name.Class),
            (r'''(?x)\b(?:
                PI|EULER|E|CATALAN|
                NIL|FAIL|undefined|infinity|
                TRUE|FALSE|UNKNOWN
            )\b''', Name.Constant),
            (r'\b(?:dom|procname)\b', Name.Builtin.Pseudo),
            (r'\.|,|:|;|=|\+|-|\*|/|\^|@|>|<|\$|\||!|\'|%|~=', Operator),
            (r'''(?x)\b(?:
                and|or|not|xor|
                assuming|
                div|mod|
                union|minus|intersect|in|subset
            )\b''', Operator.Word),
            (r'\b(?:I|RDN_INF|RD_NINF|RD_NAN)\b', Number),
            #(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin),
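            # qualified identifiers (plain or backquoted, joined by '::')
            # followed by an opening parenthesis are treated as function calls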
            (r'''(?x)
                ((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
                (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)\s*([(])''',
                bygroups(Name.Function, Punctuation)),
            (r'''(?x)
                (?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
                (?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable),
            (r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),
            (r'\.[0-9]+(?:e[0-9]+)?', Number),
            (r'.', Text)
        ],
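        # /* ... */ comments may be nested; '#push' and '#pop' track the
        # nesting depth so the comment only ends at the matching '*/'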
        'comment': [
            (r'[^*/]', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ]
    }


class MatlabLexer(RegexLexer):
    """
    For Matlab (or GNU Octave) source code.
    Contributed by Ken Schutte <kschutte@csail.mit.edu>.

    *New in Pygments 0.10.*
    """
    name = 'Matlab'
    aliases = ['matlab', 'octave']
    filenames = ['*.m']
    mimetypes = ['text/matlab']

    #
    # These lists are generated automatically.
    # Run the following in bash shell:
    #
    # for f in elfun specfun elmat; do
    #   echo -n "$f = "
    #   matlab -nojvm -r "help $f;exit;" | perl -ne \
    #   'push(@c,$1) if /^ (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}'
    # done
    #
    # elfun: Elementary math functions
    # specfun: Special Math functions
    # elmat: Elementary matrices and matrix manipulation
    #
    # taken from Matlab version 7.4.0.336 (R2007a)
    #
    elfun = ["sin","sind","sinh","asin","asind","asinh","cos","cosd","cosh",
             "acos","acosd","acosh","tan","tand","tanh","atan","atand","atan2",
             "atanh","sec","secd","sech","asec","asecd","asech","csc","cscd",
             "csch","acsc","acscd","acsch","cot","cotd","coth","acot","acotd",
             "acoth","hypot","exp","expm1","log","log1p","log10","log2","pow2",
             "realpow","reallog","realsqrt","sqrt","nthroot","nextpow2","abs",
             "angle","complex","conj","imag","real","unwrap","isreal","cplxpair",
             "fix","floor","ceil","round","mod","rem","sign"]
    specfun = ["airy","besselj","bessely","besselh","besseli","besselk","beta",
               "betainc","betaln","ellipj","ellipke","erf","erfc","erfcx",
               "erfinv","expint","gamma","gammainc","gammaln","psi","legendre",
               "cross","dot","factor","isprime","primes","gcd","lcm","rat",
               "rats","perms","nchoosek","factorial","cart2sph","cart2pol",
               "pol2cart","sph2cart","hsv2rgb","rgb2hsv"]
    elmat = ["zeros","ones","eye","repmat","rand","randn","linspace","logspace",
             "freqspace","meshgrid","accumarray","size","length","ndims","numel",
             "disp","isempty","isequal","isequalwithequalnans","cat","reshape",
             "diag","blkdiag","tril","triu","fliplr","flipud","flipdim","rot90",
             "find","end","sub2ind","ind2sub","bsxfun","ndgrid","permute",
             "ipermute","shiftdim","circshift","squeeze","isscalar","isvector",
             "ans","eps","realmax","realmin","pi","i","inf","nan","isnan",
             "isinf","isfinite","j","why","compan","gallery","hadamard","hankel",
             "hilb","invhilb","magic","pascal","rosser","toeplitz","vander",
             "wilkinson"]

    tokens = {
        'root': [
            # line starting with '!' is sent as a system command. not sure what
            # label to use...
            (r'^!.*', String.Other),
            (r'%.*$', Comment),
            (r'^\s*function', Keyword, 'deffunc'),

            # from 'iskeyword' on version 7.4.0.336 (R2007a):
            (r'(break|case|catch|classdef|continue|else|elseif|end|for|function|'
             r'global|if|otherwise|parfor|persistent|return|switch|try|while)\b',
             Keyword),

            ("(" + "|".join(elfun+specfun+elmat) + r')\b', Name.Builtin),

            # operators:
            (r'-|==|~=|<|>|<=|>=|&&|&|~|\|\|?', Operator),
            # operators requiring escape for re:
            (r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator),

            # punctuation:
            (r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
            (r'=|:|;', Punctuation),

            # quote can be transpose, instead of string:
            # (not great, but handles common cases...)
            (r'(?<=[\w\)\]])\'', Operator),

            (r'(?<![\w\)\]])\'', String, 'string'),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            (r'.', Text),
        ],
        'string': [
            (r'[^\']*\'', String, '#pop')
        ],
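        # 'function' pushes this state to tokenize the rest of the declaration
        # line: optional output arguments, '=', the function name, and the
        # parenthesized input argument list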
        'deffunc': [
            (r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
             bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation,
                      Text.Whitespace, Name.Function, Punctuation, Text,
                      Punctuation, Text.Whitespace), '#pop'),
        ],
    }

    def analyse_text(text):
        if re.match('^\s*%', text, re.M): # comment
            return 0.9
        elif re.match('^!\w+', text, re.M): # system cmd
            return 0.9
        return 0.1


line_re = re.compile('.*?\n')


class MatlabSessionLexer(Lexer):
    """
    For Matlab (or GNU Octave) sessions. Modeled after PythonConsoleLexer.
    Contributed by Ken Schutte <kschutte@csail.mit.edu>.

    *New in Pygments 0.10.*
    """
    name = 'Matlab session'
    aliases = ['matlabsession']

    def get_tokens_unprocessed(self, text):
        mlexer = MatlabLexer(**self.options)

        curcode = ''
        insertions = []
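
        # Input typed at the '>>' prompt is buffered in ``curcode`` so that
        # MatlabLexer can tokenize whole statements at once; ``do_insertions``
        # then splices the prompt and error-message tokens back in at the
        # offsets recorded in ``insertions``.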
        for match in line_re.finditer(text):
            line = match.group()

            if line.startswith('>>'):
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:3])]))
                curcode += line[3:]

            elif line.startswith('???'):

                idx = len(curcode)

                # without the leading newline, the error message would be
                # rendered on the same line as the preceding output
                line = "\n" + line
                token = (0, Generic.Traceback, line)
                insertions.append((idx, [token]))

            else:
                if curcode:
                    for item in do_insertions(
                            insertions, mlexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []

                yield match.start(), Generic.Output, line

        if curcode: # or item:
            for item in do_insertions(
                    insertions, mlexer.get_tokens_unprocessed(curcode)):
                yield item


class NumPyLexer(PythonLexer):
    """
    A Python lexer recognizing Numerical Python builtins.

    *New in Pygments 0.10.*
    """

    name = 'NumPy'
    aliases = ['numpy']

    # override the mimetypes to not inherit them from python
    mimetypes = []
    filenames = []

    EXTRA_KEYWORDS = set([
        'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
        'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
        'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
        'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
        'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
        'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
        'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
        'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
        'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
        'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
        'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
        'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
        'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
        'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
        'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
        'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
        'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
        'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
        'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
        'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
        'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
        'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
        'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
        'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
        'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
        'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
        'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
        'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
        'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
        'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
        'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
        'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
        'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
        'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
        'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
        'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
        'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
        'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
        'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
        'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
        'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
        'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
        'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
        'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
        'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
        'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
        'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
        'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
        'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
        'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
        'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
        'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
        'set_numeric_ops', 'set_printoptions', 'set_string_function',
        'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
        'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
        'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
        'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
        'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
        'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
        'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
        'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
        'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
        'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
        'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
    ])

    def get_tokens_unprocessed(self, text):
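        # run the underlying Python lexer and re-tag plain Name tokens that
        # are known NumPy builtins as Keyword.Pseudo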
        for index, token, value in \
                PythonLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Keyword.Pseudo, value
            else:
                yield index, token, value


class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    *New in Pygments 0.10.*
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R']
    mimetypes = ['text/S-plus', 'text/S', 'text/R']

    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (r'[a-zA-Z][0-9a-zA-Z\._]+', Text),
            (r'`.+`', String.Backtick),
        ],
        'punctuation': [
            (r'\[|\]|\[\[|\]\]|\$|\(|\)|@|:::?|;|,', Punctuation),
        ],
        'keywords': [
            (r'for(?=\s*\()|while(?=\s*\()|if(?=\s*\()|(?<=\s)else|'
             r'(?<=\s)break(?=;|$)|return(?=\s*\()|function(?=\s*\()',
             Keyword.Reserved)
        ],
        'operators': [
            (r'<-|-|==|<=|>=|<|>|&&|&|!=|\|\|?', Operator),
            (r'\*|\+|\^|/|%%|%/%|=', Operator),
            (r'%in%|%\*%', Operator)
        ],
        'builtin_symbols': [
            (r'(NULL|NA|TRUE|FALSE|NaN)\b', Keyword.Constant),
            (r'(T|F)\b', Keyword.Variable),
        ],
        'numbers': [
            (r'(?<![0-9a-zA-Z\)\}\]`\"])(?=\s*)[-\+]?[0-9]+'
             r'(\.[0-9]*)?(E[0-9][-\+]?(\.[0-9]*)?)?', Number),
            (r'\.[0-9]*(E[0-9][-\+]?(\.[0-9]*)?)?', Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
            include('valid_name'),
        ],
        'root': [
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            #(r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        #'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        #],
        'string_squote': [
            (r'[^\']*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'[^\"]*\"', String, '#pop'),
        ],
    }

    def analyse_text(text):
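        # the assignment arrow is a strong hint that this is S/R code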
        return '<-' in text