|
# -*- coding: utf-8 -*-
"""
    pygments.lexers.modeling
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for modeling languages.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, using, default
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation

from pygments.lexers.html import HtmlLexer
from pygments.lexers import _stan_builtins

__all__ = ['ModelicaLexer', 'BugsLexer', 'JagsLexer', 'StanLexer']
|

class ModelicaLexer(RegexLexer):
    """
    For `Modelica <http://www.modelica.org/>`_ source code.

    .. versionadded:: 1.1
    """
    name = 'Modelica'
    aliases = ['modelica']
    filenames = ['*.mo']
    mimetypes = ['text/x-modelica']

    flags = re.DOTALL | re.MULTILINE

    _name = r"(?:'(?:[^\\']|\\.)+'|[a-zA-Z_]\w*)"

    tokens = {
        'whitespace': [
            (u'[\\s\ufeff]+', Text),
            (r'//[^\n]*\n?', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        'root': [
            include('whitespace'),
            (r'"', String.Double, 'string'),
            (r'[()\[\]{},;]+', Punctuation),
            (r'\.?[*^/+-]|\.|<>|[<>:=]=?', Operator),
            (r'\d+(\.?\d*[eE][-+]?\d+|\.\d*)', Number.Float),
            (r'\d+', Number.Integer),
            (r'(abs|acos|actualStream|array|asin|assert|AssertionLevel|atan|'
             r'atan2|backSample|Boolean|cardinality|cat|ceil|change|Clock|'
             r'Connections|cos|cosh|cross|delay|diagonal|div|edge|exp|'
             r'ExternalObject|fill|floor|getInstanceName|hold|homotopy|'
             r'identity|inStream|integer|Integer|interval|inverse|isPresent|'
             r'linspace|log|log10|matrix|max|min|mod|ndims|noClock|noEvent|'
             r'ones|outerProduct|pre|previous|product|Real|reinit|rem|rooted|'
             r'sample|scalar|semiLinear|shiftSample|sign|sin|sinh|size|skew|'
             r'smooth|spatialDistribution|sqrt|StateSelect|String|subSample|'
             r'sum|superSample|symmetric|tan|tanh|terminal|terminate|time|'
             r'transpose|vector|zeros)\b', Name.Builtin),
            (r'(algorithm|annotation|break|connect|constant|constrainedby|der|'
             r'discrete|each|else|elseif|elsewhen|encapsulated|enumeration|'
             r'equation|exit|expandable|extends|external|final|flow|for|if|'
             r'import|impure|in|initial|inner|input|loop|nondiscrete|outer|'
             r'output|parameter|partial|protected|public|pure|redeclare|'
             r'replaceable|return|stream|then|when|while)\b',
             Keyword.Reserved),
            (r'(and|not|or)\b', Operator.Word),
            (r'(block|class|connector|end|function|model|operator|package|'
             r'record|type)\b', Keyword.Reserved, 'class'),
            (r'(false|true)\b', Keyword.Constant),
            (r'within\b', Keyword.Reserved, 'package-prefix'),
            (_name, Name)
        ],
        'class': [
            include('whitespace'),
            (r'(function|record)\b', Keyword.Reserved),
            (r'(if|for|when|while)\b', Keyword.Reserved, '#pop'),
            (_name, Name.Class, '#pop'),
            default('#pop')
        ],
        'package-prefix': [
            include('whitespace'),
            (_name, Name.Namespace, '#pop'),
            default('#pop')
        ],
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\[\'"?\\abfnrtv]', String.Escape),
            (r'(?i)<\s*html\s*>([^\\"]|\\.)+?(<\s*/\s*html\s*>|(?="))',
             using(HtmlLexer)),
            (r'<|\\?[^"\\<]+', String.Double)
        ]
    }
|

class BugsLexer(RegexLexer):
    """
    Pygments Lexer for `OpenBugs <http://www.openbugs.net/>`_ and WinBugs
    models.

    .. versionadded:: 1.6
    """

    name = 'BUGS'
    aliases = ['bugs', 'winbugs', 'openbugs']
    filenames = ['*.bug']

    _FUNCTIONS = (
        # Scalar functions
        'abs', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh',
        'cloglog', 'cos', 'cosh', 'cumulative', 'cut', 'density', 'deviance',
        'equals', 'expr', 'gammap', 'ilogit', 'icloglog', 'integral', 'log',
        'logfact', 'loggam', 'logit', 'max', 'min', 'phi', 'post.p.value',
        'pow', 'prior.p.value', 'probit', 'replicate.post', 'replicate.prior',
        'round', 'sin', 'sinh', 'solution', 'sqrt', 'step', 'tan', 'tanh',
        'trunc',
        # Vector functions
        'inprod', 'interp.lin', 'inverse', 'logdet', 'mean', 'eigen.vals',
        'ode', 'prod', 'p.valueM', 'rank', 'ranked', 'replicate.postM',
        'sd', 'sort', 'sum',
        # Special
        'D', 'I', 'F', 'T', 'C')
    """ OpenBUGS built-in functions

    From http://www.openbugs.info/Manuals/ModelSpecification.html#ContentsAII

    This also includes

    - T, C, I : Truncation and censoring.
      ``T`` and ``C`` are in OpenBUGS. ``I`` in WinBUGS.
    - D : ODE
    - F : Functional http://www.openbugs.info/Examples/Functionals.html

    """

    _DISTRIBUTIONS = ('dbern', 'dbin', 'dcat', 'dnegbin', 'dpois',
                      'dhyper', 'dbeta', 'dchisqr', 'ddexp', 'dexp',
                      'dflat', 'dgamma', 'dgev', 'df', 'dggamma', 'dgpar',
                      'dloglik', 'dlnorm', 'dlogis', 'dnorm', 'dpar',
                      'dt', 'dunif', 'dweib', 'dmulti', 'ddirch', 'dmnorm',
                      'dmt', 'dwish')
    """ OpenBUGS built-in distributions

    Functions from
    http://www.openbugs.info/Manuals/ModelSpecification.html#ContentsAI
    """

    tokens = {
        'whitespace': [
            (r"\s+", Text),
        ],
        'comments': [
            # Comments
            (r'#.*$', Comment.Single),
        ],
        'root': [
            # Comments
            include('comments'),
            include('whitespace'),
            # Block start
            (r'(model)(\s+)(\{)',
             bygroups(Keyword.Namespace, Text, Punctuation)),
            # Reserved Words
            (r'(for|in)(?![\w.])', Keyword.Reserved),
            # Built-in Functions
            (r'(%s)(?=\s*\()'
             % r'|'.join(_FUNCTIONS + _DISTRIBUTIONS),
             Name.Builtin),
            # Regular variable names
            (r'[A-Za-z][\w.]*', Name),
            # Number Literals
            (r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', Number),
            # Punctuation
            (r'\[|\]|\(|\)|:|,|;', Punctuation),
            # Assignment operators
            # SLexer makes these tokens Operators.
            (r'<-|~', Operator),
            # Infix and prefix operators
            (r'\+|-|\*|/', Operator),
            # Block
            (r'[{}]', Punctuation),
        ]
    }

    def analyse_text(text):
        if re.search(r"^\s*model\s*{", text, re.M):
            return 0.7
        else:
            return 0.0
|

class JagsLexer(RegexLexer):
    """
    Pygments Lexer for JAGS.

    .. versionadded:: 1.6
    """

    name = 'JAGS'
    aliases = ['jags']
    filenames = ['*.jag', '*.bug']

    # JAGS
    _FUNCTIONS = (
        'abs', 'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctanh',
        'cos', 'cosh', 'cloglog',
        'equals', 'exp', 'icloglog', 'ifelse', 'ilogit', 'log', 'logfact',
        'loggam', 'logit', 'phi', 'pow', 'probit', 'round', 'sin', 'sinh',
        'sqrt', 'step', 'tan', 'tanh', 'trunc', 'inprod', 'interp.lin',
        'logdet', 'max', 'mean', 'min', 'prod', 'sum', 'sd', 'inverse',
        'rank', 'sort', 't', 'acos', 'acosh', 'asin', 'asinh', 'atan',
        # Truncation/Censoring (should I include)
        'T', 'I')
    # Distributions with density, probability and quartile functions
    _DISTRIBUTIONS = tuple('[dpq]%s' % x for x in
                           ('bern', 'beta', 'dchiqsqr', 'ddexp', 'dexp',
                            'df', 'gamma', 'gen.gamma', 'logis', 'lnorm',
                            'negbin', 'nchisqr', 'norm', 'par', 'pois', 'weib'))
    # Other distributions without density and probability
    _OTHER_DISTRIBUTIONS = (
        'dt', 'dunif', 'dbetabin', 'dbern', 'dbin', 'dcat', 'dhyper',
        'ddirch', 'dmnorm', 'dwish', 'dmt', 'dmulti', 'dbinom', 'dchisq',
        'dnbinom', 'dweibull', 'ddirich')

    tokens = {
        'whitespace': [
            (r"\s+", Text),
        ],
        'names': [
            # Regular variable names
            (r'[a-zA-Z][\w.]*\b', Name),
        ],
        'comments': [
            # do not use stateful comments
            (r'(?s)/\*.*?\*/', Comment.Multiline),
            # Comments
            (r'#.*$', Comment.Single),
        ],
        'root': [
            # Comments
            include('comments'),
            include('whitespace'),
            # Block start
            (r'(model|data)(\s+)(\{)',
             bygroups(Keyword.Namespace, Text, Punctuation)),
            (r'var(?![\w.])', Keyword.Declaration),
            # Reserved Words
            (r'(for|in)(?![\w.])', Keyword.Reserved),
            # Builtins
            # Need to use lookahead because . is a valid char
            (r'(%s)(?=\s*\()' % r'|'.join(_FUNCTIONS
                                          + _DISTRIBUTIONS
                                          + _OTHER_DISTRIBUTIONS),
             Name.Builtin),
            # Names
            include('names'),
            # Number Literals
            (r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', Number),
            (r'\[|\]|\(|\)|:|,|;', Punctuation),
            # Assignment operators
            (r'<-|~', Operator),
            # Infix and prefix operators; JAGS includes many more than OpenBUGS
            (r'\+|-|\*|\/|\|\||&&|[<>=]=?|\^|%.*?%', Operator),
            (r'[{}]', Punctuation),
        ]
    }

    def analyse_text(text):
        if re.search(r'^\s*model\s*\{', text, re.M):
            if re.search(r'^\s*data\s*\{', text, re.M):
                return 0.9
            elif re.search(r'^\s*var', text, re.M):
                return 0.9
            else:
                return 0.3
        else:
            return 0
|

class StanLexer(RegexLexer):
    """Pygments Lexer for Stan models.

    The Stan modeling language is specified in the *Stan Modeling Language
    User's Guide and Reference Manual, v2.4.0*,
    `pdf <https://github.com/stan-dev/stan/releases/download/v2.4.0/stan-reference-2.4.0.pdf>`__.

    .. versionadded:: 1.6
    """

    name = 'Stan'
    aliases = ['stan']
    filenames = ['*.stan']

    tokens = {
        'whitespace': [
            (r"\s+", Text),
        ],
        'comments': [
            (r'(?s)/\*.*?\*/', Comment.Multiline),
            # Comments
            (r'(//|#).*$', Comment.Single),
        ],
        'root': [
            # Stan is more restrictive on strings than this regex
            (r'"[^"]*"', String),
            # Comments
            include('comments'),
            # Whitespace
            include('whitespace'),
            # Block start
            (r'(%s)(\s*)(\{)' %
             r'|'.join(('functions', 'data', r'transformed\s+?data',
                        'parameters', r'transformed\s+parameters',
                        'model', r'generated\s+quantities')),
             bygroups(Keyword.Namespace, Text, Punctuation)),
            # Reserved Words
            (r'(%s)\b' % r'|'.join(_stan_builtins.KEYWORDS), Keyword),
            # Truncation
            (r'T(?=\s*\[)', Keyword),
            # Data types
            (r'(%s)\b' % r'|'.join(_stan_builtins.TYPES), Keyword.Type),
            # Punctuation
            (r"[;:,\[\]()]", Punctuation),
            # Builtin
            (r'(%s)(?=\s*\()'
             % r'|'.join(_stan_builtins.FUNCTIONS
                         + _stan_builtins.DISTRIBUTIONS),
             Name.Builtin),
            # Special names ending in __, like lp__
            (r'[A-Za-z]\w*__\b', Name.Builtin.Pseudo),
            (r'(%s)\b' % r'|'.join(_stan_builtins.RESERVED), Keyword.Reserved),
            # Regular variable names
            (r'[A-Za-z]\w*\b', Name),
            # Real Literals
            (r'-?[0-9]+(\.[0-9]+)?[eE]-?[0-9]+', Number.Float),
            (r'-?[0-9]*\.[0-9]*', Number.Float),
            # Integer Literals
            (r'-?[0-9]+', Number.Integer),
            # Assignment operators
            # SLexer makes these tokens Operators.
            (r'<-|~', Operator),
            # Infix, prefix and postfix operators (and = )
            (r"\+|-|\.?\*|\.?/|\\|'|\^|==?|!=?|<=?|>=?|\|\||&&", Operator),
            # Block delimiters
            (r'[{}]', Punctuation),
        ]
    }

    def analyse_text(text):
        if re.search(r'^\s*parameters\s*\{', text, re.M):
            return 1.0
        else:
            return 0.0
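

# A minimal usage sketch: running this module as a script highlights a small,
# made-up Stan model with StanLexer and shows how the lexers' analyse_text
# heuristics let pygments.lexers.guess_lexer pick a lexer for it. The sample
# model text below is illustrative only and is not part of the lexer logic.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers import guess_lexer

    sample = (
        'data {\n'
        '  int<lower=0> N;\n'
        '}\n'
        'parameters {\n'
        '  real mu;\n'
        '}\n'
        'model {\n'
        '  mu ~ normal(0, 1);\n'
        '}\n'
    )
    # StanLexer.analyse_text returns 1.0 when it sees a "parameters {" block,
    # so guess_lexer should resolve this snippet to StanLexer.
    print(guess_lexer(sample).__class__.__name__)
    print(highlight(sample, StanLexer(), TerminalFormatter()))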