# -*- coding: utf-8 -*-
"""
    pygments.lexers.ml
    ~~~~~~~~~~~~~~~~~~

    Lexers for ML family languages.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups, default, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error

__all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer']


class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    alphanumid_reserved = set((
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    ))

    symbolicid_reserved = set((
        # Core
        ':', '|', '=', '=>', '->', '#',
        # Modules
        ':>',
    ))

    nonid_reserved = set(('(', ')', '[', ']', '{', '}', ',', ';', '...', '_'))

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
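    # Illustrative SML inputs: #"a" is a character constant and "abc" a string
    # constant; escapes such as "\097", "\u0061" and "\^A" are each matched by
    # a single String.Escape rule below.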
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words
    def long_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        str = match.group(1)
        if str in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif str in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, str
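
    # For example, in `List.map` these callbacks emit Name.Namespace for
    # `List`, Punctuation for `.` and Name for `map`; a reserved word used as
    # a qualifier (e.g. `val.x`) is flagged as an Error instead.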

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
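            # For example (SML):
            #   fun f 0 = 1 | f n = n * f (n - 1)   (* '|' separates clauses *)
            #   case xs of [] => 0 | x :: _ => x    (* '|' separates match rules *)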
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],


        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }


class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            (r'-?\d[\d_]*(\.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element
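            # (e.g. the leading quote of a type variable such as 'a)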

            (r'"', String.Double, 'string'),

            # labeled and optional arguments, e.g. ~label: and ?opt:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }


class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language (http://opalang.org).

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
                              # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals
            (r'-?\.[\d]+([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
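            # e.g. in the string "hello {name}!", the '{' pushes the 'root'
            # state and the matching '}' pops back into the enclosing 'string'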
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much more strict than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # are the only ones capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids coloring the '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        #   * -> ty
        #   * type-with-slash
        #   * type-with-slash -> ty
        #   * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate into caml as:
        # let rec type stream =
        #   match stream with
        #   | [< "->"; stream >] -> type stream
        #   | [< ""; stream >] ->
        #     type_with_slash stream
        #     type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        #   * type-1
        #   * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look-ahead here allows parsing f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this approach would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # 'type-tuple': [
        #     include('comments-and-spaces'),
        #     (r'\)', Keyword.Type, '#pop'),
        #     default(('#pop', 'type-tuple-1', 'type-1')),
        # ],
        # 'type-tuple-1': [
        #     include('comments-and-spaces'),
        #     (r',?\s*\)', Keyword.Type, '#pop'),  # ,) is a valid end of tuple, in (1,)
        #     (r',', Keyword.Type, 'type-1'),
        # ],
        # 'type-record':[
        #     include('comments-and-spaces'),
        #     (r'\}', Keyword.Type, '#pop'),
        #     (r'~?(?:\w+|`[^`]*`)', Keyword.Type, 'type-record-field-expr'),
        # ],
        # 'type-record-field-expr': [
        #
        # ],

        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy-pasting between string and single-string
        # is kinda sad. Is there a way to avoid that?
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }
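

# A minimal usage sketch: running this module directly dumps the raw token
# stream that SMLLexer produces for a small SML snippet, via the standard
# RegexLexer.get_tokens_unprocessed() method. The snippet is illustrative only.
if __name__ == '__main__':
    sample_sml = "fun fact 0 = 1 | fact n = n * fact (n - 1)"
    for pos, token, value in SMLLexer().get_tokens_unprocessed(sample_sml):
        print("%d %s %r" % (pos, token, value))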