ThirdParty/Pygments/pygments/lexers/ml.py

changeset 4172
4f20dba37ab6
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.ml
4 ~~~~~~~~~~~~~~~~~~
5
6 Lexers for ML family languages.
7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import RegexLexer, include, bygroups, default, words
15 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Error
17
18 __all__ = ['SMLLexer', 'OcamlLexer', 'OpaLexer']
19
20
class SMLLexer(RegexLexer):
    """
    For the Standard ML language.

    .. versionadded:: 1.5
    """

    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    # Reserved words that lex as alphanumeric identifiers.
    alphanumid_reserved = set((
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'datatype', 'do', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
        'struct', 'structure', 'where',
    ))

    # Reserved symbolic tokens.  These entries are compared *verbatim*
    # against lexed text in the callbacks below (they are never interpolated
    # into a regex), so they must be the literal characters: '|', not the
    # regex-escaped '\|'.
    symbolicid_reserved = set((
        # Core
        ':', '|', '=', '=>', '->', '#',
        # Modules
        ':>',
    ))

    # Reserved tokens that can never be part of an identifier; these ARE
    # regex-escaped at the point of use (see 'core' below).
    nonid_reserved = set(('(', ')', '[', ']', '{', '}', ',', ';', '...', '_'))

    alphanumid_re = r"[a-zA-Z][\w']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"

    # A character constant is a sequence of the form #s, where s is a string
    # constant denoting a string of size one character. This setup just parses
    # the entire string as either a String.Double or a String.Char (depending
    # on the argument), even if the String.Char is an erroneous
    # multiple-character string.
    def stringy(whatkind):
        return [
            (r'[^"\\]', whatkind),
            (r'\\[\\"abtnvfr]', String.Escape),
            # Control-character notation is used for codes < 32,
            # where \^@ == \000
            (r'\\\^[\x40-\x5e]', String.Escape),
            # Docs say 'decimal digits'
            (r'\\[0-9]{3}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # A gap: backslash, whitespace, backslash -- ignored in strings.
            (r'\\\s+\\', String.Interpol),
            (r'"', whatkind, '#pop'),
        ]

    # Callbacks for distinguishing tokens and reserved words

    def long_id_callback(self, match):
        # A qualified-name component followed by '.'; reserved words are
        # not legal structure names, so flag them as errors.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        else:
            token = Name.Namespace
        yield match.start(1), token, match.group(1)
        yield match.start(2), Punctuation, match.group(2)

    def end_id_callback(self, match):
        # The final component of a qualified name; reserved words (alphanumeric
        # or symbolic) cannot appear here.
        if match.group(1) in self.alphanumid_reserved:
            token = Error
        elif match.group(1) in self.symbolicid_reserved:
            token = Error
        else:
            token = Name
        yield match.start(1), token, match.group(1)

    def id_callback(self, match):
        # An unqualified identifier: keyword, reserved punctuation, or a
        # plain name.  (Local renamed from 'str' to avoid shadowing the
        # builtin.)
        ident = match.group(1)
        if ident in self.alphanumid_reserved:
            token = Keyword.Reserved
        elif ident in self.symbolicid_reserved:
            token = Punctuation
        else:
            token = Name
        yield match.start(1), token, ident

    tokens = {
        # Whitespace and comments are (almost) everywhere
        'whitespace': [
            (r'\s+', Text),
            (r'\(\*', Comment.Multiline, 'comment'),
        ],

        'delimiters': [
            # This lexer treats these delimiters specially:
            # Delimiters define scopes, and the scope is how the meaning of
            # the `|' is resolved - is it a case/handle expression, or function
            # definition by cases? (This is not how the Definition works, but
            # it's how MLton behaves, see http://mlton.org/SMLNJDeviations)
            (r'\(|\[|\{', Punctuation, 'main'),
            (r'\)|\]|\}', Punctuation, '#pop'),
            (r'\b(let|if|local)\b(?!\')', Keyword.Reserved, ('main', 'main')),
            (r'\b(struct|sig|while)\b(?!\')', Keyword.Reserved, 'main'),
            (r'\b(do|else|end|in|then)\b(?!\')', Keyword.Reserved, '#pop'),
        ],

        'core': [
            # Punctuation that doesn't overlap symbolic identifiers
            (r'(%s)' % '|'.join(re.escape(z) for z in nonid_reserved),
             Punctuation),

            # Special constants: strings, floats, numbers in decimal and hex
            (r'#"', String.Char, 'char'),
            (r'"', String.Double, 'string'),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'0w\d+', Number.Integer),
            (r'~?\d+\.\d+[eE]~?\d+', Number.Float),
            (r'~?\d+\.\d+', Number.Float),
            (r'~?\d+[eE]~?\d+', Number.Float),
            (r'~?\d+', Number.Integer),

            # Labels
            (r'#\s*[1-9][0-9]*', Name.Label),
            (r'#\s*(%s)' % alphanumid_re, Name.Label),
            (r'#\s+(%s)' % symbolicid_re, Name.Label),
            # Some reserved words trigger a special, local lexer state change
            (r'\b(datatype|abstype)\b(?!\')', Keyword.Reserved, 'dname'),
            (r'(?=\b(exception)\b(?!\'))', Text, ('ename')),
            (r'\b(functor|include|open|signature|structure)\b(?!\')',
             Keyword.Reserved, 'sname'),
            (r'\b(type|eqtype)\b(?!\')', Keyword.Reserved, 'tname'),

            # Regular identifiers, long and otherwise
            (r'\'[\w\']*', Name.Decorator),
            (r'(%s)(\.)' % alphanumid_re, long_id_callback, "dotted"),
            (r'(%s)' % alphanumid_re, id_callback),
            (r'(%s)' % symbolicid_re, id_callback),
        ],
        'dotted': [
            (r'(%s)(\.)' % alphanumid_re, long_id_callback),
            (r'(%s)' % alphanumid_re, end_id_callback, "#pop"),
            (r'(%s)' % symbolicid_re, end_id_callback, "#pop"),
            (r'\s+', Error),
            (r'\S+', Error),
        ],


        # Main parser (prevents errors in files that have scoping errors)
        'root': [
            default('main')
        ],

        # In this scope, I expect '|' to not be followed by a function name,
        # and I expect 'and' to be followed by a binding site
        'main': [
            include('whitespace'),

            # Special behavior of val/and/fun
            (r'\b(val|and)\b(?!\')', Keyword.Reserved, 'vname'),
            (r'\b(fun)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main-fun', 'fname')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # In this scope, I expect '|' and 'and' to be followed by a function
        'main-fun': [
            include('whitespace'),

            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Special behavior of val/and/fun
            (r'\b(fun|and)\b(?!\')', Keyword.Reserved, 'fname'),
            (r'\b(val)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main', 'vname')),

            # Special behavior of '|' and '|'-manipulating keywords
            (r'\|', Punctuation, 'fname'),
            (r'\b(case|handle)\b(?!\')', Keyword.Reserved,
             ('#pop', 'main')),

            include('delimiters'),
            include('core'),
            (r'\S+', Error),
        ],

        # Character and string parsers
        'char': stringy(String.Char),
        'string': stringy(String.Double),

        'breakout': [
            (r'(?=\b(%s)\b(?!\'))' % '|'.join(alphanumid_reserved), Text, '#pop'),
        ],

        # Dealing with what comes after module system keywords
        'sname': [
            include('whitespace'),
            include('breakout'),

            (r'(%s)' % alphanumid_re, Name.Namespace),
            default('#pop'),
        ],

        # Dealing with what comes after the 'fun' (or 'and' or '|') keyword
        'fname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)' % alphanumid_re, Name.Function, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Function, '#pop'),

            # Ignore interesting function declarations like "fun (x + y) = ..."
            default('#pop'),
        ],

        # Dealing with what comes after the 'val' (or 'and') keyword
        'vname': [
            include('whitespace'),
            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),

            (r'(%s)(\s*)(=(?!%s))' % (alphanumid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)(\s*)(=(?!%s))' % (symbolicid_re, symbolicid_re),
             bygroups(Name.Variable, Text, Punctuation), '#pop'),
            (r'(%s)' % alphanumid_re, Name.Variable, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Variable, '#pop'),

            # Ignore interesting patterns like 'val (x, y)'
            default('#pop'),
        ],

        # Dealing with what comes after the 'type' (or 'and') keyword
        'tname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'=(?!%s)' % symbolicid_re, Punctuation, ('#pop', 'typbind')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # A type binding includes most identifiers
        'typbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),

            include('breakout'),
            include('core'),
            (r'\S+', Error, '#pop'),
        ],

        # Dealing with what comes after the 'datatype' (or 'and') keyword
        'dname': [
            include('whitespace'),
            include('breakout'),

            (r'\'[\w\']*', Name.Decorator),
            (r'\(', Punctuation, 'tyvarseq'),
            (r'(=)(\s*)(datatype)',
             bygroups(Punctuation, Text, Keyword.Reserved), '#pop'),
            (r'=(?!%s)' % symbolicid_re, Punctuation,
             ('#pop', 'datbind', 'datcon')),

            (r'(%s)' % alphanumid_re, Keyword.Type),
            (r'(%s)' % symbolicid_re, Keyword.Type),
            (r'\S+', Error, '#pop'),
        ],

        # common case - A | B | C of int
        'datbind': [
            include('whitespace'),

            (r'\b(and)\b(?!\')', Keyword.Reserved, ('#pop', 'dname')),
            (r'\b(withtype)\b(?!\')', Keyword.Reserved, ('#pop', 'tname')),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            (r'(\|)(\s*)(%s)' % alphanumid_re,
             bygroups(Punctuation, Text, Name.Class)),
            (r'(\|)(\s+)(%s)' % symbolicid_re,
             bygroups(Punctuation, Text, Name.Class)),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        # Dealing with what comes after an exception
        'ename': [
            include('whitespace'),

            (r'(exception|and)\b(\s+)(%s)' % alphanumid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'(exception|and)\b(\s*)(%s)' % symbolicid_re,
             bygroups(Keyword.Reserved, Text, Name.Class)),
            (r'\b(of)\b(?!\')', Keyword.Reserved),

            include('breakout'),
            include('core'),
            (r'\S+', Error),
        ],

        'datcon': [
            include('whitespace'),
            (r'(%s)' % alphanumid_re, Name.Class, '#pop'),
            (r'(%s)' % symbolicid_re, Name.Class, '#pop'),
            (r'\S+', Error, '#pop'),
        ],

        # Series of type variables
        'tyvarseq': [
            (r'\s', Text),
            (r'\(\*', Comment.Multiline, 'comment'),

            (r'\'[\w\']*', Name.Decorator),
            (alphanumid_re, Name),
            (r',', Punctuation),
            (r'\)', Punctuation, '#pop'),
            (symbolicid_re, Name),
        ],

        # (* ... *) comments nest in SML, hence the #push.
        'comment': [
            (r'[^(*)]', Comment.Multiline),
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[(*)]', Comment.Multiline),
        ],
    }
355
356
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    .. versionadded:: 0.7
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = (
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'value', 'val', 'virtual', 'when', 'while', 'with',
    )
    # Already regex-escaped; joined in reverse below so that longer
    # operators (e.g. ':=') are tried before their prefixes (e.g. ':').
    keyopts = (
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', r'>\}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', r'\{', r'\{<', r'\|', r'\|]', r'\}', '~'
    )

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ('and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or')
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ('unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array')

    tokens = {
        'escape-sequence': [
            (r'\\[\\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][\w\']*)(?=\s*\.)', Name.Namespace, 'dotted'),
            (r'\b([A-Z][\w\']*)', Name.Class),
            (r'\(\*(?![)])', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts[::-1]), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # A float literal needs a (possibly empty) fraction part, an
            # exponent, or both.  The dot is escaped (an unescaped '.'
            # matched any character, so e.g. '1a2e3' lexed as a float), and
            # requiring fraction-or-exponent keeps plain integers and the
            # '0x'/'0o'/'0b' prefixes out of this rule.
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Bin),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        # (* ... *) comments nest in OCaml, hence the #push.
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        # Module-qualified names: Mod.Sub.ident
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][\w\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][\w\']*', Name.Class, '#pop'),
            (r'[a-z_][\w\']*', Name, '#pop'),
            default('#pop'),
        ],
    }
447
448
class OpaLexer(RegexLexer):
    """
    Lexer for the Opa language (http://opalang.org).

    .. versionadded:: 1.5
    """

    name = 'Opa'
    aliases = ['opa']
    filenames = ['*.opa']
    mimetypes = ['text/x-opa']

    # most of these aren't strictly keywords
    # but if you color only real keywords, you might just
    # as well not color anything
    keywords = (
        'and', 'as', 'begin', 'case', 'client', 'css', 'database', 'db', 'do',
        'else', 'end', 'external', 'forall', 'function', 'if', 'import',
        'match', 'module', 'or', 'package', 'parser', 'rec', 'server', 'then',
        'type', 'val', 'with', 'xml_parser',
    )

    # matches both stuff and `stuff`
    ident_re = r'(([a-zA-Z_]\w*)|(`[^`]*`))'

    op_re = r'[.=\-<>,@~%/+?*&^!]'
    punc_re = r'[()\[\],;|]'  # '{' and '}' are treated elsewhere
    # because they are also used for inserts

    tokens = {
        # copied from the caml lexer, should be adapted
        'escape-sequence': [
            (r'\\[\\"\'ntr}]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],

        # factorizing these rules, because they are inserted many times
        'comments': [
            (r'/\*', Comment, 'nested-comment'),
            (r'//.*?$', Comment),
        ],
        'comments-and-spaces': [
            include('comments'),
            (r'\s+', Text),
        ],

        'root': [
            include('comments-and-spaces'),
            # keywords
            (words(keywords, prefix=r'\b', suffix=r'\b'), Keyword),
            # directives
            # we could parse the actual set of directives instead of anything
            # starting with @, but this is troublesome
            # because it needs to be adjusted all the time
            # and assuming we parse only sources that compile, it is useless
            (r'@' + ident_re + r'\b', Name.Builtin.Pseudo),

            # number literals: the dots are escaped (an unescaped '.'
            # matched any character, so e.g. 'a12e5' lexed as a float) and
            # the exponent is optional, so '.5' and '3.14' are floats too
            (r'-?\.\d+([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+\.\d*([eE][+\-]?\d+)?', Number.Float),
            (r'-?\d+[eE][+\-]?\d+', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'\d+', Number.Integer),
            # color literals
            (r'#[\da-fA-F]{3,6}', Number.Integer),

            # string literals
            (r'"', String.Double, 'string'),
            # char literal, should be checked because this is the regexp from
            # the caml lexer
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2})|.)'",
             String.Char),

            # this is meant to deal with embedded exprs in strings
            # every time we find a '}' we pop a state so that if we were
            # inside a string, we are back in the string state
            # as a consequence, we must also push a state every time we find a
            # '{' or else we will have errors when parsing {} for instance
            (r'\{', Operator, '#push'),
            (r'\}', Operator, '#pop'),

            # html literals
            # this is much stricter than the actual parser,
            # since a<b would not be parsed as html
            # but then again, the parser is way too lax, and we can't hope
            # to have something as tolerant
            (r'<(?=[a-zA-Z>])', String.Single, 'html-open-tag'),

            # db path
            # matching the '[_]' in '/a[_]' because it is a part
            # of the syntax of the db path definition
            # unfortunately, i don't know how to match the ']' in
            # /a[1], so this is somewhat inconsistent
            (r'[@?!]?(/\w+)+(\[_\])?', Name.Variable),
            # putting the same color on <- as on db path, since
            # it can be used only to mean Db.write
            (r'<-(?!'+op_re+r')', Name.Variable),

            # 'modules'
            # although modules are not distinguished by their names as in caml
            # the standard library seems to follow the convention that modules
            # only are capitalized
            (r'\b([A-Z]\w*)(?=\.)', Name.Namespace),

            # operators
            # = has a special role because this is the only
            # way to syntactically distinguish binding constructions
            # unfortunately, this colors the equal in {x=2} too
            (r'=(?!'+op_re+r')', Keyword),
            (r'(%s)+' % op_re, Operator),
            (r'(%s)+' % punc_re, Operator),

            # coercions
            (r':', Operator, 'type'),
            # type variables
            # we need this rule because we don't parse specially type
            # definitions so in "type t('a) = ...", "'a" is parsed by 'root'
            ("'"+ident_re, Keyword.Type),

            # id literal, #something, or #{expr}
            (r'#'+ident_re, String.Single),
            (r'#(?=\{)', String.Single),

            # identifiers
            # this avoids to color '2' in 'a2' as an integer
            (ident_re, Text),

            # default, not sure if that is needed or not
            # (r'.', Text),
        ],

        # it is quite painful to have to parse types to know where they end
        # this is the general rule for a type
        # a type is either:
        # * -> ty
        # * type-with-slash
        # * type-with-slash -> ty
        # * type-with-slash (, type-with-slash)+ -> ty
        #
        # the code is pretty funky in here, but this code would roughly
        # translate in caml to:
        # let rec type stream =
        #   match stream with
        #   | [< "->";  stream >] -> type stream
        #   | [< "";  stream >] ->
        #     type_with_slash stream
        #     type_lhs_1 stream;
        # and type_1 stream = ...
        'type': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type),
            default(('#pop', 'type-lhs-1', 'type-with-slash')),
        ],

        # parses all the atomic or closed constructions in the syntax of type
        # expressions: record types, tuple types, type constructors, basic type
        # and type variables
        'type-1': [
            include('comments-and-spaces'),
            (r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (r'~?\{', Keyword.Type, ('#pop', 'type-record')),
            (ident_re+r'\(', Keyword.Type, ('#pop', 'type-tuple')),
            (ident_re, Keyword.Type, '#pop'),
            ("'"+ident_re, Keyword.Type),
            # this case is not in the syntax but sometimes
            # we think we are parsing types when in fact we are parsing
            # some css, so we just pop the states until we get back into
            # the root state
            default('#pop'),
        ],

        # type-with-slash is either:
        # * type-1
        # * type-1 (/ type-1)+
        'type-with-slash': [
            include('comments-and-spaces'),
            default(('#pop', 'slash-type-1', 'type-1')),
        ],
        'slash-type-1': [
            include('comments-and-spaces'),
            ('/', Keyword.Type, ('#pop', 'type-1')),
            # same remark as above
            default('#pop'),
        ],

        # we go in this state after having parsed a type-with-slash
        # while trying to parse a type
        # and at this point we must determine if we are parsing an arrow
        # type (in which case we must continue parsing) or not (in which
        # case we stop)
        'type-lhs-1': [
            include('comments-and-spaces'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            (r'(?=,)', Keyword.Type, ('#pop', 'type-arrow')),
            default('#pop'),
        ],
        'type-arrow': [
            include('comments-and-spaces'),
            # the look ahead here allows to parse f(x : int, y : float -> truc)
            # correctly
            (r',(?=[^:]*?->)', Keyword.Type, 'type-with-slash'),
            (r'->', Keyword.Type, ('#pop', 'type')),
            # same remark as above
            default('#pop'),
        ],

        # no need to do precise parsing for tuples and records
        # because they are closed constructions, so we can simply
        # find the closing delimiter
        # note that this would not work if the source
        # contained identifiers like `{)` (although it could be patched
        # to support it)
        'type-tuple': [
            include('comments-and-spaces'),
            (r'[^()/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\(', Keyword.Type, '#push'),
            (r'\)', Keyword.Type, '#pop'),
        ],
        'type-record': [
            include('comments-and-spaces'),
            (r'[^{}/*]+', Keyword.Type),
            (r'[/*]', Keyword.Type),
            (r'\{', Keyword.Type, '#push'),
            (r'\}', Keyword.Type, '#pop'),
        ],

        # /* ... */ comments nest in Opa, hence the #push.
        'nested-comment': [
            (r'[^/*]+', Comment),
            (r'/\*', Comment, '#push'),
            (r'\*/', Comment, '#pop'),
            (r'[/*]', Comment),
        ],

        # the copy pasting between string and single-string
        # is kinda sad. Is there a way to avoid that??
        'string': [
            (r'[^\\"{]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],
        'single-string': [
            (r'[^\\\'{]+', String.Double),
            (r'\'', String.Double, '#pop'),
            (r'\{', Operator, 'root'),
            include('escape-sequence'),
        ],

        # all the html stuff
        # can't really reuse some existing html parser
        # because we must be able to parse embedded expressions

        # we are in this state after someone parsed the '<' that
        # started the html literal
        'html-open-tag': [
            (r'[\w\-:]+', String.Single, ('#pop', 'html-attr')),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        # we are in this state after someone parsed the '</' that
        # started the end of the closing tag
        'html-end-tag': [
            # this is a star, because </> is allowed
            (r'[\w\-:]*>', String.Single, '#pop'),
        ],

        # we are in this state after having parsed '<ident(:ident)?'
        # we thus parse a possibly empty list of attributes
        'html-attr': [
            (r'\s+', Text),
            (r'[\w\-:]+=', String.Single, 'html-attr-value'),
            (r'/>', String.Single, '#pop'),
            (r'>', String.Single, ('#pop', 'html-content')),
        ],

        'html-attr-value': [
            (r"'", String.Single, ('#pop', 'single-string')),
            (r'"', String.Single, ('#pop', 'string')),
            (r'#'+ident_re, String.Single, '#pop'),
            (r'#(?=\{)', String.Single, ('#pop', 'root')),
            (r'[^"\'{`=<>]+', String.Single, '#pop'),
            (r'\{', Operator, ('#pop', 'root')),  # this is a tail call!
        ],

        # we should probably deal with '\' escapes here
        'html-content': [
            (r'<!--', Comment, 'html-comment'),
            (r'</', String.Single, ('#pop', 'html-end-tag')),
            (r'<', String.Single, 'html-open-tag'),
            (r'\{', Operator, 'root'),
            (r'[^<{]+', String.Single),
        ],

        'html-comment': [
            (r'-->', Comment, '#pop'),
            (r'[^\-]+|-', Comment),
        ],
    }

eric ide

mercurial