ThirdParty/Pygments/pygments/lexers/haskell.py

changeset 4172
4f20dba37ab6
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.haskell
4 ~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Haskell and related languages.
7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
15 default, include
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Generic
18 from pygments import unistring as uni
19
__all__ = ['HaskellLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


# Matches a single input line including its trailing newline; used by the
# literate lexers below to walk through the source line by line.
line_re = re.compile('.*?\n')
26
27
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Reserved words; each entry is a regex fragment, so 'infix[lr]?'
    # matches infix, infixl and infixr.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # ASCII control-character mnemonics usable in escape sequences
    # (regex alternatives, e.g. '[SE]TX' covers STX and ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Single-line comment: '--' not followed by an operator symbol
            # (otherwise '-->' etc. would be misread as a comment).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lowercase identifier at the start of a line is taken to be a
            # function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gaps: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
152
153
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; entries are regex fragments ('infix[lr]?' matches
    # infix, infixl and infixr).
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # ASCII control-character mnemonics for escape sequences
    # (regex alternatives).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives recognized after a '%' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%%s)' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            # '|||' introduces a documentation comment.
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gaps: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop')
        ],
    }
262
263
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, written {! ... !})
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r'\b(Set|Prop)\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # Lambda, forall and arrow in both ASCII and Unicode spellings.
            (u'(\\.{1,3}|\\||\u039B|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes nest, like multiline comments.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[^a-zA-Z]+', Text)
        ],
        # These four states are shared with HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
331
332
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # ASCII control-character mnemonics for escape sequences
    # (regex alternatives).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            #(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            #(r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Builtin function names that the token rules classify as plain Name;
    # get_tokens_unprocessed promotes them to Name.Builtin.
    EXTRA_KEYWORDS = set(('join', 'split', 'reverse', 'transpose', 'width',
                          'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                          'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                          'trace'))

    def get_tokens_unprocessed(self, text):
        # Post-process the regex lexer's output so that known Cryptol
        # builtins are highlighted as builtins rather than plain names.
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
468
469
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the ">" marker (plus trailing blanks) and
    # the code that follows it.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` is used to tokenize the extracted code portions.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Yield ``(index, token, value)`` triples for the literate source.

        All code fragments are concatenated, lexed in one go by the base
        lexer, and the surrounding prose tokens are then merged back in
        at the right offsets via `do_insertions`.
        """
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources typically start with '\' or '%'.
            # (Conditional expression instead of the old `and/or` trick.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: code lines are prefixed with ">"
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code}
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush accumulated prose before the code block starts.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing prose after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions,
                                  self.baselexer.get_tokens_unprocessed(code)):
            yield item
533
534
class LiterateHaskellLexer(LiterateLexer):
    """
    Lexer for Literate Haskell source, in either Bird style or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # Delegate code highlighting to a plain Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
556
557
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # Renamed from the copy-pasted `hslexer`: this wraps an IdrisLexer.
        idrlexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, idrlexer, **options)
579
580
class LiterateAgdaLexer(LiterateLexer):
    """
    Lexer for Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-style, so the style is forced here.
        LiterateLexer.__init__(self, AgdaLexer(**options),
                               litstyle='latex', **options)
602
603
class LiterateCryptolLexer(LiterateLexer):
    """
    Lexer for Literate Cryptol source, in either Bird style or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # Delegate code highlighting to a plain Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
625
626
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = '(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names: an optional namespace path, then a constructor or name
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: anything else ends the type
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        # @"..." literal strings: only '""' escapes a quote.
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }

eric ide

mercurial