eric6/ThirdParty/Pygments/pygments/lexers/haskell.py

changeset 6942
2602857055c5
parent 6651
e8f3b5568b21
child 7547
21b0534faebc
equal deleted inserted replaced
6941:f99d60d6b59b 6942:2602857055c5
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.haskell
4 ~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Haskell and related languages.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
15 default, include
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Generic
18 from pygments import unistring as uni
19
20 __all__ = ['HaskellLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
21 'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
22 'LiterateCryptolLexer', 'KokaLexer']
23
24
25 line_re = re.compile('.*?\n')
26
27
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Haskell reserved words; 'infix[lr]?' is a regex alternative that
    # covers infix, infixl and infixr in a single entry.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Mnemonics of ASCII control-character escapes usable in character and
    # string literals (e.g. '\NUL'); several entries are regex character
    # classes covering multiple mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state matters -- earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' only when not followed by an operator
            # symbol, so operators such as '-->' are not eaten as comments.
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (underscores allowed as digit separators,
            #  as per GHC's NumericUnderscores extension)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # The parenthesized export/import list of a module header or
            # import statement.
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments; {- -} comments nest in Haskell, hence
            # the '#push' on a nested opener.
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered after a backslash inside a character/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escapes, e.g. \^X
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            # Named ASCII escapes, e.g. \NUL
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),  # octal
            (r'x[\da-fA-F]+', String.Escape, '#pop'),  # hexadecimal
            (r'\d+', String.Escape, '#pop'),  # decimal
            # String gap: backslash + whitespace + backslash
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
158
159
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Idris reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Mnemonics of ASCII control-character escapes usable in literals
    # (e.g. '\NUL'); several entries are regex character classes covering
    # multiple mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written as '%name' at the start of a line.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%%s)' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            # '|||' introduces a documentation comment.
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration: a top-level "name : type" signature.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Parenthesized export list of a module header.
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments; {- -} comments nest, hence '#push'.
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered after a backslash inside a character/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),  # control chars, e.g. \^X
            ('|'.join(ascii), String.Escape, '#pop'),  # named ASCII escapes
            (r'o[0-7]+', String.Escape, '#pop'),  # octal
            (r'x[\da-fA-F]+', String.Escape, '#pop'),  # hexadecimal
            (r'\d+', String.Escape, '#pop'),  # decimal
            (r'\s+\\', String.Escape, '#pop')  # string gap
        ],
    }
268
269
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Agda reserved words.
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration: a top-level "name : type" signature.
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes: interactive-editing placeholders written {! ... !}
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r'\b(Set|Prop)\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # Includes the Unicode forms of lambda, forall and the arrow.
            (u'(\\.{1,3}|\\||\u039B|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes; {! !} may nest, hence '#push'.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These four states are reused verbatim from the HaskellLexer
        # (see the NOTE there before changing them).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
337
338
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Cryptol reserved words and built-in type-level names.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Mnemonics of ASCII control-character escapes usable in literals;
    # several entries are regex character classes covering multiple
    # mnemonics at once.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            # Parenthesized export/import list.
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            #(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            #(r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments; /* */ comments nest here, hence '#push'.
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # Entered after a backslash inside a character/string literal.
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),  # control chars, e.g. \^X
            ('|'.join(ascii), String.Escape, '#pop'),  # named ASCII escapes
            (r'o[0-7]+', String.Escape, '#pop'),  # octal
            (r'x[\da-fA-F]+', String.Escape, '#pop'),  # hexadecimal
            (r'\d+', String.Escape, '#pop'),  # decimal
            (r'\s+\\', String.Escape, '#pop'),  # string gap
        ],
    }

    # Built-in Cryptol names that should be highlighted as builtins even
    # though the regexes above tokenize them as plain names.
    EXTRA_KEYWORDS = set(('join', 'split', 'reverse', 'transpose', 'width',
                          'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                          'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                          'trace'))

    def get_tokens_unprocessed(self, text):
        """Post-process the token stream, re-tagging names listed in
        ``EXTRA_KEYWORDS`` as ``Name.Builtin``."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
474
475
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # Bird-style code line: the '>' marker (group 1) and the code (group 2).
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` highlights the extracted code; prose is handled here.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Split *text* into prose and code, lex the code with the base
        lexer and re-insert the prose tokens at the right offsets."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX documents start with '\' or '%'.
            # ([0:1] instead of [0] so all-whitespace input is safe.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: code lines are prefixed with '>', everything
            # else is prose emitted as plain Text.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # the surrounding prose is highlighted with the TeX lexer.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated LaTeX before the code block.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions,
                                  self.baselexer.get_tokens_unprocessed(code)):
            yield item
539
540
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # Code sections are highlighted with a plain Haskell lexer.
        baselexer = HaskellLexer(**options)
        LiterateLexer.__init__(self, baselexer, **options)
562
563
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # Code sections are highlighted with a plain Idris lexer.
        baselexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, baselexer, **options)
585
586
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-based, so the style is forced
        # instead of being autodetected.
        baselexer = AgdaLexer(**options)
        LiterateLexer.__init__(self, baselexer, litstyle='latex', **options)
608
609
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # Code sections are highlighted with a plain Cryptol lexer.
        baselexer = CryptolLexer(**options)
        LiterateLexer.__init__(self, baselexer, **options)
631
632
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # general keywords
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions: aliases so the token choice for types,
    # type definitions and constructors can be changed in one place
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            # function/value definitions; operator names appear as (symbols)
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names, optionally qualified with module/paths/
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (verbatim, @"...")
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # /* */ comments nest, hence '#push'
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            # verbatim string: no backslash escapes; "" is a literal quote
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }

eric ide

mercurial