eric6/ThirdParty/Pygments/pygments/lexers/haskell.py

changeset 8258
82b608e352ec
parent 8257
28146736bbfc
child 8259
2bbec88047dd
equal deleted inserted replaced
8257:28146736bbfc 8258:82b608e352ec
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.haskell
4 ~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Haskell and related languages.
7
8 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
15 default, include, inherit
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Generic
18 from pygments import unistring as uni
19
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


# Matches one line including its trailing newline; used by LiterateLexer
# to walk the input line by line.
line_re = re.compile('.*?\n')
27
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Names of ASCII control-code escapes valid in literals (e.g. '\NUL');
    # several entries are collapsed into character classes
    # ('[SE]TX' matches STX and ETX, 'DC[1-4]' matches DC1..DC4, etc.).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # '--' starts a comment only when not followed by an operator
            # symbol (so '-->' and friends still lex as operators).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lower-case identifier at the start of a line is taken to be
            # a function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers (the '_*' parts allow underscores as digit separators)
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nestable, hence the '#push')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # A backslash-delimited "string gap": whitespace between two
            # backslashes is ignored inside string literals.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
160
161
class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    # Prepend one rule per Hspec combinator whose quoted argument is a
    # spec title, then fall back to the plain Haskell rules.
    tokens = {
        'root': [
            (r'(%s\s*)("[^"]*")' % combinator, bygroups(Text, String.Doc))
            for combinator in ('it', 'describe', 'context')
        ] + [inherit],
    }
182
183
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Reserved words; 'infix[lr]?' is a regex alternative covering
    # infix, infixl and infixr.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Names of ASCII control-code escapes valid in literals (e.g. '\NUL');
    # several entries are collapsed into character classes.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written as '%name', '%logging', etc.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Comments
            # '%%' in the format string yields a single literal '%'.
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            # '|||' doc comments
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration: "name : type" at the start of a line
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Catch-all for anything not matched above
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            # module X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nestable, hence the '#push')
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: whitespace between two backslashes is ignored.
            (r'\s+\\', String.Escape, '#pop')
        ],
    }
293
294
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Agda reserved words.
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Declaration: "name : type" at the start of a line
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes (interactive goals, written {! ... !})
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            # Set/Prop universes, optionally with subscript digit levels
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # \u03BB = lambda, \u2200 = forall, \u2192 = arrow
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Catch-all for anything not matched above
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes (nestable, hence the '#push')
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        # These states are shared with the HaskellLexer (see the NOTE there).
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
362
363
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Cryptol reserved words.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Names of ASCII control-code escapes valid in literals (e.g. '\NUL');
    # several entries are collapsed into character classes.
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # A lower-case identifier at line start is taken as a definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nestable, hence the '#push')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: whitespace between two backslashes is ignored.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Built-in names that the regex rules classify as plain Name; they are
    # re-tagged as Name.Builtin in get_tokens_unprocessed below.
    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        """Post-process the regex lexer output, promoting EXTRA_KEYWORDS
        from Name to Name.Builtin."""
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
499
500
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the '>' marker (with trailing blanks) in
    # group 1 and the actual code in group 2.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        """*baselexer* is the lexer used for the extracted code fragments."""
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Split *text* into code and markup, lexing code with the base
        lexer and markup as comments (Bird) or TeX (LaTeX)."""
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect.  str.startswith with a tuple also handles empty
            # input correctly: the previous ``text.lstrip()[0:1] in '%\\'``
            # test produced '', which *is* a substring of '%\\' and thus
            # misdetected empty files as LaTeX.
            style = 'latex' if text.lstrip().startswith(('%', '\\')) else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: lines starting with '>' are code, the rest prose.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything else is handed to the TeX lexer.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush the accumulated LaTeX before the code block.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))
563
564
class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # Delegate lexing of the extracted code to a plain Haskell lexer.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
586
587
class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # Delegate lexing of the extracted code to a plain Idris lexer.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
609
610
class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-style, so force litstyle here.
        LiterateLexer.__init__(self, AgdaLexer(**options),
                               litstyle='latex', **options)
632
633
class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # Delegate lexing of the extracted code to a plain Cryptol lexer.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
655
656
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # General keywords.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    # NOTE: rule order within each state is significant — earlier rules win.
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            # function/value definitions, optionally public/private;
            # the name may be an identifier or a parenthesized operator
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names, optionally qualified by a module path ("mod/name")
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (@"..." with "" as the escaped quote)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            # stop before a non-empty parameter list
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: leave type mode on anything else
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # Multiline comments (nestable, hence the '#push')
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }

eric ide

mercurial