|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.haskell |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for Haskell and related languages. |
|
7 |
|
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \ |
|
15 default, include |
|
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
17 Number, Punctuation, Generic |
|
18 from pygments import unistring as uni |
|
19 |
|
# Public API of this module: every lexer class defined below.
__all__ = ['HaskellLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


# Matches a single line including its trailing newline; used by the
# literate lexers to walk the input line by line.
line_re = re.compile('.*?\n')
|
26 |
|
27 |
|
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    # Haskell 98 reserved words.  Some entries are small regex alternations
    # (e.g. ``infix[lr]?`` covers infix / infixl / infixr); they are joined
    # with '|' into a single pattern below, so rule order inside the tuple
    # does not matter but the regex fragments must stay regex-safe.
    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    # Named ASCII control-character escapes ('\NUL', '\ACK', ...); several
    # are collapsed into character classes ('[SE]TX' = STX | ETX).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state is significant -- earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Line comment: '--' not followed by an operator symbol
            # (sequences like '-->' are operators, not comments).
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            # ``(?!\')`` keeps primed names like ``case'`` from matching.
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            # A lowercase name in column 0 is taken as a function definition.
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            # Optionally preceded by '' (Template Haskell type quote).
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nesting handled via #push/#pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # Control-character escape: \^A .. \^Z, \^@, \^[, \^], \^^, \^_
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }
|
152 |
|
153 |
|
class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    # Idris reserved words; joined with '|' into one alternation below.
    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    # Named ASCII control-character escapes (same set as the Haskell lexer).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # Compiler directives, written in source as '%lib', '%link', etc.
    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Compiler directives.  The alternation must be parenthesized:
            # with the old '(%%%s)' form the literal '%' only applied to the
            # first directive, so e.g. a bare 'link' was wrongly highlighted
            # as a directive while '%link' was not matched as one.
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            # Line comment: '--' not followed by an operator symbol.
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            # Doc comment: '|||'.
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration: "name : type"
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            # Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            # Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments (nesting handled via #push/#pop)
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop')
        ],
    }
|
262 |
|
263 |
|
class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    # Agda reserved words; joined into a single alternation below.
    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    # NOTE: rule order within each state is significant -- earlier rules win.
    tokens = {
        'root': [
            # Declaration: "name : type"
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments ('--' not followed by an operator symbol)
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes ({! ... !} goals shown by the interactive editor)
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r'\b(Set|Prop)\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            # Dots, pipe, capital lambda, forall, right arrow (Unicode and
            # ASCII forms), colon and equals.
            (u'(\\.{1,3}|\\||\u039B|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes may nest, so count braces via #push/#pop.
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[^a-zA-Z]+', Text)
        ],
        # These four states are shared with (and defined by) HaskellLexer.
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }
|
331 |
|
332 |
|
class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    # Cryptol reserved words; joined into a single alternation below.
    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    # Named ASCII control-character escapes (same set as the Haskell lexer).
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    # NOTE: rule order within each state is significant -- earlier rules win.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            # Cryptol uses C-style comments, unlike Haskell.
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # A lowercase name in column 0 is taken as a function definition.
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            #(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            #(r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nesting handled via #push/#pop)
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # String gap: backslash, whitespace, backslash.
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    # Built-in names promoted from Name to Name.Builtin after lexing.
    # NOTE(review): the operator entries ('<<', '>>', '<<<', '>>>') can
    # seemingly never be produced as Name tokens by the rules above, so they
    # look inert here -- confirm before relying on them.
    EXTRA_KEYWORDS = set(('join', 'split', 'reverse', 'transpose', 'width',
                          'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                          'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                          'trace'))

    def get_tokens_unprocessed(self, text):
        # Post-process the regular token stream, upgrading plain Name
        # tokens that are known Cryptol built-ins to Name.Builtin.
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value
|
468 |
|
469 |
|
class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    # A Bird-style code line: the '>' marker (with trailing blanks) in group 1
    # and the code proper, including the newline, in group 2.
    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        # `baselexer` is the lexer used for the extracted code fragments.
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Separate code from markup, lex the code with the base lexer, and
        weave the markup tokens back in via :func:`do_insertions`.
        """
        style = self.options.get('litstyle')
        if style is None:
            # Autodetect: LaTeX sources usually start with '\' or '%'.
            # (A conditional expression replaces the fragile
            # ``cond and a or b`` idiom used previously.)
            style = 'latex' if text.lstrip()[0:1] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: lines starting with '>' are code; everything else
            # is passed through as plain text.
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # the surrounding text is lexed as LaTeX.
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0  # flag: currently inside a code environment
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    # Flush accumulated LaTeX at the current code offset.
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # Flush any trailing LaTeX after the last code block.
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
            yield item
|
533 |
|
534 |
|
class LiterateHaskellLexer(LiterateLexer):
    """
    Lexer for Literate Haskell sources, in either Bird style or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        # All the work happens in LiterateLexer; we only supply the
        # HaskellLexer that highlights the extracted code.
        LiterateLexer.__init__(self, HaskellLexer(**options), **options)
|
556 |
|
557 |
|
class LiterateIdrisLexer(LiterateLexer):
    """
    Lexer for Literate Idris sources, in either Bird style or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        # All the work happens in LiterateLexer; we only supply the
        # IdrisLexer that highlights the extracted code.
        LiterateLexer.__init__(self, IdrisLexer(**options), **options)
|
579 |
|
580 |
|
class LiterateAgdaLexer(LiterateLexer):
    """
    Lexer for Literate Agda sources.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        # Literate Agda is always LaTeX-style, so force litstyle here and
        # hand LiterateLexer an AgdaLexer for the code blocks.
        LiterateLexer.__init__(self, AgdaLexer(**options), litstyle='latex',
                               **options)
|
602 |
|
603 |
|
class LiterateCryptolLexer(LiterateLexer):
    """
    Lexer for Literate Cryptol sources, in either Bird style or LaTeX style.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        # All the work happens in LiterateLexer; we only supply the
        # CryptolLexer that highlights the extracted code.
        LiterateLexer.__init__(self, CryptolLexer(**options), **options)
|
625 |
|
626 |
|
class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    # Koka reserved words.
    # NOTE(review): 'private' appears twice in this list; duplicates are
    # harmless in the joined alternation but could be cleaned up.
    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!'+symbols+')'

    # name boundary: a keyword should not be followed by any of these
    boundary = '(?![\w/])'

    # koka token abstractions: central place to pick the token type used
    # for types, type definitions and constructors.
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    # NOTE: rule order within each state is significant -- earlier rules win.
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            # Function/value definitions; the name may also be an operator
            # in parentheses, e.g. ``fun (+)``.
            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names: optionally namespace-qualified ("mod/")
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string (verbatim, '""' is an escaped quote)
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            # Stop before a non-empty parameter list.
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall: anything else ends the type.
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            # Multiline Comments (nesting handled via #push/#pop)
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }