# -*- coding: utf-8 -*-
"""
    pygments.lexers.haskell
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Haskell and related languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
    default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic
from pygments import unistring as uni

__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
           'LiterateHaskellLexer', 'LiterateIdrisLexer', 'LiterateAgdaLexer',
           'LiterateCryptolLexer', 'KokaLexer']


line_re = re.compile('.*?\n')


class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 0.8
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    flags = re.MULTILINE | re.UNICODE

    reserved = ('case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'family', 'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r"'[^\\]'", String.Char),  # this has to come before the TH quote
            (r'^[_' + uni.Ll + r'][\w\']*', Name.Function),
            (r"'?[_" + uni.Ll + r"][\w']*", Name),
            (r"('')?[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')[" + uni.Lu + r"][\w\']*", Keyword.Type),
            (r"(')\[[^\]]*\]", Keyword.Type),  # tuples and lists get special treatment in GHC
            (r"(')\([^)]*\)", Keyword.Type),  # ..
            (r"(')[:!#$%&*+.\\/<=>?@^|~-]+", Keyword.Type),  # promoted type operators
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*_*[pP][+-]?\d(_*\d)*', Number.Float),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*\.[\da-fA-F](_*[\da-fA-F])*'
             r'(_*[pP][+-]?\d(_*\d)*)?', Number.Float),
            (r'\d(_*\d)*_*[eE][+-]?\d(_*\d)*', Number.Float),
            (r'\d(_*\d)*\.\d(_*\d)*(_*[eE][+-]?\d(_*\d)*)?', Number.Float),
            (r'0[bB]_*[01](_*[01])*', Number.Bin),
            (r'0[oO]_*[0-7](_*[0-7])*', Number.Oct),
            (r'0[xX]_*[\da-fA-F](_*[\da-fA-F])*', Number.Hex),
            (r'\d(_*\d)*', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[' + uni.Lu + r']\w*', Keyword.Type),
            (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][' + uni.Lu + r'@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }


class HspecLexer(HaskellLexer):
    """
    A Haskell lexer with support for Hspec constructs.

    .. versionadded:: 2.4.0
    """

    name = 'Hspec'
    aliases = ['hspec']
    filenames = []
    mimetypes = []

    tokens = {
        'root': [
            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
            inherit,
        ],
    }


class IdrisLexer(RegexLexer):
    """
    A lexer for the dependently typed programming language Idris.

    Based on the Haskell and Agda Lexer.

    .. versionadded:: 2.0
    """
    name = 'Idris'
    aliases = ['idris', 'idr']
    filenames = ['*.idr']
    mimetypes = ['text/x-idris']

    reserved = ('case', 'class', 'data', 'default', 'using', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance', 'rewrite', 'auto',
                'namespace', 'codata', 'mutual', 'private', 'public', 'abstract',
                'total', 'partial',
                'interface', 'implementation', 'export', 'covering', 'constructor',
                'let', 'proof', 'of', 'then', 'static', 'where', '_', 'with',
                'pattern', 'term', 'syntax', 'prefix',
                'postulate', 'parameters', 'record', 'dsl', 'impossible', 'implicit',
                'tactics', 'intros', 'intro', 'compute', 'refine', 'exact', 'trivial')

    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    directives = ('lib', 'link', 'flag', 'include', 'hide', 'freeze', 'access',
                  'default', 'logging', 'dynamic', 'name', 'error_handlers', 'language')

    tokens = {
        'root': [
            # Comments
            (r'^(\s*)(%%(%s))' % '|'.join(directives),
             bygroups(Text, Keyword.Reserved)),
            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            (r'[a-z][\w\']*', Text),
            #  Special Symbols
            (r'(<-|::|->|=>|=)', Operator.Word),  # specials
            (r'([(){}\[\]:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            (r'--.*$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        # NOTE: the next four states are shared in the AgdaLexer; make sure
        # any change is compatible with Agda as well or copy over and change
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'\{-', Comment.Multiline, '#push'),
            (r'-\}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop')
        ],
    }


class AgdaLexer(RegexLexer):
    """
    For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
    dependently typed functional programming language and proof assistant.

    .. versionadded:: 2.0
    """

    name = 'Agda'
    aliases = ['agda']
    filenames = ['*.agda']
    mimetypes = ['text/x-agda']

    reserved = ['abstract', 'codata', 'coinductive', 'constructor', 'data',
                'field', 'forall', 'hiding', 'in', 'inductive', 'infix',
                'infixl', 'infixr', 'instance', 'let', 'mutual', 'open',
                'pattern', 'postulate', 'primitive', 'private',
                'quote', 'quoteGoal', 'quoteTerm',
                'record', 'renaming', 'rewrite', 'syntax', 'tactic',
                'unquote', 'unquoteDecl', 'using', 'where', 'with']

    tokens = {
        'root': [
            # Declaration
            (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
            # Comments
            (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            (r'\{-', Comment.Multiline, 'comment'),
            # Holes
            (r'\{!', Comment.Directive, 'hole'),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
            (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
            #  Special Symbols
            (r'(\(|\)|\{|\})', Operator),
            (r'(\.{1,3}|\||\u03BB|\u2200|\u2192|:|=|->)', Operator.Word),
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Strings
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            (r'[^\s(){}]+', Text),
            (r'\s+?', Text),  # Whitespace
        ],
        'hole': [
            # Holes
            (r'[^!{}]+', Comment.Directive),
            (r'\{!', Comment.Directive, '#push'),
            (r'!\}', Comment.Directive, '#pop'),
            (r'[!{}]', Comment.Directive),
        ],
        'module': [
            (r'\{-', Comment.Multiline, 'comment'),
            (r'[a-zA-Z][\w.]*', Name, '#pop'),
            (r'[\W0-9_]+', Text)
        ],
        'comment': HaskellLexer.tokens['comment'],
        'character': HaskellLexer.tokens['character'],
        'string': HaskellLexer.tokens['string'],
        'escape': HaskellLexer.tokens['escape']
    }


class CryptolLexer(RegexLexer):
    """
    FIXME: A Cryptol2 lexer based on the lexemes defined in the Haskell 98 Report.

    .. versionadded:: 2.0
    """
    name = 'Cryptol'
    aliases = ['cryptol', 'cry']
    filenames = ['*.cry']
    mimetypes = ['text/x-cryptol']

    reserved = ('Arith', 'Bit', 'Cmp', 'False', 'Inf', 'True', 'else',
                'export', 'extern', 'fin', 'if', 'import', 'inf', 'lg2',
                'max', 'min', 'module', 'newtype', 'pragma', 'property',
                'then', 'type', 'where', 'width')
    ascii = ('NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            # (r'--\s*|.*$', Comment.Doc),
            (r'//.*$', Comment.Single),
            (r'/\*', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r"'?[_a-z][\w']*", Name),
            (r"('')?[A-Z][\w\']*", Keyword.Type),
            #  Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            #  Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            #  Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            #  Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            (r'"', String, 'string'),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[\w.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][\w.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z]\w*', Keyword.Type),
            (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
            # TODO: these don't match the comments in docs, remove.
            # (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
            # (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']'", String.Char, '#pop'),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\s+\\', String.Escape, '#pop'),
        ],
    }

    EXTRA_KEYWORDS = {'join', 'split', 'reverse', 'transpose', 'width',
                      'length', 'tail', '<<', '>>', '<<<', '>>>', 'const',
                      'reg', 'par', 'seq', 'ASSERT', 'undefined', 'error',
                      'trace'}

    def get_tokens_unprocessed(self, text):
        stack = ['root']
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Name.Builtin, value
            else:
                yield index, token, value


class LiterateLexer(Lexer):
    """
    Base class for lexers of literate file formats based on LaTeX or Bird-style
    (prefixing each code line with ">").

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.
    """

    bird_re = re.compile(r'(>[ \t]*)(.*\n)')

    def __init__(self, baselexer, **options):
        self.baselexer = baselexer
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        style = self.options.get('litstyle')
        if style is None:
            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style
            for match in line_re.finditer(text):
                line = match.group()
                m = self.bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style
            from pygments.lexers.markup import TexLexer
            lxlexer = TexLexer(**self.options)
            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        yield from do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code))


class LiterateHaskellLexer(LiterateLexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 0.9
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell', 'lhaskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def __init__(self, **options):
        hslexer = HaskellLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateIdrisLexer(LiterateLexer):
    """
    For Literate Idris (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Idris'
    aliases = ['lidr', 'literate-idris', 'lidris']
    filenames = ['*.lidr']
    mimetypes = ['text/x-literate-idris']

    def __init__(self, **options):
        hslexer = IdrisLexer(**options)
        LiterateLexer.__init__(self, hslexer, **options)


class LiterateAgdaLexer(LiterateLexer):
    """
    For Literate Agda source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Agda'
    aliases = ['lagda', 'literate-agda']
    filenames = ['*.lagda']
    mimetypes = ['text/x-literate-agda']

    def __init__(self, **options):
        agdalexer = AgdaLexer(**options)
        LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)


class LiterateCryptolLexer(LiterateLexer):
    """
    For Literate Cryptol (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    .. versionadded:: 2.0
    """
    name = 'Literate Cryptol'
    aliases = ['lcry', 'literate-cryptol', 'lcryptol']
    filenames = ['*.lcry']
    mimetypes = ['text/x-literate-cryptol']

    def __init__(self, **options):
        crylexer = CryptolLexer(**options)
        LiterateLexer.__init__(self, crylexer, **options)


class KokaLexer(RegexLexer):
    """
    Lexer for the `Koka <http://koka.codeplex.com>`_
    language.

    .. versionadded:: 1.6
    """

    name = 'Koka'
    aliases = ['koka']
    filenames = ['*.kk', '*.kki']
    mimetypes = ['text/x-koka']

    keywords = [
        'infix', 'infixr', 'infixl',
        'type', 'cotype', 'rectype', 'alias',
        'struct', 'con',
        'fun', 'function', 'val', 'var',
        'external',
        'if', 'then', 'else', 'elif', 'return', 'match',
        'private', 'public', 'private',
        'module', 'import', 'as',
        'include', 'inline',
        'rec',
        'try', 'yield', 'enum',
        'interface', 'instance',
    ]

    # keywords that are followed by a type
    typeStartKeywords = [
        'type', 'cotype', 'rectype', 'alias', 'struct', 'enum',
    ]

    # keywords valid in a type
    typekeywords = [
        'forall', 'exists', 'some', 'with',
    ]

    # builtin names and special names
    builtin = [
        'for', 'while', 'repeat',
        'foreach', 'foreach-indexed',
        'error', 'catch', 'finally',
        'cs', 'js', 'file', 'ref', 'assigned',
    ]

    # symbols that can be in an operator
    symbols = r'[$%&*+@!/\\^~=.:\-?|<>]+'

    # symbol boundary: an operator keyword should not be followed by any of these
    sboundary = '(?!' + symbols + ')'

    # name boundary: a keyword should not be followed by any of these
    boundary = r'(?![\w/])'

    # koka token abstractions
    tokenType = Name.Attribute
    tokenTypeDef = Name.Class
    tokenConstructor = Generic.Emph

    # main lexer
    tokens = {
        'root': [
            include('whitespace'),

            # go into type mode
            (r'::?' + sboundary, tokenType, 'type'),
            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'alias-type'),
            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'struct-type'),
            ((r'(%s)' % '|'.join(typeStartKeywords)) +
             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
             'type'),

            # special sequences of tokens (we use ?: for non-capturing group as
            # required by 'bygroups')
            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
             bygroups(Keyword, Text, Keyword, Name.Namespace)),
            (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
             r'((?:[a-z]\w*/)*[a-z]\w*))?',
             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
                      Keyword, Name.Namespace)),

            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
             r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Name.Function)),
            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
             r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
             bygroups(Keyword, Text, Keyword, Name.Function)),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
            (r'(%s)' % '|'.join(keywords) + boundary, Keyword),
            (r'(%s)' % '|'.join(builtin) + boundary, Keyword.Pseudo),
            (r'::?|:=|\->|[=.]' + sboundary, Keyword),

            # names
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenConstructor)),
            (r'((?:[a-z]\w*/)*)([a-z]\w*)', bygroups(Name.Namespace, Name)),
            (r'((?:[a-z]\w*/)*)(\((?:' + symbols + r'|/)\))',
             bygroups(Name.Namespace, Name)),
            (r'_\w*', Name.Variable),

            # literal string
            (r'@"', String.Double, 'litstring'),

            # operators
            (symbols + "|/(?![*/])", Operator),
            (r'`', Operator),
            (r'[{}()\[\];,]', Punctuation),

            # literals. No check for literal characters with len > 1
            (r'[0-9]+\.[0-9]+([eE][\-+]?[0-9]+)?', Number.Float),
            (r'0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),

            (r"'", String.Char, 'char'),
            (r'"', String.Double, 'string'),
        ],

        # type started by alias
        'alias-type': [
            (r'=', Keyword),
            include('type')
        ],

        # type started by struct
        'struct-type': [
            (r'(?=\((?!,*\)))', Punctuation, '#pop'),
            include('type')
        ],

        # type started by colon
        'type': [
            (r'[(\[<]', tokenType, 'type-nested'),
            include('type-content')
        ],

        # type nested in brackets: can contain parameters, comma etc.
        'type-nested': [
            (r'[)\]>]', tokenType, '#pop'),
            (r'[(\[<]', tokenType, 'type-nested'),
            (r',', tokenType),
            (r'([a-z]\w*)(\s*)(:)(?!:)',
             bygroups(Name, Text, tokenType)),  # parameter name
            include('type-content')
        ],

        # shared contents of a type
        'type-content': [
            include('whitespace'),

            # keywords
            (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword),
            (r'(?=((%s)' % '|'.join(keywords) + boundary + '))',
             Keyword, '#pop'),  # need to match because names overlap...

            # kinds
            (r'[EPHVX]' + boundary, tokenType),

            # type names
            (r'[a-z][0-9]*(?![\w/])', tokenType),
            (r'_\w*', tokenType.Variable),  # Generic.Emph
            (r'((?:[a-z]\w*/)*)([A-Z]\w*)',
             bygroups(Name.Namespace, tokenType)),
            (r'((?:[a-z]\w*/)*)([a-z]\w+)',
             bygroups(Name.Namespace, tokenType)),

            # type keyword operators
            (r'::|->|[.:|]', tokenType),

            # catchall
            default('#pop')
        ],

        # comments and literals
        'whitespace': [
            (r'\n\s*#.*$', Comment.Preproc),
            (r'\s+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*$', Comment.Single)
        ],
        'comment': [
            (r'[^/*]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
        'litstring': [
            (r'[^"]+', String.Double),
            (r'""', String.Escape),
            (r'"', String.Double, '#pop'),
        ],
        'string': [
            (r'[^\\"\n]+', String.Double),
            include('escape-sequence'),
            (r'["\n]', String.Double, '#pop'),
        ],
        'char': [
            (r'[^\\\'\n]+', String.Char),
            include('escape-sequence'),
            (r'[\'\n]', String.Char, '#pop'),
        ],
        'escape-sequence': [
            (r'\\[nrt\\"\']', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            (r'\\u[0-9a-fA-F]{4}', String.Escape),
            # Yes, \U literals are 6 hex digits.
            (r'\\U[0-9a-fA-F]{6}', String.Escape)
        ]
    }
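

# A minimal usage sketch, not part of the Pygments API surface: running this
# module directly tokenizes a small Haskell snippet with HaskellLexer and a
# Bird-style snippet with LiterateHaskellLexer via the standard Lexer.get_tokens()
# method. The sample sources below are invented purely for illustration.
if __name__ == '__main__':
    haskell_src = 'module Main where\n\nmain :: IO ()\nmain = putStrLn "hi"\n'
    for ttype, value in HaskellLexer().get_tokens(haskell_src):
        print(ttype, repr(value))

    # Bird-style literate input: only the "> "-prefixed lines are lexed as code,
    # the rest is passed through as plain text by LiterateLexer.
    lhs_src = 'Some prose.\n\n> main :: IO ()\n> main = putStrLn "hi"\n'
    for ttype, value in LiterateHaskellLexer().get_tokens(lhs_src):
        print(ttype, repr(value))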
|