|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.functional |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for functional languages. |
|
7 |
|
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 try: |
|
14 set |
|
15 except NameError: |
|
16 from sets import Set as set |
|
17 |
|
18 from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions |
|
19 from pygments.token import Text, Comment, Operator, Keyword, Name, \ |
|
20 String, Number, Punctuation, Literal, Generic |
|
21 |
|
22 |
|
23 __all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer', |
|
24 'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer'] |
|
25 |
|
26 |
|
class SchemeLexer(RegexLexer):
    """
    A Scheme lexer, parsing a stream and outputting the tokens
    needed to highlight scheme code.
    This lexer could be most probably easily subclassed to parse
    other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp.

    This parser is checked with pastes from the LISP pastebin
    at http://paste.lisp.org/ to cover as much syntax as possible.

    It supports the full Scheme syntax as defined in R5RS.

    *New in Pygments 0.6.*
    """
    name = 'Scheme'
    aliases = ['scheme', 'scm']
    filenames = ['*.scm']
    mimetypes = ['text/x-scheme', 'application/x-scheme']

    # list of known keywords and builtins taken from vim 6.4 scheme.vim
    # syntax file.
    keywords = [
        'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
        'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
        'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
        'let-syntax', 'letrec-syntax', 'syntax-rules'
    ]
    builtins = [
        '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
        'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
        'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
        'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
        'cadr', 'call-with-current-continuation', 'call-with-input-file',
        'call-with-output-file', 'call-with-values', 'call/cc', 'car',
        'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
        'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
        'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
        'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
        'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
        'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
        'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
        'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
        'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
        'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
        'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
        'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
        'integer?', 'interaction-environment', 'lcm', 'length', 'list',
        'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
        'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
        'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
        'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
        'null?', 'number->string', 'number?', 'numerator', 'odd?',
        'open-input-file', 'open-output-file', 'output-port?', 'pair?',
        'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
        'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
        'remainder', 'reverse', 'round', 'scheme-report-environment',
        'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
        'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
        'string-copy', 'string-fill!', 'string-length', 'string-ref',
        'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
        'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
        'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
        'vector', 'vector->list', 'vector-fill!', 'vector-length',
        'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
        'with-output-to-file', 'write', 'write-char', 'zero?'
    ]

    # valid names for identifiers
    # well, names can only not consist fully of numbers
    # but this should be good enough for now
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'

    tokens = {
        'root': [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),

            # whitespaces - usually not relevant
            (r'\s+', Text),

            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # support for uncommon kinds of numbers -
            # have to figure out what the characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            # NOTE(review): "ยง" below looks like mojibake for "§";
            # it is kept byte-for-byte to preserve behavior -- confirm
            # the intended character against the project history.
            (r"#\\([()/'\".'_!ยง$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # constants
            (r'(#t|#f)', Name.Constant),

            # special operators
            (r"('|#|`|,@|,|\.)", Operator),

            # highlight the keywords; a keyword must be followed by a
            # delimiter (whitespace, parenthesis, quote, semicolon or end
            # of input).  The previous pattern required -- and consumed --
            # a literal trailing space, so keywords immediately followed
            # by ")" or a newline were never highlighted, and the space
            # became part of the Keyword token.
            ('(%s)' % '|'.join([
                re.escape(entry) for entry in keywords]) + r"(?=[()';\s]|$)",
             Keyword
            ),

            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # highlight the builtins (only in call position, i.e. right
            # after an opening parenthesis); the same delimiter lookahead
            # as for the keywords above is used instead of a consumed
            # trailing space.
            (r"(?<=\()(%s)" % '|'.join([
                re.escape(entry) for entry in builtins]) + r"(?=[()';\s]|$)",
             Name.Builtin
            ),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }
|
153 |
|
154 |
|
class CommonLispLexer(RegexLexer):
    """
    A Common Lisp lexer.

    *New in Pygments 0.9.*
    """
    name = 'Common Lisp'
    aliases = ['common-lisp', 'cl']
    filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
    mimetypes = ['text/x-common-lisp']

    flags = re.IGNORECASE | re.MULTILINE

    ### couple of useful regexes

    # characters that are not macro-characters and can be used to begin a symbol
    nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]'
    constituent = nonmacro + '|[#.:]'
    terminated = r'(?=[ "()\'\n,;`])'  # whitespace or terminating macro characters

    ### symbol token, reverse-engineered from hyperspec
    # Take a deep breath...
    symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)

    def __init__(self, **options):
        # Pull in the (large) builtin name sets lazily, so the module
        # import stays cheap; they are used by get_tokens_unprocessed()
        # to re-classify plain Name.Variable tokens.
        from pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \
            SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \
            BUILTIN_TYPES, BUILTIN_CLASSES
        self.builtin_function = BUILTIN_FUNCTIONS
        self.special_forms = SPECIAL_FORMS
        self.macros = MACROS
        self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS
        self.declarations = DECLARATIONS
        self.builtin_types = BUILTIN_TYPES
        self.builtin_classes = BUILTIN_CLASSES
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Post-process the regex token stream: upgrade Name.Variable
        tokens whose text is a known builtin function, special form,
        macro, lambda-list keyword, declaration, type or class to the
        corresponding, more specific token type."""
        for index, token, value in RegexLexer.get_tokens_unprocessed(
                self, text, ['root']):
            if token is Name.Variable:
                # the first matching category wins; the order below
                # preserves the precedence of the original checks
                for names, category in (
                        (self.builtin_function, Name.Builtin),
                        (self.special_forms, Keyword),
                        (self.macros, Name.Builtin),
                        (self.lambda_list_keywords, Keyword),
                        (self.declarations, Keyword),
                        (self.builtin_types, Keyword.Type),
                        (self.builtin_classes, Name.Class)):
                    if value in names:
                        token = category
                        break
            yield index, token, value

    tokens = {
        'root': [
            # immediately enter the toplevel body state
            ('', Text, 'body'),
        ],
        'multiline-comment': [
            (r'#\|', Comment.Multiline, '#push'),  # (cf. Hyperspec 2.4.8.19)
            (r'\|#', Comment.Multiline, '#pop'),
            (r'[^|#]+', Comment.Multiline),
            (r'[|#]', Comment.Multiline),
        ],
        'commented-form': [
            (r'\(', Comment.Preproc, '#push'),
            (r'\)', Comment.Preproc, '#pop'),
            (r'[^()]+', Comment.Preproc),
        ],
        'body': [
            # whitespace
            (r'\s+', Text),

            # single-line comment
            (r';.*$', Comment.Single),

            # multi-line comment
            (r'#\|', Comment.Multiline, 'multiline-comment'),

            # encoding comment (?)
            (r'#\d*Y.*$', Comment.Special),

            # strings and characters
            (r'"(\\.|[^"])*"', String),
            # quoting
            (r":" + symbol, String.Symbol),
            (r"'" + symbol, String.Symbol),
            (r"'", Operator),
            (r"`", Operator),

            # decimal numbers
            (r'[-+]?\d+\.?' + terminated, Number.Integer),
            (r'[-+]?\d+/\d+' + terminated, Number),
            (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \
                + terminated, Number.Float),

            # sharpsign strings and characters
            (r"#\\." + terminated, String.Char),
            (r"#\\" + symbol, String.Char),

            # vector
            (r'#\(', Operator, 'body'),

            # bitstring
            (r'#\d*\*[01]*', Literal.Other),

            # uninterned symbol
            (r'#:' + symbol, String.Symbol),

            # read-time and load-time evaluation
            (r'#[.,]', Operator),

            # function shorthand
            (r'#\'', Name.Function),

            # binary rational
            (r'#[bB][+-]?[01]+(/[01]+)?', Number),

            # octal rational
            (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct),

            # hex rational
            (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex),

            # radix rational
            (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number),

            # complex
            (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'),

            # array
            (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # structure
            (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # path
            (r'#[pP]?"(\\.|[^"])*"', Literal.Other),

            # reference
            (r'#\d+=', Operator),
            (r'#\d+#', Operator),

            # read-time comment
            (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'),

            # read-time conditional
            (r'#[+-]', Operator),

            # special operators that should have been parsed already
            (r'(,@|,|\.)', Operator),

            # special constants
            (r'(t|nil)' + terminated, Name.Constant),

            # functions and variables
            (r'\*' + symbol + '\*', Name.Variable.Global),
            (symbol, Name.Variable),

            # parentheses
            (r'\(', Punctuation, 'body'),
            (r'\)', Punctuation, '#pop'),
        ],
    }
|
329 |
|
330 |
|
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    *New in Pygments 0.8.*
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # reserved words; note that 'infix[lr]?' is itself a small regex
    # covering infix, infixl and infixr
    reserved = ['case','class','data','default','deriving','do','else',
                'if','in','infix[lr]?','instance',
                'let','newtype','of','then','type','where','_']
    # mnemonics of the ASCII control characters used in escape sequences
    # (e.g. '\NUL'); some entries are character classes covering several
    # mnemonics at once ('S[OI]' matches both SO and SI)
    ascii = ['NUL','SOH','[SE]TX','EOT','ENQ','ACK',
             'BEL','BS','HT','LF','VT','FF','CR','S[OI]','DLE',
             'DC[1-4]','NAK','SYN','ETB','CAN',
             'EM','SUB','ESC','[FGRU]S','SP','DEL']

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            #(r'--\s*|.*$', Comment.Doc),
            # line comment: "--" only counts as a comment when NOT followed
            # by an operator symbol, since e.g. "-->" is a valid operator
            (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            # Lexemes:
            #  Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            # reserved words must not be followed by a prime, which would
            # make them part of a longer identifier (e.g. let')
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            # a lower-case identifier at the start of a line is taken to be
            # a top-level binding, hence Name.Function
            (r'^[_a-z][\w\']*', Name.Function),
            (r'[_a-z][\w\']*', Name),
            (r'[A-Z][\w\']*', Keyword.Type),
            # Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
            # Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            # module X (exports)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
            # NOTE(review): this requires at least two characters, so a
            # single-letter export such as "f" is not matched here --
            # confirm whether that is intended
            (r'[_a-z][\w\']+', Name.Function),
            (r'--.*$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments (nested: '{-' pushes, '-}' pops)
            (r'[^-{}]+', Comment.Multiline),
            (r'{-', Comment.Multiline, '#push'),
            (r'-}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            # single-character escapes like \n, \t, \\
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            # control-character escapes like \^X
            (r'\^[][A-Z@\^_]', String.Escape, '#pop'),
            # ASCII mnemonic escapes like \NUL (see the `ascii` list above)
            ('|'.join(ascii), String.Escape, '#pop'),
            # octal, hexadecimal and decimal character codes
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            # string gap: backslash, newline, whitespace, backslash
            (r'\n\s+\\', String.Escape, '#pop'),
        ],
    }
|
449 |
|
450 |
|
# matches a single line including its trailing newline (non-greedy)
line_re = re.compile('.*?\n')
# Bird-style literate Haskell line: group 1 is the ">" marker plus the
# following spaces/tabs, group 2 is the code text up to (and including)
# the newline
bird_re = re.compile(r'(>[ \t]*)(.*\n)')
|
453 |
|
class LiterateHaskellLexer(Lexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    *New in Pygments 0.9.*
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def get_tokens_unprocessed(self, text):
        hslexer = HaskellLexer(**self.options)

        style = self.options.get('litstyle')
        if style is None:
            # autodetect: LaTeX documents conventionally start with a
            # command ("\...") or a comment ("%..."), Bird files do not
            if text.lstrip()[0] in '%\\':
                style = 'latex'
            else:
                style = 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: every line beginning with ">" is code; the
            # marker itself is rendered as a special comment, everything
            # else is plain text
            for match in line_re.finditer(text):
                line = match.group()
                marker = bird_re.match(line)
                if marker is None:
                    insertions.append((len(code), [(0, Text, line)]))
                else:
                    insertions.append((len(code),
                                       [(0, Comment.Special, marker.group(1))]))
                    code += marker.group(2)
        else:
            # latex-style: code lives between \begin{code}/\end{code};
            # all surrounding text is handed to the TeX lexer
            from pygments.lexers.text import TexLexer
            lxlexer = TexLexer(**self.options)

            in_code = False
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if in_code:
                    if line.lstrip().startswith('\\end{code}'):
                        in_code = False
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    in_code = True
                    latex += line
                    # flush the accumulated LaTeX at the current code offset
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            # flush any trailing LaTeX after the last code block
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
            yield item
|
519 |
|
520 |
|
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    *New in Pygments 0.7.*
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = [
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with'
    ]
    # punctuation/operator tokens, already regex-escaped where needed
    keyopts = [
        '!=','#','&','&&','\(','\)','\*','\+',',','-',
        '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<',
        '<-','=','>','>]','>}','\?','\?\?','\[','\[<','\[>','\[\|',
        ']','_','`','{','{<','\|','\|]','}','~'
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array']

    tokens = {
        'escape-sequence': [
            (r'\\[\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            # a capitalized identifier followed by "." starts a dotted
            # module path
            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
             Name.Namespace, 'dotted'),
            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
            (r'\(\*', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # Number literals.  The radix-prefixed and float rules must be
            # tried *before* the plain decimal rule: RegexLexer applies
            # rules in order, so with the old ordering "0xFF" tokenized as
            # the integer "0" followed by the name "xFF" and the other
            # number rules were unreachable.  The float rule previously
            # also had an unescaped "." and a mandatory exponent; it now
            # requires a literal decimal point and/or an exponent.
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Binary),
            (r'-?\d[\d_]*(\.[\d_]*([eE][+\-]?\d[\d_]*)?|[eE][+\-]?\d[\d_]*)',
             Number.Float),
            (r'\d[\d_]*', Number.Integer),

            # character literals: escape sequences or a single char
            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword), # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            # labelled/optional arguments: ~label: and ?label:
            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            # OCaml comments nest
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
        ],
    }
|
611 |
|
612 |
|
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    *New in Pygments 0.9.*
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl']
    mimetypes = ['text/x-erlang']

    keywords = [
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
        ]

    builtins = [ # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
        ]

    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
    word_operators = [
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
        ]

    # unquoted atom, or any quoted atom that does not end in a backslash
    atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)'

    escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'

    # a ?MACRO name can look like either a variable or an atom
    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'

    # radix prefix of base#digits integer literals (bases 2..36)
    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            ('(' + '|'.join(keywords) + r')\b', Keyword),
            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
            ('(' + '|'.join(word_operators) + r')\b', Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            # module-qualified call: module:function
            (r'('+atom_re+')(:)', bygroups(Name.Namespace, Punctuation)),
            # function head at the start of a line
            (r'^('+atom_re+r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)),
            # base#digits integer literals (e.g. 16#FF)
            (r'[+-]?'+base_re+r'#[0-9a-zA-Z]+', Number.Integer),
            # the float rule must come *before* the plain integer rule:
            # rules are tried in order, so previously "1.5" lexed as the
            # integer "1" and the float rule (which also had an unescaped
            # ".") was unreachable
            (r'[+-]?\d+\.\d+', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[][:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?'+macro_re, Name.Constant),
            # $-character literals, incl. escapes and "$ "/"$%"
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            # record access: #record or #record.field
            # (fixed "(:?" typo -- a non-capturing group "(?:" was intended,
            # the old pattern also accepted a stray colon as in "#rec:.f")
            (r'#'+atom_re+r'(?:\.'+atom_re+r')?', Name.Label),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            # io:format-style control sequences like ~p, ~4.2f
            (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
    }
|
717 |
|
718 |
|
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    *New in Pygments 1.1.*
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    # shell prompt: a number followed by ">" and whitespace/end
    _prompt_re = re.compile(r'\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            prompt = self._prompt_re.match(line)
            if prompt is not None:
                # prompt line: record the prompt token at the current code
                # offset and buffer the rest of the line as Erlang code
                end = prompt.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
                continue
            # a non-prompt line terminates any buffered code: lex it now,
            # splicing the prompt tokens back in at their offsets
            if curcode:
                for item in do_insertions(insertions,
                                erlexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []
            # lines starting with "*" are error reports
            if line.startswith('*'):
                yield match.start(), Generic.Traceback, line
            else:
                yield match.start(), Generic.Output, line
        # flush code left over at the end of input
        if curcode:
            for item in do_insertions(insertions,
                            erlexer.get_tokens_unprocessed(curcode)):
                yield item
|
760 |