ThirdParty/Pygments/pygments/lexers/functional.py

changeset 0
de9c2efb9d02
child 684
2f29a0b6e1c7
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.functional
4 ~~~~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for functional languages.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 try:
14 set
15 except NameError:
16 from sets import Set as set
17
18 from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
19 from pygments.token import Text, Comment, Operator, Keyword, Name, \
20 String, Number, Punctuation, Literal, Generic
21
22
# public API of this module
__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
           'LiterateHaskellLexer', 'OcamlLexer', 'ErlangLexer',
           'ErlangShellLexer']
25
26
class SchemeLexer(RegexLexer):
    """
    A Scheme lexer, parsing a stream and outputting the tokens
    needed to highlight scheme code.
    This lexer could be most probably easily subclassed to parse
    other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp.

    This parser is checked with pastes from the LISP pastebin
    at http://paste.lisp.org/ to cover as much syntax as possible.

    It supports the full Scheme syntax as defined in R5RS.

    *New in Pygments 0.6.*
    """
    name = 'Scheme'
    aliases = ['scheme', 'scm']
    filenames = ['*.scm']
    mimetypes = ['text/x-scheme', 'application/x-scheme']

    # list of known keywords and builtins taken form vim 6.4 scheme.vim
    # syntax file.
    keywords = [
        'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
        'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
        'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
        'let-syntax', 'letrec-syntax', 'syntax-rules'
    ]
    builtins = [
        '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
        'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
        'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
        'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
        'cadr', 'call-with-current-continuation', 'call-with-input-file',
        'call-with-output-file', 'call-with-values', 'call/cc', 'car',
        'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
        'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
        'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
        'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
        'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
        'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
        'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
        'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
        'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
        'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
        'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
        'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
        'integer?', 'interaction-environment', 'lcm', 'length', 'list',
        'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
        'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
        'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
        'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
        'null?', 'number->string', 'number?', 'numerator', 'odd?',
        'open-input-file', 'open-output-file', 'output-port?', 'pair?',
        'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
        'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
        'remainder', 'reverse', 'round', 'scheme-report-environment',
        'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
        'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
        'string-copy', 'string-fill!', 'string-length', 'string-ref',
        'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
        'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
        'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
        'vector', 'vector->list', 'vector-fill!', 'vector-length',
        'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
        'with-output-to-file', 'write', 'write-char', 'zero?'
    ]

    # valid names for identifiers
    # well, names can only not consist fully of numbers
    # but this should be good enough for now
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'

    tokens = {
        'root': [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),

            # whitespaces - usually not relevant
            (r'\s+', Text),

            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # support for uncommon kinds of numbers -
            # have to figure out what the characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # strings, symbols and characters
            # (the '§' here was mojibake — 'ยง' — in the original source)
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"#\\([()/'\".'_!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # constants
            (r'(#t|#f)', Name.Constant),

            # special operators
            (r"('|#|`|,@|,|\.)", Operator),

            # highlight the keywords; each alternative keeps a trailing
            # space so only complete words match
            ('(%s)' % '|'.join(
                re.escape(entry) + ' ' for entry in keywords),
             Keyword),

            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # highlight the builtins (only directly after an opening paren)
            (r'(?<=\()(%s)' % '|'.join(
                re.escape(entry) + ' ' for entry in builtins),
             Name.Builtin),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }
153
154
class CommonLispLexer(RegexLexer):
    """
    A Common Lisp lexer.

    *New in Pygments 0.9.*
    """
    name = 'Common Lisp'
    aliases = ['common-lisp', 'cl']
    filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
    mimetypes = ['text/x-common-lisp']

    flags = re.IGNORECASE | re.MULTILINE

    ### couple of useful regexes

    # characters that are not macro-characters and can be used to begin a symbol
    nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]'
    constituent = nonmacro + '|[#.:]'
    terminated = r'(?=[ "()\'\n,;`])'  # whitespace or terminating macro characters

    ### symbol token, reverse-engineered from hyperspec
    # Take a deep breath...
    symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)

    def __init__(self, **options):
        # deferred import: the builtin tables are large, keep them out of
        # module import time
        from pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \
            SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \
            BUILTIN_TYPES, BUILTIN_CLASSES
        self.builtin_function = BUILTIN_FUNCTIONS
        self.special_forms = SPECIAL_FORMS
        self.macros = MACROS
        self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS
        self.declarations = DECLARATIONS
        self.builtin_types = BUILTIN_TYPES
        self.builtin_classes = BUILTIN_CLASSES
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # post-process plain Name.Variable tokens into more specific token
        # types by looking them up in the builtin tables
        stack = ['root']
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack):
            if token is Name.Variable:
                if value in self.builtin_function:
                    yield index, Name.Builtin, value
                    continue
                if value in self.special_forms:
                    yield index, Keyword, value
                    continue
                if value in self.macros:
                    yield index, Name.Builtin, value
                    continue
                if value in self.lambda_list_keywords:
                    yield index, Keyword, value
                    continue
                if value in self.declarations:
                    yield index, Keyword, value
                    continue
                if value in self.builtin_types:
                    yield index, Keyword.Type, value
                    continue
                if value in self.builtin_classes:
                    yield index, Name.Class, value
                    continue
            yield index, token, value

    tokens = {
        'root': [
            ('', Text, 'body'),
        ],
        'multiline-comment': [
            (r'#\|', Comment.Multiline, '#push'),  # (cf. Hyperspec 2.4.8.19)
            (r'\|#', Comment.Multiline, '#pop'),
            (r'[^|#]+', Comment.Multiline),
            (r'[|#]', Comment.Multiline),
        ],
        'commented-form': [
            (r'\(', Comment.Preproc, '#push'),
            (r'\)', Comment.Preproc, '#pop'),
            (r'[^()]+', Comment.Preproc),
        ],
        'body': [
            # whitespace
            (r'\s+', Text),

            # single-line comment
            (r';.*$', Comment.Single),

            # multi-line comment
            (r'#\|', Comment.Multiline, 'multiline-comment'),

            # encoding comment (?)
            (r'#\d*Y.*$', Comment.Special),

            # strings and characters
            (r'"(\\.|[^"])*"', String),
            # quoting
            (r":" + symbol, String.Symbol),
            (r"'" + symbol, String.Symbol),
            (r"'", Operator),
            (r"`", Operator),

            # decimal numbers
            (r'[-+]?\d+\.?' + terminated, Number.Integer),
            (r'[-+]?\d+/\d+' + terminated, Number),
            (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)'
             + terminated, Number.Float),

            # sharpsign strings and characters
            (r"#\\." + terminated, String.Char),
            (r"#\\" + symbol, String.Char),

            # vector
            (r'#\(', Operator, 'body'),

            # bitstring
            (r'#\d*\*[01]*', Literal.Other),

            # uninterned symbol
            (r'#:' + symbol, String.Symbol),

            # read-time and load-time evaluation
            (r'#[.,]', Operator),

            # function shorthand
            (r'#\'', Name.Function),

            # binary rational
            (r'#[bB][+-]?[01]+(/[01]+)?', Number),

            # octal rational
            (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct),

            # hex rational
            (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex),

            # radix rational
            (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number),

            # complex
            (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'),

            # array
            (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # structure
            (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'),

            # path
            (r'#[pP]?"(\\.|[^"])*"', Literal.Other),

            # reference
            (r'#\d+=', Operator),
            (r'#\d+#', Operator),

            # read-time comment
            # NOTE(review): the unescaped '+' makes '#+nil' mean "one or
            # more '#' followed by 'nil'" — probably intended r'#\+nil';
            # kept as-is to preserve the established behavior
            (r'#+nil' + terminated + r'\s*\(', Comment.Preproc, 'commented-form'),

            # read-time conditional
            (r'#[+-]', Operator),

            # special operators that should have been parsed already
            (r'(,@|,|\.)', Operator),

            # special constants
            (r'(t|nil)' + terminated, Name.Constant),

            # functions and variables
            (r'\*' + symbol + r'\*', Name.Variable.Global),
            (symbol, Name.Variable),

            # parentheses
            (r'\(', Punctuation, 'body'),
            (r'\)', Punctuation, '#pop'),
        ],
    }
329
330
class HaskellLexer(RegexLexer):
    """
    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

    *New in Pygments 0.8.*
    """
    name = 'Haskell'
    aliases = ['haskell', 'hs']
    filenames = ['*.hs']
    mimetypes = ['text/x-haskell']

    # reserved identifiers (Haskell 98 report, section 2.4)
    reserved = ['case', 'class', 'data', 'default', 'deriving', 'do', 'else',
                'if', 'in', 'infix[lr]?', 'instance',
                'let', 'newtype', 'of', 'then', 'type', 'where', '_']
    # ASCII control-character escape names usable in char/string literals
    ascii = ['NUL', 'SOH', '[SE]TX', 'EOT', 'ENQ', 'ACK',
             'BEL', 'BS', 'HT', 'LF', 'VT', 'FF', 'CR', 'S[OI]', 'DLE',
             'DC[1-4]', 'NAK', 'SYN', 'ETB', 'CAN',
             'EM', 'SUB', 'ESC', '[FGRU]S', 'SP', 'DEL']

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            #(r'--\s*|.*$', Comment.Doc),
            (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            # Lexemes:
            # Identifiers
            (r'\bimport\b', Keyword.Reserved, 'import'),
            (r'\bmodule\b', Keyword.Reserved, 'module'),
            (r'\berror\b', Name.Exception),
            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
            (r'^[_a-z][\w\']*', Name.Function),
            (r'[_a-z][\w\']*', Name),
            (r'[A-Z][\w\']*', Keyword.Type),
            # Operators
            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function),  # lambda operator
            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word),  # specials
            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type),  # Constructor operators
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),  # Other operators
            # Numbers
            (r'\d+[eE][+-]?\d+', Number.Float),
            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+', Number.Integer),
            # Character/String Literals
            (r"'", String.Char, 'character'),
            (r'"', String, 'string'),
            # Special
            (r'\[\]', Keyword.Type),
            (r'\(\)', Name.Builtin),
            (r'[][(),;`{}]', Punctuation),
        ],
        'import': [
            # Import statements
            (r'\s+', Text),
            # after "funclist" state
            (r'\)', Punctuation, '#pop'),
            (r'qualified\b', Keyword),
            # import X as Y
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)',
             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
            # import X hiding (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()',
             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
            # import X (functions)
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            # import X
            (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'),
        ],
        'module': [
            (r'\s+', Text),
            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
            (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'),
        ],
        'funclist': [
            (r'\s+', Text),
            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
            (r'[_a-z][\w\']+', Name.Function),
            (r'--.*$', Comment.Single),
            (r'{-', Comment.Multiline, 'comment'),
            (r',', Punctuation),
            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
            # (HACK, but it makes sense to push two instances, believe me)
            (r'\(', Punctuation, ('funclist', 'funclist')),
            (r'\)', Punctuation, '#pop:2'),
        ],
        'comment': [
            # Multiline Comments
            (r'[^-{}]+', Comment.Multiline),
            (r'{-', Comment.Multiline, '#push'),
            (r'-}', Comment.Multiline, '#pop'),
            (r'[-{}]', Comment.Multiline),
        ],
        'character': [
            # Allows multi-chars, incorrectly.
            (r"[^\\']", String.Char),
            (r"\\", String.Escape, 'escape'),
            ("'", String.Char, '#pop'),
        ],
        'string': [
            (r'[^\\"]+', String),
            (r"\\", String.Escape, 'escape'),
            ('"', String, '#pop'),
        ],
        'escape': [
            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
            (r'\^[][A-Z@\^_]', String.Escape, '#pop'),
            ('|'.join(ascii), String.Escape, '#pop'),
            (r'o[0-7]+', String.Escape, '#pop'),
            (r'x[\da-fA-F]+', String.Escape, '#pop'),
            (r'\d+', String.Escape, '#pop'),
            (r'\n\s+\\', String.Escape, '#pop'),
        ],
    }
449
450
# one source line, including its trailing newline
line_re = re.compile(r'.*?\n')
# Bird-style literate marker: the '>' prefix (group 1) and the code (group 2)
bird_re = re.compile(r'(>[ \t]*)(.*\n)')
453
class LiterateHaskellLexer(Lexer):
    """
    For Literate Haskell (Bird-style or LaTeX) source.

    Additional options accepted:

    `litstyle`
        If given, must be ``"bird"`` or ``"latex"``. If not given, the style
        is autodetected: if the first non-whitespace character in the source
        is a backslash or percent character, LaTeX is assumed, else Bird.

    *New in Pygments 0.9.*
    """
    name = 'Literate Haskell'
    aliases = ['lhs', 'literate-haskell']
    filenames = ['*.lhs']
    mimetypes = ['text/x-literate-haskell']

    def get_tokens_unprocessed(self, text):
        hslexer = HaskellLexer(**self.options)

        style = self.options.get('litstyle')
        if style is None:
            # autodetect; guard against empty/whitespace-only input (the
            # old ``text.lstrip()[0]`` idiom raised IndexError there)
            stripped = text.lstrip()
            style = 'latex' if stripped and stripped[0] in '%\\' else 'bird'

        code = ''
        insertions = []
        if style == 'bird':
            # bird-style: only lines starting with '>' are code
            for match in line_re.finditer(text):
                line = match.group()
                m = bird_re.match(line)
                if m:
                    insertions.append((len(code),
                                       [(0, Comment.Special, m.group(1))]))
                    code += m.group(2)
                else:
                    insertions.append((len(code), [(0, Text, line)]))
        else:
            # latex-style: code lives between \begin{code} and \end{code};
            # everything else is handed to the TeX lexer
            from pygments.lexers.text import TexLexer
            lxlexer = TexLexer(**self.options)

            codelines = 0
            latex = ''
            for match in line_re.finditer(text):
                line = match.group()
                if codelines:
                    if line.lstrip().startswith('\\end{code}'):
                        codelines = 0
                        latex += line
                    else:
                        code += line
                elif line.lstrip().startswith('\\begin{code}'):
                    codelines = 1
                    latex += line
                    insertions.append((len(code),
                                       list(lxlexer.get_tokens_unprocessed(latex))))
                    latex = ''
                else:
                    latex += line
            insertions.append((len(code),
                               list(lxlexer.get_tokens_unprocessed(latex))))
        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
            yield item
519
520
class OcamlLexer(RegexLexer):
    """
    For the OCaml language.

    *New in Pygments 0.7.*
    """

    name = 'OCaml'
    aliases = ['ocaml']
    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
    mimetypes = ['text/x-ocaml']

    keywords = [
        'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
        'downto', 'else', 'end', 'exception', 'external', 'false',
        'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
        'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
        'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
        'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
        'type', 'val', 'virtual', 'when', 'while', 'with'
    ]
    # pre-escaped regex fragments for the symbolic keywords
    keyopts = [
        '!=', '#', '&', '&&', r'\(', r'\)', r'\*', r'\+', ',', '-',
        r'-\.', '->', r'\.', r'\.\.', ':', '::', ':=', ':>', ';', ';;', '<',
        '<-', '=', '>', '>]', '>}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
        r'\[\|', ']', '_', '`', '{', '{<', r'\|', r'\|]', '}', '~'
    ]

    operators = r'[!$%&*+\./:<=>?@^|~-]'
    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
    prefix_syms = r'[!?~]'
    infix_syms = r'[=<>@^|&+\*/$%-]'
    primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array']

    tokens = {
        'escape-sequence': [
            (r'\\[\"\'ntbr]', String.Escape),
            (r'\\[0-9]{3}', String.Escape),
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
        ],
        'root': [
            (r'\s+', Text),
            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
             Name.Namespace, 'dotted'),
            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
            (r'\(\*', Comment, 'comment'),
            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
            (r'(%s)' % '|'.join(keyopts), Operator),
            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),

            (r"[^\W\d][\w']*", Name),

            # number literals: floats and radix literals must be tried
            # before the plain decimal-integer rule, otherwise '0x1F'
            # lexed as integer '0' + name 'x1F' and '1.5' as '1' '.' '5'
            (r'-?\d[\d_]*\.[\d_]*([eE][+\-]?\d[\d_]*)?', Number.Float),
            (r'-?\d[\d_]*[eE][+\-]?\d[\d_]*', Number.Float),
            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
            (r'0[oO][0-7][0-7_]*', Number.Oct),
            (r'0[bB][01][01_]*', Number.Binary),
            (r'\d[\d_]*', Number.Integer),

            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
             String.Char),
            (r"'.'", String.Char),
            (r"'", Keyword),  # a stray quote is another syntax element

            (r'"', String.Double, 'string'),

            (r'[~?][a-z][\w\']*:', Name.Variable),
        ],
        'comment': [
            (r'[^(*)]+', Comment),
            (r'\(\*', Comment, '#push'),
            (r'\*\)', Comment, '#pop'),
            (r'[(*)]', Comment),
        ],
        'string': [
            (r'[^\\"]+', String.Double),
            include('escape-sequence'),
            (r'\\\n', String.Double),
            (r'"', String.Double, '#pop'),
        ],
        'dotted': [
            (r'\s+', Text),
            (r'\.', Punctuation),
            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
        ],
    }
611
612
class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    *New in Pygments 0.9.*
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl']
    mimetypes = ['text/x-erlang']

    keywords = [
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
    ]

    builtins = [  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
    ]

    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
    word_operators = [
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
    ]

    atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)'

    escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'

    macro_re = r'(?:' + variable_re + r'|' + atom_re + r')'

    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            ('(' + '|'.join(keywords) + r')\b', Keyword),
            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
            ('(' + '|'.join(word_operators) + r')\b', Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            (r'(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            (r'^(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            # floats must come before plain integers, otherwise the integer
            # rule consumes the part before the decimal point; the dot is
            # escaped (the old `\d+.\d+` matched any character)
            (r'[+-]?\d+\.\d+', Number.Float),
            (r'[+-]?\d+', Number.Integer),
            (r'[][:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?' + macro_re, Name.Constant),
            (r'\$(?:' + escape_re + r'|\\[ %]|[^\\])', String.Char),
            # NOTE(review): '(:?' looks like a typo for the non-capturing
            # group '(?:'; kept as-is to preserve established behavior
            (r'#' + atom_re + r'(:?\.' + atom_re + r')?', Name.Label),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()(' + macro_re + r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()(' + macro_re + r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
    }
717
718
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    *New in Pygments 1.1.*
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    # erl prompts look like "1> ", "12> " ...
    _prompt_re = re.compile(r'\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        erl_lexer = ErlangLexer(**self.options)

        code_buf = ''
        inserts = []
        for match in line_re.finditer(text):
            line = match.group()
            prompt = self._prompt_re.match(line)
            if prompt is not None:
                # prompt goes out as Generic.Prompt; the rest of the line
                # accumulates until it can be lexed as Erlang code
                end = prompt.end()
                inserts.append((len(code_buf),
                                [(0, Generic.Prompt, line[:end])]))
                code_buf += line[end:]
            else:
                # non-prompt line: flush any pending code first
                if code_buf:
                    for item in do_insertions(inserts,
                                              erl_lexer.get_tokens_unprocessed(code_buf)):
                        yield item
                    code_buf = ''
                    inserts = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if code_buf:
            for item in do_insertions(inserts,
                                      erl_lexer.get_tokens_unprocessed(code_buf)):
                yield item
760

eric ide

mercurial