diff -r 000000000000 -r de9c2efb9d02 ThirdParty/Pygments/pygments/lexers/functional.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ThirdParty/Pygments/pygments/lexers/functional.py Mon Dec 28 16:03:33 2009 +0000 @@ -0,0 +1,760 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.functional + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for functional languages. + + :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re +try: + set +except NameError: + from sets import Set as set + +from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions +from pygments.token import Text, Comment, Operator, Keyword, Name, \ + String, Number, Punctuation, Literal, Generic + + +__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer', + 'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer'] + + +class SchemeLexer(RegexLexer): + """ + A Scheme lexer, parsing a stream and outputting the tokens + needed to highlight scheme code. + This lexer could be most probably easily subclassed to parse + other LISP-Dialects like Common Lisp, Emacs Lisp or AutoLisp. + + This parser is checked with pastes from the LISP pastebin + at http://paste.lisp.org/ to cover as much syntax as possible. + + It supports the full Scheme syntax as defined in R5RS. + + *New in Pygments 0.6.* + """ + name = 'Scheme' + aliases = ['scheme', 'scm'] + filenames = ['*.scm'] + mimetypes = ['text/x-scheme', 'application/x-scheme'] + + # list of known keywords and builtins taken form vim 6.4 scheme.vim + # syntax file. + keywords = [ + 'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let', + 'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote', + 'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax', + 'let-syntax', 'letrec-syntax', 'syntax-rules' + ] + builtins = [ + '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle', + 'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan', + 'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr', + 'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr', + 'cadr', 'call-with-current-continuation', 'call-with-input-file', + 'call-with-output-file', 'call-with-values', 'call/cc', 'car', + 'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar', + 'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr', + 'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?', + 'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase', + 'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase', + 'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?', + 'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port', + 'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port', + 'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?', + 'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp', + 'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part', + 'inexact->exact', 'inexact?', 'input-port?', 'integer->char', + 'integer?', 'interaction-environment', 'lcm', 'length', 'list', + 'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?', + 'load', 'log', 'magnitude', 'make-polar', 'make-rectangular', + 'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv', + 'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment', + 'null?', 'number->string', 'number?', 'numerator', 'odd?', + 'open-input-file', 
'open-output-file', 'output-port?', 'pair?', + 'peek-char', 'port?', 'positive?', 'procedure?', 'quotient', + 'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?', + 'remainder', 'reverse', 'round', 'scheme-report-environment', + 'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list', + 'string->number', 'string->symbol', 'string-append', 'string-ci<=?', + 'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?', + 'string-copy', 'string-fill!', 'string-length', 'string-ref', + 'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?', + 'string>?', 'string?', 'substring', 'symbol->string', 'symbol?', + 'tan', 'transcript-off', 'transcript-on', 'truncate', 'values', + 'vector', 'vector->list', 'vector-fill!', 'vector-length', + 'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file', + 'with-output-to-file', 'write', 'write-char', 'zero?' + ] + + # valid names for identifiers + # well, names can only not consist fully of numbers + # but this should be good enough for now + valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+' + + tokens = { + 'root' : [ + # the comments - always starting with semicolon + # and going to the end of the line + (r';.*$', Comment.Single), + + # whitespaces - usually not relevant + (r'\s+', Text), + + # numbers + (r'-?\d+\.\d+', Number.Float), + (r'-?\d+', Number.Integer), + # support for uncommon kinds of numbers - + # have to figure out what the characters mean + #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number), + + # strings, symbols and characters + (r'"(\\\\|\\"|[^"])*"', String), + (r"'" + valid_name, String.Symbol), + (r"#\\([()/'\".'_!§$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char), + + # constants + (r'(#t|#f)', Name.Constant), + + # special operators + (r"('|#|`|,@|,|\.)", Operator), + + # highlight the keywords + ('(%s)' % '|'.join([ + re.escape(entry) + ' ' for entry in keywords]), + Keyword + ), + + # first variable in a quoted string like + # '(this is syntactic sugar) + (r"(?<='\()" + valid_name, Name.Variable), + (r"(?<=#\()" + valid_name, Name.Variable), + + # highlight the builtins + ("(?<=\()(%s)" % '|'.join([ + re.escape(entry) + ' ' for entry in builtins]), + Name.Builtin + ), + + # the remaining functions + (r'(?<=\()' + valid_name, Name.Function), + # find the remaining variables + (valid_name, Name.Variable), + + # the famous parentheses! + (r'(\(|\))', Punctuation), + ], + } + + +class CommonLispLexer(RegexLexer): + """ + A Common Lisp lexer. + + *New in Pygments 0.9.* + """ + name = 'Common Lisp' + aliases = ['common-lisp', 'cl'] + filenames = ['*.cl', '*.lisp', '*.el'] # use for Elisp too + mimetypes = ['text/x-common-lisp'] + + flags = re.IGNORECASE | re.MULTILINE + + ### couple of useful regexes + + # characters that are not macro-characters and can be used to begin a symbol + nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]' + constituent = nonmacro + '|[#.:]' + terminated = r'(?=[ "()\'\n,;`])' # whitespace or terminating macro characters + + ### symbol token, reverse-engineered from hyperspec + # Take a deep breath...
+ symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent) + + def __init__(self, **options): + from pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \ + SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \ + BUILTIN_TYPES, BUILTIN_CLASSES + self.builtin_function = BUILTIN_FUNCTIONS + self.special_forms = SPECIAL_FORMS + self.macros = MACROS + self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS + self.declarations = DECLARATIONS + self.builtin_types = BUILTIN_TYPES + self.builtin_classes = BUILTIN_CLASSES + RegexLexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + stack = ['root'] + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack): + if token is Name.Variable: + if value in self.builtin_function: + yield index, Name.Builtin, value + continue + if value in self.special_forms: + yield index, Keyword, value + continue + if value in self.macros: + yield index, Name.Builtin, value + continue + if value in self.lambda_list_keywords: + yield index, Keyword, value + continue + if value in self.declarations: + yield index, Keyword, value + continue + if value in self.builtin_types: + yield index, Keyword.Type, value + continue + if value in self.builtin_classes: + yield index, Name.Class, value + continue + yield index, token, value + + tokens = { + 'root' : [ + ('', Text, 'body'), + ], + 'multiline-comment' : [ + (r'#\|', Comment.Multiline, '#push'), # (cf. Hyperspec 2.4.8.19) + (r'\|#', Comment.Multiline, '#pop'), + (r'[^|#]+', Comment.Multiline), + (r'[|#]', Comment.Multiline), + ], + 'commented-form' : [ + (r'\(', Comment.Preproc, '#push'), + (r'\)', Comment.Preproc, '#pop'), + (r'[^()]+', Comment.Preproc), + ], + 'body' : [ + # whitespace + (r'\s+', Text), + + # single-line comment + (r';.*$', Comment.Single), + + # multi-line comment + (r'#\|', Comment.Multiline, 'multiline-comment'), + + # encoding comment (?) + (r'#\d*Y.*$', Comment.Special), + + # strings and characters + (r'"(\\.|[^"])*"', String), + # quoting + (r":" + symbol, String.Symbol), + (r"'" + symbol, String.Symbol), + (r"'", Operator), + (r"`", Operator), + + # decimal numbers + (r'[-+]?\d+\.?' + terminated, Number.Integer), + (r'[-+]?\d+/\d+' + terminated, Number), + (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \ + + terminated, Number.Float), + + # sharpsign strings and characters + (r"#\\." 
+ terminated, String.Char), + (r"#\\" + symbol, String.Char), + + # vector + (r'#\(', Operator, 'body'), + + # bitstring + (r'#\d*\*[01]*', Literal.Other), + + # uninterned symbol + (r'#:' + symbol, String.Symbol), + + # read-time and load-time evaluation + (r'#[.,]', Operator), + + # function shorthand + (r'#\'', Name.Function), + + # binary rational + (r'#[bB][+-]?[01]+(/[01]+)?', Number), + + # octal rational + (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct), + + # hex rational + (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex), + + # radix rational + (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number), + + # complex + (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'), + + # array + (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'), + + # structure + (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'), + + # path + (r'#[pP]?"(\\.|[^"])*"', Literal.Other), + + # reference + (r'#\d+=', Operator), + (r'#\d+#', Operator), + + # read-time comment + (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'), + + # read-time conditional + (r'#[+-]', Operator), + + # special operators that should have been parsed already + (r'(,@|,|\.)', Operator), + + # special constants + (r'(t|nil)' + terminated, Name.Constant), + + # functions and variables + (r'\*' + symbol + '\*', Name.Variable.Global), + (symbol, Name.Variable), + + # parentheses + (r'\(', Punctuation, 'body'), + (r'\)', Punctuation, '#pop'), + ], + } + + +class HaskellLexer(RegexLexer): + """ + A Haskell lexer based on the lexemes defined in the Haskell 98 Report. + + *New in Pygments 0.8.* + """ + name = 'Haskell' + aliases = ['haskell', 'hs'] + filenames = ['*.hs'] + mimetypes = ['text/x-haskell'] + + reserved = ['case','class','data','default','deriving','do','else', + 'if','in','infix[lr]?','instance', + 'let','newtype','of','then','type','where','_'] + ascii = ['NUL','SOH','[SE]TX','EOT','ENQ','ACK', + 'BEL','BS','HT','LF','VT','FF','CR','S[OI]','DLE', + 'DC[1-4]','NAK','SYN','ETB','CAN', + 'EM','SUB','ESC','[FGRU]S','SP','DEL'] + + tokens = { + 'root': [ + # Whitespace: + (r'\s+', Text), + #(r'--\s*|.*$', Comment.Doc), + (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single), + (r'{-', Comment.Multiline, 'comment'), + # Lexemes: + # Identifiers + (r'\bimport\b', Keyword.Reserved, 'import'), + (r'\bmodule\b', Keyword.Reserved, 'module'), + (r'\berror\b', Name.Exception), + (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved), + (r'^[_a-z][\w\']*', Name.Function), + (r'[_a-z][\w\']*', Name), + (r'[A-Z][\w\']*', Keyword.Type), + # Operators + (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator + (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials + (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators + (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators + # Numbers + (r'\d+[eE][+-]?\d+', Number.Float), + (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float), + (r'0[oO][0-7]+', Number.Oct), + (r'0[xX][\da-fA-F]+', Number.Hex), + (r'\d+', Number.Integer), + # Character/String Literals + (r"'", String.Char, 'character'), + (r'"', String, 'string'), + # Special + (r'\[\]', Keyword.Type), + (r'\(\)', Name.Builtin), + (r'[][(),;`{}]', Punctuation), + ], + 'import': [ + # Import statements + (r'\s+', Text), + # after "funclist" state + (r'\)', Punctuation, '#pop'), + (r'qualified\b', Keyword), + # import X as Y + (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)', + bygroups(Name.Namespace, Text, Keyword, Text, Name), 
'#pop'), + # import X hiding (functions) + (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()', + bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'), + # import X (functions) + (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', + bygroups(Name.Namespace, Text, Punctuation), 'funclist'), + # import X + (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'), + ], + 'module': [ + (r'\s+', Text), + (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()', + bygroups(Name.Namespace, Text, Punctuation), 'funclist'), + (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'), + ], + 'funclist': [ + (r'\s+', Text), + (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type), + (r'[_a-z][\w\']+', Name.Function), + (r'--.*$', Comment.Single), + (r'{-', Comment.Multiline, 'comment'), + (r',', Punctuation), + (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), + # (HACK, but it makes sense to push two instances, believe me) + (r'\(', Punctuation, ('funclist', 'funclist')), + (r'\)', Punctuation, '#pop:2'), + ], + 'comment': [ + # Multiline Comments + (r'[^-{}]+', Comment.Multiline), + (r'{-', Comment.Multiline, '#push'), + (r'-}', Comment.Multiline, '#pop'), + (r'[-{}]', Comment.Multiline), + ], + 'character': [ + # Allows multi-chars, incorrectly. + (r"[^\\']", String.Char), + (r"\\", String.Escape, 'escape'), + ("'", String.Char, '#pop'), + ], + 'string': [ + (r'[^\\"]+', String), + (r"\\", String.Escape, 'escape'), + ('"', String, '#pop'), + ], + 'escape': [ + (r'[abfnrtv"\'&\\]', String.Escape, '#pop'), + (r'\^[][A-Z@\^_]', String.Escape, '#pop'), + ('|'.join(ascii), String.Escape, '#pop'), + (r'o[0-7]+', String.Escape, '#pop'), + (r'x[\da-fA-F]+', String.Escape, '#pop'), + (r'\d+', String.Escape, '#pop'), + (r'\n\s+\\', String.Escape, '#pop'), + ], + } + + +line_re = re.compile('.*?\n') +bird_re = re.compile(r'(>[ \t]*)(.*\n)') + +class LiterateHaskellLexer(Lexer): + """ + For Literate Haskell (Bird-style or LaTeX) source. + + Additional options accepted: + + `litstyle` + If given, must be ``"bird"`` or ``"latex"``. If not given, the style + is autodetected: if the first non-whitespace character in the source + is a backslash or percent character, LaTeX is assumed, else Bird. 
+ + *New in Pygments 0.9.* + """ + name = 'Literate Haskell' + aliases = ['lhs', 'literate-haskell'] + filenames = ['*.lhs'] + mimetypes = ['text/x-literate-haskell'] + + def get_tokens_unprocessed(self, text): + hslexer = HaskellLexer(**self.options) + + style = self.options.get('litstyle') + if style is None: + style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird' + + code = '' + insertions = [] + if style == 'bird': + # bird-style + for match in line_re.finditer(text): + line = match.group() + m = bird_re.match(line) + if m: + insertions.append((len(code), + [(0, Comment.Special, m.group(1))])) + code += m.group(2) + else: + insertions.append((len(code), [(0, Text, line)])) + else: + # latex-style + from pygments.lexers.text import TexLexer + lxlexer = TexLexer(**self.options) + + codelines = 0 + latex = '' + for match in line_re.finditer(text): + line = match.group() + if codelines: + if line.lstrip().startswith('\\end{code}'): + codelines = 0 + latex += line + else: + code += line + elif line.lstrip().startswith('\\begin{code}'): + codelines = 1 + latex += line + insertions.append((len(code), + list(lxlexer.get_tokens_unprocessed(latex)))) + latex = '' + else: + latex += line + insertions.append((len(code), + list(lxlexer.get_tokens_unprocessed(latex)))) + for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)): + yield item + + +class OcamlLexer(RegexLexer): + """ + For the OCaml language. + + *New in Pygments 0.7.* + """ + + name = 'OCaml' + aliases = ['ocaml'] + filenames = ['*.ml', '*.mli', '*.mll', '*.mly'] + mimetypes = ['text/x-ocaml'] + + keywords = [ + 'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done', + 'downto', 'else', 'end', 'exception', 'external', 'false', + 'for', 'fun', 'function', 'functor', 'if', 'in', 'include', + 'inherit', 'initializer', 'lazy', 'let', 'match', 'method', + 'module', 'mutable', 'new', 'object', 'of', 'open', 'private', + 'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try', + 'type', 'val', 'virtual', 'when', 'while', 'with' + ] + keyopts = [ + '!=','#','&','&&','\(','\)','\*','\+',',','-', + '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<', + '<-','=','>','>]','>}','\?','\?\?','\[','\[<','\[>','\[\|', + ']','_','`','{','{<','\|','\|]','}','~' + ] + + operators = r'[!$%&*+\./:<=>?@^|~-]' + word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or'] + prefix_syms = r'[!?~]' + infix_syms = r'[=<>@^|&+\*/$%-]' + primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array'] + + tokens = { + 'escape-sequence': [ + (r'\\[\"\'ntbr]', String.Escape), + (r'\\[0-9]{3}', String.Escape), + (r'\\x[0-9a-fA-F]{2}', String.Escape), + ], + 'root': [ + (r'\s+', Text), + (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo), + (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)', + Name.Namespace, 'dotted'), + (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class), + (r'\(\*', Comment, 'comment'), + (r'\b(%s)\b' % '|'.join(keywords), Keyword), + (r'(%s)' % '|'.join(keyopts), Operator), + (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator), + (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word), + (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type), + + (r"[^\W\d][\w']*", Name), + + (r'\d[\d_]*', Number.Integer), + (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex), + (r'0[oO][0-7][0-7_]*', Number.Oct), + (r'0[bB][01][01_]*', Number.Binary), + (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float), + + (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'", + String.Char), + (r"'.'", String.Char), + 
(r"'", Keyword), # a stray quote is another syntax element + + (r'"', String.Double, 'string'), + + (r'[~?][a-z][\w\']*:', Name.Variable), + ], + 'comment': [ + (r'[^(*)]+', Comment), + (r'\(\*', Comment, '#push'), + (r'\*\)', Comment, '#pop'), + (r'[(*)]', Comment), + ], + 'string': [ + (r'[^\\"]+', String.Double), + include('escape-sequence'), + (r'\\\n', String.Double), + (r'"', String.Double, '#pop'), + ], + 'dotted': [ + (r'\s+', Text), + (r'\.', Punctuation), + (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace), + (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'), + (r'[a-z][a-z0-9_\']*', Name, '#pop'), + ], + } + + +class ErlangLexer(RegexLexer): + """ + For the Erlang functional programming language. + + Blame Jeremy Thurgood (http://jerith.za.net/). + + *New in Pygments 0.9.* + """ + + name = 'Erlang' + aliases = ['erlang'] + filenames = ['*.erl', '*.hrl'] + mimetypes = ['text/x-erlang'] + + keywords = [ + 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', + 'let', 'of', 'query', 'receive', 'try', 'when', + ] + + builtins = [ # See erlang(3) man page + 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', + 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', + 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', + 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', + 'float', 'float_to_list', 'fun_info', 'fun_to_list', + 'function_exported', 'garbage_collect', 'get', 'get_keys', + 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', + 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', + 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', + 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', + 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', + 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', + 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', + 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', + 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', + 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', + 'pid_to_list', 'port_close', 'port_command', 'port_connect', + 'port_control', 'port_call', 'port_info', 'port_to_list', + 'process_display', 'process_flag', 'process_info', 'purge_module', + 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', + 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', + 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', + 'spawn_opt', 'split_binary', 'start_timer', 'statistics', + 'suspend_process', 'system_flag', 'system_info', 'system_monitor', + 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', + 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', + 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' + ] + + operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)' + word_operators = [ + 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', + 'div', 'not', 'or', 'orelse', 'rem', 'xor' + ] + + atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')" + + variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)' + + escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))' + + macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' + + base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' + + tokens = { + 'root': [ + (r'\s+', Text), + (r'%.*\n', Comment), + ('(' + '|'.join(keywords) + r')\b', Keyword), + ('(' + '|'.join(builtins) + r')\b', Name.Builtin), + ('(' + 
'|'.join(word_operators) + r')\b', Operator.Word), + (r'^-', Punctuation, 'directive'), + (operators, Operator), + (r'"', String, 'string'), + (r'<<', Name.Label), + (r'>>', Name.Label), + (r'('+atom_re+')(:)', bygroups(Name.Namespace, Punctuation)), + (r'^('+atom_re+r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), + (r'[+-]?'+base_re+r'#[0-9a-zA-Z]+', Number.Integer), + (r'[+-]?\d+', Number.Integer), + (r'[+-]?\d+.\d+', Number.Float), + (r'[][:_@\".{}()|;,]', Punctuation), + (variable_re, Name.Variable), + (atom_re, Name), + (r'\?'+macro_re, Name.Constant), + (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char), + (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label), + ], + 'string': [ + (escape_re, String.Escape), + (r'"', String, '#pop'), + (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), + (r'[^"\\~]+', String), + (r'~', String), + ], + 'directive': [ + (r'(define)(\s*)(\()('+macro_re+r')', + bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), + (r'(record)(\s*)(\()('+macro_re+r')', + bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), + (atom_re, Name.Entity, '#pop'), + ], + } + + +class ErlangShellLexer(Lexer): + """ + Shell sessions in erl (for Erlang code). + + *New in Pygments 1.1.* + """ + name = 'Erlang erl session' + aliases = ['erl'] + filenames = ['*.erl-sh'] + mimetypes = ['text/x-erl-shellsession'] + + _prompt_re = re.compile(r'\d+>(?=\s|\Z)') + + def get_tokens_unprocessed(self, text): + erlexer = ErlangLexer(**self.options) + + curcode = '' + insertions = [] + for match in line_re.finditer(text): + line = match.group() + m = self._prompt_re.match(line) + if m is not None: + end = m.end() + insertions.append((len(curcode), + [(0, Generic.Prompt, line[:end])])) + curcode += line[end:] + else: + if curcode: + for item in do_insertions(insertions, + erlexer.get_tokens_unprocessed(curcode)): + yield item + curcode = '' + insertions = [] + if line.startswith('*'): + yield match.start(), Generic.Traceback, line + else: + yield match.start(), Generic.Output, line + if curcode: + for item in do_insertions(insertions, + erlexer.get_tokens_unprocessed(curcode)): + yield item +
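
Quick usage sketch (illustrative only, not part of the changeset above): the lexers added here are reachable through the standard Pygments API, so a minimal smoke test could look like the following. The sample Scheme and Erlang snippets are made up for demonstration.

# Exercise the newly added lexers via the public Pygments API.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers.functional import SchemeLexer, ErlangLexer

scheme_src = "(define (square x) (* x x))"
erlang_src = "-module(demo).\nsquare(X) -> X * X."

# Render highlighted output to the terminal.
print(highlight(scheme_src, SchemeLexer(), TerminalFormatter()))
print(highlight(erlang_src, ErlangLexer(), TerminalFormatter()))

# Or inspect the raw token stream a lexer produces.
for token_type, value in SchemeLexer().get_tokens(scheme_src):
    print(token_type, repr(value))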