ThirdParty/Pygments/pygments/lexers/functional.py

changeset 0:de9c2efb9d02
child 684:2f29a0b6e1c7
diff -r 000000000000 -r de9c2efb9d02 ThirdParty/Pygments/pygments/lexers/functional.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ThirdParty/Pygments/pygments/lexers/functional.py	Mon Dec 28 16:03:33 2009 +0000
@@ -0,0 +1,760 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.functional
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for functional languages.
+
+    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+try:
+    set
+except NameError:
+    from sets import Set as set
+
+from pygments.lexer import Lexer, RegexLexer, bygroups, include, do_insertions
+from pygments.token import Text, Comment, Operator, Keyword, Name, \
+     String, Number, Punctuation, Literal, Generic
+
+
+__all__ = ['SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'LiterateHaskellLexer',
+           'OcamlLexer', 'ErlangLexer', 'ErlangShellLexer']
+
+
+class SchemeLexer(RegexLexer):
+    """
+    A Scheme lexer, parsing a stream and outputting the tokens
+    needed to highlight scheme code.
+    This lexer could probably be subclassed fairly easily to parse
+    other Lisp dialects like Common Lisp, Emacs Lisp or AutoLisp.
+
+    This parser is checked with pastes from the LISP pastebin
+    at http://paste.lisp.org/ to cover as much syntax as possible.
+
+    It supports the full Scheme syntax as defined in R5RS.
+
+    *New in Pygments 0.6.*
+    """
+    name = 'Scheme'
+    aliases = ['scheme', 'scm']
+    filenames = ['*.scm']
+    mimetypes = ['text/x-scheme', 'application/x-scheme']
+
+    # list of known keywords and builtins taken from vim 6.4 scheme.vim
+    # syntax file.
+    keywords = [
+        'lambda', 'define', 'if', 'else', 'cond', 'and', 'or', 'case', 'let',
+        'let*', 'letrec', 'begin', 'do', 'delay', 'set!', '=>', 'quote',
+        'quasiquote', 'unquote', 'unquote-splicing', 'define-syntax',
+        'let-syntax', 'letrec-syntax', 'syntax-rules'
+    ]
+    builtins = [
+        '*', '+', '-', '/', '<', '<=', '=', '>', '>=', 'abs', 'acos', 'angle',
+        'append', 'apply', 'asin', 'assoc', 'assq', 'assv', 'atan',
+        'boolean?', 'caaaar', 'caaadr', 'caaar', 'caadar', 'caaddr', 'caadr',
+        'caar', 'cadaar', 'cadadr', 'cadar', 'caddar', 'cadddr', 'caddr',
+        'cadr', 'call-with-current-continuation', 'call-with-input-file',
+        'call-with-output-file', 'call-with-values', 'call/cc', 'car',
+        'cdaaar', 'cdaadr', 'cdaar', 'cdadar', 'cdaddr', 'cdadr', 'cdar',
+        'cddaar', 'cddadr', 'cddar', 'cdddar', 'cddddr', 'cdddr', 'cddr',
+        'cdr', 'ceiling', 'char->integer', 'char-alphabetic?', 'char-ci<=?',
+        'char-ci<?', 'char-ci=?', 'char-ci>=?', 'char-ci>?', 'char-downcase',
+        'char-lower-case?', 'char-numeric?', 'char-ready?', 'char-upcase',
+        'char-upper-case?', 'char-whitespace?', 'char<=?', 'char<?', 'char=?',
+        'char>=?', 'char>?', 'char?', 'close-input-port', 'close-output-port',
+        'complex?', 'cons', 'cos', 'current-input-port', 'current-output-port',
+        'denominator', 'display', 'dynamic-wind', 'eof-object?', 'eq?',
+        'equal?', 'eqv?', 'eval', 'even?', 'exact->inexact', 'exact?', 'exp',
+        'expt', 'floor', 'for-each', 'force', 'gcd', 'imag-part',
+        'inexact->exact', 'inexact?', 'input-port?', 'integer->char',
+        'integer?', 'interaction-environment', 'lcm', 'length', 'list',
+        'list->string', 'list->vector', 'list-ref', 'list-tail', 'list?',
+        'load', 'log', 'magnitude', 'make-polar', 'make-rectangular',
+        'make-string', 'make-vector', 'map', 'max', 'member', 'memq', 'memv',
+        'min', 'modulo', 'negative?', 'newline', 'not', 'null-environment',
+        'null?', 'number->string', 'number?', 'numerator', 'odd?',
+        'open-input-file', 'open-output-file', 'output-port?', 'pair?',
+        'peek-char', 'port?', 'positive?', 'procedure?', 'quotient',
+        'rational?', 'rationalize', 'read', 'read-char', 'real-part', 'real?',
+        'remainder', 'reverse', 'round', 'scheme-report-environment',
+        'set-car!', 'set-cdr!', 'sin', 'sqrt', 'string', 'string->list',
+        'string->number', 'string->symbol', 'string-append', 'string-ci<=?',
+        'string-ci<?', 'string-ci=?', 'string-ci>=?', 'string-ci>?',
+        'string-copy', 'string-fill!', 'string-length', 'string-ref',
+        'string-set!', 'string<=?', 'string<?', 'string=?', 'string>=?',
+        'string>?', 'string?', 'substring', 'symbol->string', 'symbol?',
+        'tan', 'transcript-off', 'transcript-on', 'truncate', 'values',
+        'vector', 'vector->list', 'vector-fill!', 'vector-length',
+        'vector-ref', 'vector-set!', 'vector?', 'with-input-from-file',
+        'with-output-to-file', 'write', 'write-char', 'zero?'
+    ]
+
+    # valid names for identifiers
+    # well, names simply cannot consist entirely of numbers,
+    # but this should be good enough for now
+    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~|-]+'
+
+    tokens = {
+        'root' : [
+            # the comments - always starting with semicolon
+            # and going to the end of the line
+            (r';.*$', Comment.Single),
+
+            # whitespaces - usually not relevant
+            (r'\s+', Text),
+
+            # numbers
+            (r'-?\d+\.\d+', Number.Float),
+            (r'-?\d+', Number.Integer),
+            # support for uncommon kinds of numbers -
+            # have to figure out what the characters mean
+            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),
+
+            # strings, symbols and characters
+            (r'"(\\\\|\\"|[^"])*"', String),
+            (r"'" + valid_name, String.Symbol),
+            (r"#\\([()/'\".'_!ยง$%& ?=+-]{1}|[a-zA-Z0-9]+)", String.Char),
+
+            # constants
+            (r'(#t|#f)', Name.Constant),
+
+            # special operators
+            (r"('|#|`|,@|,|\.)", Operator),
+
+            # highlight the keywords
+            ('(%s)' % '|'.join([
+                re.escape(entry) + ' ' for entry in keywords]),
+                Keyword
+            ),
+
+            # first variable in a quoted string like
+            # '(this is syntactic sugar)
+            (r"(?<='\()" + valid_name, Name.Variable),
+            (r"(?<=#\()" + valid_name, Name.Variable),
+
+            # highlight the builtins
+            ("(?<=\()(%s)" % '|'.join([
+                re.escape(entry) + ' ' for entry in builtins]),
+                Name.Builtin
+            ),
+
+            # the remaining functions
+            (r'(?<=\()' + valid_name, Name.Function),
+            # find the remaining variables
+            (valid_name, Name.Variable),
+
+            # the famous parentheses!
+            (r'(\(|\))', Punctuation),
+        ],
+    }
+
+
+class CommonLispLexer(RegexLexer):
+    """
+    A Common Lisp lexer.
+
+    *New in Pygments 0.9.*
+    """
+    name = 'Common Lisp'
+    aliases = ['common-lisp', 'cl']
+    filenames = ['*.cl', '*.lisp', '*.el']  # use for Elisp too
+    mimetypes = ['text/x-common-lisp']
+
+    flags = re.IGNORECASE | re.MULTILINE
+
+    ### couple of useful regexes
+
+    # characters that are not macro-characters and can be used to begin a symbol
+    nonmacro = r'\\.|[a-zA-Z0-9!$%&*+-/<=>?@\[\]^_{}~]'
+    constituent = nonmacro + '|[#.:]'
+    terminated = r'(?=[ "()\'\n,;`])' # whitespace or terminating macro characters
+
+    ### symbol token, reverse-engineered from hyperspec
+    # Take a deep breath...
+    symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)
+
+    def __init__(self, **options):
+        from pygments.lexers._clbuiltins import BUILTIN_FUNCTIONS, \
+            SPECIAL_FORMS, MACROS, LAMBDA_LIST_KEYWORDS, DECLARATIONS, \
+            BUILTIN_TYPES, BUILTIN_CLASSES
+        self.builtin_function = BUILTIN_FUNCTIONS
+        self.special_forms = SPECIAL_FORMS
+        self.macros = MACROS
+        self.lambda_list_keywords = LAMBDA_LIST_KEYWORDS
+        self.declarations = DECLARATIONS
+        self.builtin_types = BUILTIN_TYPES
+        self.builtin_classes = BUILTIN_CLASSES
+        RegexLexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text):
+        stack = ['root']
+        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text, stack):
+            if token is Name.Variable:
+                if value in self.builtin_function:
+                    yield index, Name.Builtin, value
+                    continue
+                if value in self.special_forms:
+                    yield index, Keyword, value
+                    continue
+                if value in self.macros:
+                    yield index, Name.Builtin, value
+                    continue
+                if value in self.lambda_list_keywords:
+                    yield index, Keyword, value
+                    continue
+                if value in self.declarations:
+                    yield index, Keyword, value
+                    continue
+                if value in self.builtin_types:
+                    yield index, Keyword.Type, value
+                    continue
+                if value in self.builtin_classes:
+                    yield index, Name.Class, value
+                    continue
+            yield index, token, value
+
+    tokens = {
+        'root' : [
+            ('', Text, 'body'),
+        ],
+        'multiline-comment' : [
+            (r'#\|', Comment.Multiline, '#push'), # (cf. Hyperspec 2.4.8.19)
+            (r'\|#', Comment.Multiline, '#pop'),
+            (r'[^|#]+', Comment.Multiline),
+            (r'[|#]', Comment.Multiline),
+        ],
+        'commented-form' : [
+            (r'\(', Comment.Preproc, '#push'),
+            (r'\)', Comment.Preproc, '#pop'),
+            (r'[^()]+', Comment.Preproc),
+        ],
+        'body' : [
+            # whitespace
+            (r'\s+', Text),
+
+            # single-line comment
+            (r';.*$', Comment.Single),
+
+            # multi-line comment
+            (r'#\|', Comment.Multiline, 'multiline-comment'),
+
+            # encoding comment (?)
+            (r'#\d*Y.*$', Comment.Special),
+
+            # strings and characters
+            (r'"(\\.|[^"])*"', String),
+            # quoting
+            (r":" + symbol, String.Symbol),
+            (r"'" + symbol, String.Symbol),
+            (r"'", Operator),
+            (r"`", Operator),
+
+            # decimal numbers
+            (r'[-+]?\d+\.?' + terminated, Number.Integer),
+            (r'[-+]?\d+/\d+' + terminated, Number),
+            (r'[-+]?(\d*\.\d+([defls][-+]?\d+)?|\d+(\.\d*)?[defls][-+]?\d+)' \
+                + terminated, Number.Float),
+
+            # sharpsign strings and characters
+            (r"#\\." + terminated, String.Char),
+            (r"#\\" + symbol, String.Char),
+
+            # vector
+            (r'#\(', Operator, 'body'),
+
+            # bitstring
+            (r'#\d*\*[01]*', Literal.Other),
+
+            # uninterned symbol
+            (r'#:' + symbol, String.Symbol),
+
+            # read-time and load-time evaluation
+            (r'#[.,]', Operator),
+
+            # function shorthand
+            (r'#\'', Name.Function),
+
+            # binary rational
+            (r'#[bB][+-]?[01]+(/[01]+)?', Number),
+
+            # octal rational
+            (r'#[oO][+-]?[0-7]+(/[0-7]+)?', Number.Oct),
+
+            # hex rational
+            (r'#[xX][+-]?[0-9a-fA-F]+(/[0-9a-fA-F]+)?', Number.Hex),
+
+            # radix rational
+            (r'#\d+[rR][+-]?[0-9a-zA-Z]+(/[0-9a-zA-Z]+)?', Number),
+
+            # complex
+            (r'(#[cC])(\()', bygroups(Number, Punctuation), 'body'),
+
+            # array
+            (r'(#\d+[aA])(\()', bygroups(Literal.Other, Punctuation), 'body'),
+
+            # structure
+            (r'(#[sS])(\()', bygroups(Literal.Other, Punctuation), 'body'),
+
+            # path
+            (r'#[pP]?"(\\.|[^"])*"', Literal.Other),
+
+            # reference
+            (r'#\d+=', Operator),
+            (r'#\d+#', Operator),
+
+            # read-time comment
+            (r'#+nil' + terminated + '\s*\(', Comment.Preproc, 'commented-form'),
+
+            # read-time conditional
+            (r'#[+-]', Operator),
+
+            # special operators that should have been parsed already
+            (r'(,@|,|\.)', Operator),
+
+            # special constants
+            (r'(t|nil)' + terminated, Name.Constant),
+
+            # functions and variables
+            (r'\*' + symbol + '\*', Name.Variable.Global),
+            (symbol, Name.Variable),
+
+            # parentheses
+            (r'\(', Punctuation, 'body'),
+            (r'\)', Punctuation, '#pop'),
+        ],
+    }
+
+
+class HaskellLexer(RegexLexer):
+    """
+    A Haskell lexer based on the lexemes defined in the Haskell 98 Report.
+
+    *New in Pygments 0.8.*
+    """
+    name = 'Haskell'
+    aliases = ['haskell', 'hs']
+    filenames = ['*.hs']
+    mimetypes = ['text/x-haskell']
+
+    reserved = ['case','class','data','default','deriving','do','else',
+                'if','in','infix[lr]?','instance',
+                'let','newtype','of','then','type','where','_']
+    ascii = ['NUL','SOH','[SE]TX','EOT','ENQ','ACK',
+             'BEL','BS','HT','LF','VT','FF','CR','S[OI]','DLE',
+             'DC[1-4]','NAK','SYN','ETB','CAN',
+             'EM','SUB','ESC','[FGRU]S','SP','DEL']
+
+    tokens = {
+        'root': [
+            # Whitespace:
+            (r'\s+', Text),
+            #(r'--\s*|.*$', Comment.Doc),
+            (r'--(?![!#$%&*+./<=>?@\^|_~]).*?$', Comment.Single),
+            (r'{-', Comment.Multiline, 'comment'),
+            # Lexemes:
+            #  Identifiers
+            (r'\bimport\b', Keyword.Reserved, 'import'),
+            (r'\bmodule\b', Keyword.Reserved, 'module'),
+            (r'\berror\b', Name.Exception),
+            (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
+            (r'^[_a-z][\w\']*', Name.Function),
+            (r'[_a-z][\w\']*', Name),
+            (r'[A-Z][\w\']*', Keyword.Type),
+            #  Operators
+            (r'\\(?![:!#$%&*+.\\/<=>?@^|~-]+)', Name.Function), # lambda operator
+            (r'(<-|::|->|=>|=)(?![:!#$%&*+.\\/<=>?@^|~-]+)', Operator.Word), # specials
+            (r':[:!#$%&*+.\\/<=>?@^|~-]*', Keyword.Type), # Constructor operators
+            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator), # Other operators
+            #  Numbers
+            (r'\d+[eE][+-]?\d+', Number.Float),
+            (r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
+            (r'0[oO][0-7]+', Number.Oct),
+            (r'0[xX][\da-fA-F]+', Number.Hex),
+            (r'\d+', Number.Integer),
+            #  Character/String Literals
+            (r"'", String.Char, 'character'),
+            (r'"', String, 'string'),
+            #  Special
+            (r'\[\]', Keyword.Type),
+            (r'\(\)', Name.Builtin),
+            (r'[][(),;`{}]', Punctuation),
+        ],
+        'import': [
+            # Import statements
+            (r'\s+', Text),
+            # after "funclist" state
+            (r'\)', Punctuation, '#pop'),
+            (r'qualified\b', Keyword),
+            # import X as Y
+            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(as)(\s+)([A-Z][a-zA-Z0-9_.]*)',
+             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+            # import X hiding (functions)
+            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(hiding)(\s+)(\()',
+             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+            # import X (functions)
+            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
+             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+            # import X
+            (r'[a-zA-Z0-9_.]+', Name.Namespace, '#pop'),
+        ],
+        'module': [
+            (r'\s+', Text),
+            (r'([A-Z][a-zA-Z0-9_.]*)(\s+)(\()',
+             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+            (r'[A-Z][a-zA-Z0-9_.]*', Name.Namespace, '#pop'),
+        ],
+        'funclist': [
+            (r'\s+', Text),
+            (r'[A-Z][a-zA-Z0-9_]*', Keyword.Type),
+            (r'[_a-z][\w\']+', Name.Function),
+            (r'--.*$', Comment.Single),
+            (r'{-', Comment.Multiline, 'comment'),
+            (r',', Punctuation),
+            (r'[:!#$%&*+.\\/<=>?@^|~-]+', Operator),
+            # (HACK, but it makes sense to push two instances, believe me)
+            (r'\(', Punctuation, ('funclist', 'funclist')),
+            (r'\)', Punctuation, '#pop:2'),
+        ],
+        'comment': [
+            # Multiline Comments
+            (r'[^-{}]+', Comment.Multiline),
+            (r'{-', Comment.Multiline, '#push'),
+            (r'-}', Comment.Multiline, '#pop'),
+            (r'[-{}]', Comment.Multiline),
+        ],
+        'character': [
+            # Allows multi-chars, incorrectly.
+            (r"[^\\']", String.Char),
+            (r"\\", String.Escape, 'escape'),
+            ("'", String.Char, '#pop'),
+        ],
+        'string': [
+            (r'[^\\"]+', String),
+            (r"\\", String.Escape, 'escape'),
+            ('"', String, '#pop'),
+        ],
+        'escape': [
+            (r'[abfnrtv"\'&\\]', String.Escape, '#pop'),
+            (r'\^[][A-Z@\^_]', String.Escape, '#pop'),
+            ('|'.join(ascii), String.Escape, '#pop'),
+            (r'o[0-7]+', String.Escape, '#pop'),
+            (r'x[\da-fA-F]+', String.Escape, '#pop'),
+            (r'\d+', String.Escape, '#pop'),
+            (r'\n\s+\\', String.Escape, '#pop'),
+        ],
+    }
+
+
+line_re = re.compile('.*?\n')
+bird_re = re.compile(r'(>[ \t]*)(.*\n)')
+
+class LiterateHaskellLexer(Lexer):
+    """
+    For Literate Haskell (Bird-style or LaTeX) source.
+
+    Additional options accepted:
+
+    `litstyle`
+        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
+        is autodetected: if the first non-whitespace character in the source
+        is a backslash or percent character, LaTeX is assumed, else Bird.
+
+    *New in Pygments 0.9.*
+    """
+    name = 'Literate Haskell'
+    aliases = ['lhs', 'literate-haskell']
+    filenames = ['*.lhs']
+    mimetypes = ['text/x-literate-haskell']
+
+    def get_tokens_unprocessed(self, text):
+        hslexer = HaskellLexer(**self.options)
+
+        style = self.options.get('litstyle')
+        if style is None:
+            style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird'
+
+        code = ''
+        insertions = []
+        if style == 'bird':
+            # bird-style
+            for match in line_re.finditer(text):
+                line = match.group()
+                m = bird_re.match(line)
+                if m:
+                    insertions.append((len(code),
+                                       [(0, Comment.Special, m.group(1))]))
+                    code += m.group(2)
+                else:
+                    insertions.append((len(code), [(0, Text, line)]))
+        else:
+            # latex-style
+            from pygments.lexers.text import TexLexer
+            lxlexer = TexLexer(**self.options)
+
+            codelines = 0
+            latex = ''
+            for match in line_re.finditer(text):
+                line = match.group()
+                if codelines:
+                    if line.lstrip().startswith('\\end{code}'):
+                        codelines = 0
+                        latex += line
+                    else:
+                        code += line
+                elif line.lstrip().startswith('\\begin{code}'):
+                    codelines = 1
+                    latex += line
+                    insertions.append((len(code),
+                                       list(lxlexer.get_tokens_unprocessed(latex))))
+                    latex = ''
+                else:
+                    latex += line
+            insertions.append((len(code),
+                               list(lxlexer.get_tokens_unprocessed(latex))))
+        for item in do_insertions(insertions, hslexer.get_tokens_unprocessed(code)):
+            yield item
+
+
+class OcamlLexer(RegexLexer):
+    """
+    For the OCaml language.
+
+    *New in Pygments 0.7.*
+    """
+
+    name = 'OCaml'
+    aliases = ['ocaml']
+    filenames = ['*.ml', '*.mli', '*.mll', '*.mly']
+    mimetypes = ['text/x-ocaml']
+
+    keywords = [
+      'as', 'assert', 'begin', 'class', 'constraint', 'do', 'done',
+      'downto', 'else', 'end', 'exception', 'external', 'false',
+      'for', 'fun', 'function', 'functor', 'if', 'in', 'include',
+      'inherit', 'initializer', 'lazy', 'let', 'match', 'method',
+      'module', 'mutable', 'new', 'object', 'of', 'open', 'private',
+      'raise', 'rec', 'sig', 'struct', 'then', 'to', 'true', 'try',
+      'type', 'val', 'virtual', 'when', 'while', 'with'
+    ]
+    keyopts = [
+      '!=','#','&','&&','\(','\)','\*','\+',',','-',
+      '-\.','->','\.','\.\.',':','::',':=',':>',';',';;','<',
+      '<-','=','>','>]','>}','\?','\?\?','\[','\[<','\[>','\[\|',
+      ']','_','`','{','{<','\|','\|]','}','~'
+    ]
+
+    operators = r'[!$%&*+\./:<=>?@^|~-]'
+    word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
+    prefix_syms = r'[!?~]'
+    infix_syms = r'[=<>@^|&+\*/$%-]'
+    primitives = ['unit', 'int', 'float', 'bool', 'string', 'char', 'list', 'array']
+
+    tokens = {
+        'escape-sequence': [
+            (r'\\[\"\'ntbr]', String.Escape),
+            (r'\\[0-9]{3}', String.Escape),
+            (r'\\x[0-9a-fA-F]{2}', String.Escape),
+        ],
+        'root': [
+            (r'\s+', Text),
+            (r'false|true|\(\)|\[\]', Name.Builtin.Pseudo),
+            (r'\b([A-Z][A-Za-z0-9_\']*)(?=\s*\.)',
+             Name.Namespace, 'dotted'),
+            (r'\b([A-Z][A-Za-z0-9_\']*)', Name.Class),
+            (r'\(\*', Comment, 'comment'),
+            (r'\b(%s)\b' % '|'.join(keywords), Keyword),
+            (r'(%s)' % '|'.join(keyopts), Operator),
+            (r'(%s|%s)?%s' % (infix_syms, prefix_syms, operators), Operator),
+            (r'\b(%s)\b' % '|'.join(word_operators), Operator.Word),
+            (r'\b(%s)\b' % '|'.join(primitives), Keyword.Type),
+
+            (r"[^\W\d][\w']*", Name),
+
+            (r'\d[\d_]*', Number.Integer),
+            (r'0[xX][\da-fA-F][\da-fA-F_]*', Number.Hex),
+            (r'0[oO][0-7][0-7_]*', Number.Oct),
+            (r'0[bB][01][01_]*', Number.Binary),
+            (r'-?\d[\d_]*(.[\d_]*)?([eE][+\-]?\d[\d_]*)', Number.Float),
+
+            (r"'(?:(\\[\\\"'ntbr ])|(\\[0-9]{3})|(\\x[0-9a-fA-F]{2}))'",
+             String.Char),
+            (r"'.'", String.Char),
+            (r"'", Keyword), # a stray quote is another syntax element
+
+            (r'"', String.Double, 'string'),
+
+            (r'[~?][a-z][\w\']*:', Name.Variable),
+        ],
+        'comment': [
+            (r'[^(*)]+', Comment),
+            (r'\(\*', Comment, '#push'),
+            (r'\*\)', Comment, '#pop'),
+            (r'[(*)]', Comment),
+        ],
+        'string': [
+            (r'[^\\"]+', String.Double),
+            include('escape-sequence'),
+            (r'\\\n', String.Double),
+            (r'"', String.Double, '#pop'),
+        ],
+        'dotted': [
+            (r'\s+', Text),
+            (r'\.', Punctuation),
+            (r'[A-Z][A-Za-z0-9_\']*(?=\s*\.)', Name.Namespace),
+            (r'[A-Z][A-Za-z0-9_\']*', Name.Class, '#pop'),
+            (r'[a-z][a-z0-9_\']*', Name, '#pop'),
+        ],
+    }
+
+
+class ErlangLexer(RegexLexer):
+    """
+    For the Erlang functional programming language.
+
+    Blame Jeremy Thurgood (http://jerith.za.net/).
+
+    *New in Pygments 0.9.*
+    """
+
+    name = 'Erlang'
+    aliases = ['erlang']
+    filenames = ['*.erl', '*.hrl']
+    mimetypes = ['text/x-erlang']
+
+    keywords = [
+        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
+        'let', 'of', 'query', 'receive', 'try', 'when',
+        ]
+
+    builtins = [ # See erlang(3) man page
+        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
+        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
+        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
+        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
+        'float', 'float_to_list', 'fun_info', 'fun_to_list',
+        'function_exported', 'garbage_collect', 'get', 'get_keys',
+        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
+        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
+        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
+        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
+        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
+        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
+        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
+        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
+        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
+        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
+        'pid_to_list', 'port_close', 'port_command', 'port_connect',
+        'port_control', 'port_call', 'port_info', 'port_to_list',
+        'process_display', 'process_flag', 'process_info', 'purge_module',
+        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
+        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
+        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
+        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
+        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
+        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
+        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
+        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
+        ]
+
+    operators = r'(\+|-|\*|/|<|>|=|==|/=|=:=|=/=|=<|>=|\+\+|--|<-|!)'
+    word_operators = [
+        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
+        'div', 'not', 'or', 'orelse', 'rem', 'xor'
+        ]
+
+    atom_re = r"(?:[a-z][a-zA-Z0-9_]*|'[^\n']*[^\\]')"
+
+    variable_re = r'(?:[A-Z_][a-zA-Z0-9_]*)'
+
+    escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))'
+
+    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'
+
+    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'
+
+    tokens = {
+        'root': [
+            (r'\s+', Text),
+            (r'%.*\n', Comment),
+            ('(' + '|'.join(keywords) + r')\b', Keyword),
+            ('(' + '|'.join(builtins) + r')\b', Name.Builtin),
+            ('(' + '|'.join(word_operators) + r')\b', Operator.Word),
+            (r'^-', Punctuation, 'directive'),
+            (operators, Operator),
+            (r'"', String, 'string'),
+            (r'<<', Name.Label),
+            (r'>>', Name.Label),
+            (r'('+atom_re+')(:)', bygroups(Name.Namespace, Punctuation)),
+            (r'^('+atom_re+r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)),
+            (r'[+-]?'+base_re+r'#[0-9a-zA-Z]+', Number.Integer),
+            (r'[+-]?\d+', Number.Integer),
+            (r'[+-]?\d+.\d+', Number.Float),
+            (r'[][:_@\".{}()|;,]', Punctuation),
+            (variable_re, Name.Variable),
+            (atom_re, Name),
+            (r'\?'+macro_re, Name.Constant),
+            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
+            (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label),
+            ],
+        'string': [
+            (escape_re, String.Escape),
+            (r'"', String, '#pop'),
+            (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol),
+            (r'[^"\\~]+', String),
+            (r'~', String),
+            ],
+        'directive': [
+            (r'(define)(\s*)(\()('+macro_re+r')',
+             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
+            (r'(record)(\s*)(\()('+macro_re+r')',
+             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
+            (atom_re, Name.Entity, '#pop'),
+            ],
+        }
+
+
+class ErlangShellLexer(Lexer):
+    """
+    Shell sessions in erl (for Erlang code).
+
+    *New in Pygments 1.1.*
+    """
+    name = 'Erlang erl session'
+    aliases = ['erl']
+    filenames = ['*.erl-sh']
+    mimetypes = ['text/x-erl-shellsession']
+
+    _prompt_re = re.compile(r'\d+>(?=\s|\Z)')
+
+    def get_tokens_unprocessed(self, text):
+        erlexer = ErlangLexer(**self.options)
+
+        curcode = ''
+        insertions = []
+        for match in line_re.finditer(text):
+            line = match.group()
+            m = self._prompt_re.match(line)
+            if m is not None:
+                end = m.end()
+                insertions.append((len(curcode),
+                                   [(0, Generic.Prompt, line[:end])]))
+                curcode += line[end:]
+            else:
+                if curcode:
+                    for item in do_insertions(insertions,
+                                    erlexer.get_tokens_unprocessed(curcode)):
+                        yield item
+                    curcode = ''
+                    insertions = []
+                if line.startswith('*'):
+                    yield match.start(), Generic.Traceback, line
+                else:
+                    yield match.start(), Generic.Output, line
+        if curcode:
+            for item in do_insertions(insertions,
+                                      erlexer.get_tokens_unprocessed(curcode)):
+                yield item
+

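The LiterateHaskellLexer docstring above describes the litstyle option and
its backslash/percent autodetection. A sketch of passing the option
explicitly instead (the .lhs sample is invented; lexer options are forwarded
as keyword arguments, which is standard Pygments behaviour):

    # Sketch: force Bird-style processing instead of relying on
    # autodetection of the first non-whitespace character.
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers.functional import LiterateHaskellLexer

    lhs_source = (
        "This prose line is emitted as Text tokens.\n"
        "> main :: IO ()\n"
        "> main = putStrLn \"hello\"\n"
    )

    lexer = LiterateHaskellLexer(litstyle='bird')
    print(highlight(lhs_source, lexer, HtmlFormatter()))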