ThirdParty/Pygments/pygments/lexers/julia.py

changeset 5713
6762afd9f963
parent 5072
aab59042fefb
child 6651
e8f3b5568b21
--- a/ThirdParty/Pygments/pygments/lexers/julia.py	Sun Apr 23 16:40:31 2017 +0200
+++ b/ThirdParty/Pygments/pygments/lexers/julia.py	Tue Apr 25 18:36:38 2017 +0200
@@ -5,19 +5,24 @@
 
     Lexers for the Julia language.
 
-    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
     :license: BSD, see LICENSE for details.
 """
 
 import re
 
-from pygments.lexer import Lexer, RegexLexer, bygroups, combined, do_insertions
+from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
+    words, include
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
     Number, Punctuation, Generic
 from pygments.util import shebang_matches, unirange
 
 __all__ = ['JuliaLexer', 'JuliaConsoleLexer']
 
+allowed_variable = (
+    u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' %
+    ((unirange(0x10000, 0x10ffff),) * 2))
+
 
 class JuliaLexer(RegexLexer):
     """
@@ -25,6 +30,7 @@
 
     .. versionadded:: 1.6
     """
+
     name = 'Julia'
     aliases = ['julia', 'jl']
     filenames = ['*.jl']
@@ -32,51 +38,151 @@
 
     flags = re.MULTILINE | re.UNICODE
 
-    builtins = [
-        'exit', 'whos', 'edit', 'load', 'is', 'isa', 'isequal', 'typeof', 'tuple',
-        'ntuple', 'uid', 'hash', 'finalizer', 'convert', 'promote', 'subtype',
-        'typemin', 'typemax', 'realmin', 'realmax', 'sizeof', 'eps', 'promote_type',
-        'method_exists', 'applicable', 'invoke', 'dlopen', 'dlsym', 'system',
-        'error', 'throw', 'assert', 'new', 'Inf', 'Nan', 'pi', 'im',
-    ]
-
     tokens = {
         'root': [
             (r'\n', Text),
             (r'[^\S\n]+', Text),
             (r'#=', Comment.Multiline, "blockcomment"),
             (r'#.*$', Comment),
-            (r'[]{}:(),;[@]', Punctuation),
-            (r'\\\n', Text),
-            (r'\\', Text),
+            (r'[\[\]{}(),;]', Punctuation),
 
             # keywords
-            (r'(begin|while|for|in|return|break|continue|'
-             r'macro|quote|let|if|elseif|else|try|catch|end|'
-             r'bitstype|ccall|do|using|module|import|export|'
-             r'importall|baremodule|immutable)\b', Keyword),
+            (r'in\b', Keyword.Pseudo),
+            (r'(true|false)\b', Keyword.Constant),
             (r'(local|global|const)\b', Keyword.Declaration),
-            (r'(Bool|Int|Int8|Int16|Int32|Int64|Uint|Uint8|Uint16|Uint32|Uint64'
-             r'|Float32|Float64|Complex64|Complex128|Any|Nothing|None)\b',
+            (words([
+                'function', 'type', 'typealias', 'abstract', 'immutable',
+                'baremodule', 'begin', 'bitstype', 'break', 'catch', 'ccall',
+                'continue', 'do', 'else', 'elseif', 'end', 'export', 'finally',
+                'for', 'if', 'import', 'importall', 'let', 'macro', 'module',
+                'quote', 'return', 'try', 'using', 'while'],
+                suffix=r'\b'), Keyword),
+
+            # NOTE
+            # The patterns below match only definition sites, so they are hardly reliable.
+            #
+            # functions
+            # (r'(function)(\s+)(' + allowed_variable + ')',
+            #  bygroups(Keyword, Text, Name.Function)),
+            #
+            # types
+            # (r'(type|typealias|abstract|immutable)(\s+)(' + allowed_variable + ')',
+            #  bygroups(Keyword, Text, Name.Class)),
+
+            # type names
+            (words([
+                'ANY', 'ASCIIString', 'AbstractArray', 'AbstractChannel',
+                'AbstractFloat', 'AbstractMatrix', 'AbstractRNG',
+                'AbstractSparseArray', 'AbstractSparseMatrix',
+                'AbstractSparseVector', 'AbstractString', 'AbstractVecOrMat',
+                'AbstractVector', 'Any', 'ArgumentError', 'Array',
+                'AssertionError', 'Associative', 'Base64DecodePipe',
+                'Base64EncodePipe', 'Bidiagonal', 'BigFloat', 'BigInt',
+                'BitArray', 'BitMatrix', 'BitVector', 'Bool', 'BoundsError',
+                'Box', 'BufferStream', 'CapturedException', 'CartesianIndex',
+                'CartesianRange', 'Cchar', 'Cdouble', 'Cfloat', 'Channel',
+                'Char', 'Cint', 'Cintmax_t', 'Clong', 'Clonglong',
+                'ClusterManager', 'Cmd', 'Coff_t', 'Colon', 'Complex',
+                'Complex128', 'Complex32', 'Complex64', 'CompositeException',
+                'Condition', 'Cptrdiff_t', 'Cshort', 'Csize_t', 'Cssize_t',
+                'Cstring', 'Cuchar', 'Cuint', 'Cuintmax_t', 'Culong',
+                'Culonglong', 'Cushort', 'Cwchar_t', 'Cwstring', 'DataType',
+                'Date', 'DateTime', 'DenseArray', 'DenseMatrix',
+                'DenseVecOrMat', 'DenseVector', 'Diagonal', 'Dict',
+                'DimensionMismatch', 'Dims', 'DirectIndexString', 'Display',
+                'DivideError', 'DomainError', 'EOFError', 'EachLine', 'Enum',
+                'Enumerate', 'ErrorException', 'Exception', 'Expr',
+                'Factorization', 'FileMonitor', 'FileOffset', 'Filter',
+                'Float16', 'Float32', 'Float64', 'FloatRange', 'Function',
+                'GenSym', 'GlobalRef', 'GotoNode', 'HTML', 'Hermitian', 'IO',
+                'IOBuffer', 'IOStream', 'IPv4', 'IPv6', 'InexactError',
+                'InitError', 'Int', 'Int128', 'Int16', 'Int32', 'Int64', 'Int8',
+                'IntSet', 'Integer', 'InterruptException', 'IntrinsicFunction',
+                'InvalidStateException', 'Irrational', 'KeyError', 'LabelNode',
+                'LambdaStaticData', 'LinSpace', 'LineNumberNode', 'LoadError',
+                'LocalProcess', 'LowerTriangular', 'MIME', 'Matrix',
+                'MersenneTwister', 'Method', 'MethodError', 'MethodTable',
+                'Module', 'NTuple', 'NewvarNode', 'NullException', 'Nullable',
+                'Number', 'ObjectIdDict', 'OrdinalRange', 'OutOfMemoryError',
+                'OverflowError', 'Pair', 'ParseError', 'PartialQuickSort',
+                'Pipe', 'PollingFileWatcher', 'ProcessExitedException',
+                'ProcessGroup', 'Ptr', 'QuoteNode', 'RandomDevice', 'Range',
+                'Rational', 'RawFD', 'ReadOnlyMemoryError', 'Real',
+                'ReentrantLock', 'Ref', 'Regex', 'RegexMatch',
+                'RemoteException', 'RemoteRef', 'RepString', 'RevString',
+                'RopeString', 'RoundingMode', 'SegmentationFault',
+                'SerializationState', 'Set', 'SharedArray', 'SharedMatrix',
+                'SharedVector', 'Signed', 'SimpleVector', 'SparseMatrixCSC',
+                'StackOverflowError', 'StatStruct', 'StepRange', 'StridedArray',
+                'StridedMatrix', 'StridedVecOrMat', 'StridedVector', 'SubArray',
+                'SubString', 'SymTridiagonal', 'Symbol', 'SymbolNode',
+                'Symmetric', 'SystemError', 'TCPSocket', 'Task', 'Text',
+                'TextDisplay', 'Timer', 'TopNode', 'Tridiagonal', 'Tuple',
+                'Type', 'TypeConstructor', 'TypeError', 'TypeName', 'TypeVar',
+                'UDPSocket', 'UInt', 'UInt128', 'UInt16', 'UInt32', 'UInt64',
+                'UInt8', 'UTF16String', 'UTF32String', 'UTF8String',
+                'UndefRefError', 'UndefVarError', 'UnicodeError', 'UniformScaling',
+                'Union', 'UnitRange', 'Unsigned', 'UpperTriangular', 'Val',
+                'Vararg', 'VecOrMat', 'Vector', 'VersionNumber', 'Void', 'WString',
+                'WeakKeyDict', 'WeakRef', 'WorkerConfig', 'Zip'], suffix=r'\b'),
                 Keyword.Type),
 
-            # functions
-            (r'(function)((?:\s|\\\s)+)',
-                bygroups(Keyword, Name.Function), 'funcname'),
-
-            # types
-            (r'(type|typealias|abstract|immutable)((?:\s|\\\s)+)',
-                bygroups(Keyword, Name.Class), 'typename'),
+            # builtins
+            (words([
+                u'ARGS', u'CPU_CORES', u'C_NULL', u'DevNull', u'ENDIAN_BOM',
+                u'ENV', u'I', u'Inf', u'Inf16', u'Inf32', u'Inf64',
+                u'InsertionSort', u'JULIA_HOME', u'LOAD_PATH', u'MergeSort',
+                u'NaN', u'NaN16', u'NaN32', u'NaN64', u'OS_NAME',
+                u'QuickSort', u'RoundDown', u'RoundFromZero', u'RoundNearest',
+                u'RoundNearestTiesAway', u'RoundNearestTiesUp',
+                u'RoundToZero', u'RoundUp', u'STDERR', u'STDIN', u'STDOUT',
+                u'VERSION', u'WORD_SIZE', u'catalan', u'e', u'eu',
+                u'eulergamma', u'golden', u'im', u'nothing', u'pi', u'γ',
+                u'π', u'φ'],
+                suffix=r'\b'), Name.Builtin),
 
             # operators
-            (r'==|!=|<=|>=|->|&&|\|\||::|<:|[-~+/*%=<>&^|.?!$]', Operator),
-            (r'\.\*|\.\^|\.\\|\.\/|\\', Operator),
-
-            # builtins
-            ('(' + '|'.join(builtins) + r')\b',  Name.Builtin),
-
-            # backticks
-            (r'`(?s).*?`', String.Backtick),
+            # see: https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
+            (words([
+                # prec-assignment
+                u'=', u':=', u'+=', u'-=', u'*=', u'/=', u'//=', u'.//=', u'.*=', u'./=',
+                u'\=', u'.\=', u'^=', u'.^=', u'÷=', u'.÷=', u'%=', u'.%=', u'|=', u'&=',
+                u'$=', u'=>', u'<<=', u'>>=', u'>>>=', u'~', u'.+=', u'.-=',
+                # prec-conditional
+                u'?',
+                # prec-arrow
+                u'--', u'-->',
+                # prec-lazy-or
+                u'||',
+                # prec-lazy-and
+                u'&&',
+                # prec-comparison
+                u'>', u'<', u'>=', u'≥', u'<=', u'≤', u'==', u'===', u'≡', u'!=', u'≠',
+                u'!==', u'≢', u'.>', u'.<', u'.>=', u'.≥', u'.<=', u'.≤', u'.==', u'.!=',
+                u'.≠', u'.=', u'.!', u'<:', u'>:', u'∈', u'∉', u'∋', u'∌', u'⊆',
+                u'⊈', u'⊂',
+                u'⊄', u'⊊',
+                # prec-pipe
+                u'|>', u'<|',
+                # prec-colon
+                u':',
+                # prec-plus
+                u'+', u'-', u'.+', u'.-', u'|', u'∪', u'$',
+                # prec-bitshift
+                u'<<', u'>>', u'>>>', u'.<<', u'.>>', u'.>>>',
+                # prec-times
+                u'*', u'/', u'./', u'÷', u'.÷', u'%', u'⋅', u'.%', u'.*', u'\\', u'.\\', u'&', u'∩',
+                # prec-rational
+                u'//', u'.//',
+                # prec-power
+                u'^', u'.^',
+                # prec-decl
+                u'::',
+                # prec-dot
+                u'.',
+                # unary op
+                u'+', u'-', u'!', u'~', u'√', u'∛', u'∜'
+            ]), Operator),
 
             # chars
             (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
@@ -86,13 +192,19 @@
             (r'(?<=[.\w)\]])\'+', Operator),
 
             # strings
-            (r'(?:[IL])"', String, 'string'),
-            (r'[E]?"', String, combined('stringescape', 'string')),
+            (r'"""', String, 'tqstring'),
+            (r'"', String, 'string'),
+
+            # regular expressions
+            (r'r"""', String.Regex, 'tqregex'),
+            (r'r"', String.Regex, 'regex'),
+
+            # backticks
+            (r'`', String.Backtick, 'command'),
 
             # names
-            (r'@[\w.]+', Name.Decorator),
-            (u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' %
-             ((unirange(0x10000, 0x10ffff),)*2), Name),
+            (allowed_variable, Name),
+            (r'@' + allowed_variable, Name.Decorator),
 
             # numbers
             (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float),
@@ -109,45 +221,59 @@
             (r'\d+', Number.Integer)
         ],
 
-        'funcname': [
-            ('[a-zA-Z_]\w*', Name.Function, '#pop'),
-            ('\([^\s\w{]{1,2}\)', Operator, '#pop'),
-            ('[^\s\w{]{1,2}', Operator, '#pop'),
-        ],
-
-        'typename': [
-            ('[a-zA-Z_]\w*', Name.Class, '#pop')
-        ],
-
-        'stringescape': [
-            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
-             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
-        ],
         "blockcomment": [
             (r'[^=#]', Comment.Multiline),
             (r'#=', Comment.Multiline, '#push'),
             (r'=#', Comment.Multiline, '#pop'),
             (r'[=#]', Comment.Multiline),
         ],
+
         'string': [
             (r'"', String, '#pop'),
-            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
+            # FIXME: This escape pattern is not perfect.
+            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
             # Interpolation is defined as "$" followed by the shortest full
             # expression, which is something we can't parse.
             # Include the most common cases here: $word, and $(paren'd expr).
-            (r'\$[a-zA-Z_]+', String.Interpol),
-            (r'\$\(', String.Interpol, 'in-intp'),
+            (r'\$' + allowed_variable, String.Interpol),
+            # (r'\$[a-zA-Z_]+', String.Interpol),
+            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
             # @printf and @sprintf formats
-            (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[diouxXeEfFgGcrs%]',
+            (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
              String.Interpol),
-            (r'[^$%"\\]+', String),
-            # unhandled special signs
-            (r'[$%"\\]', String),
+            (r'.|\s', String),
+        ],
+
+        'tqstring': [
+            (r'"""', String, '#pop'),
+            (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
+            (r'\$' + allowed_variable, String.Interpol),
+            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
+            (r'.|\s', String),
+        ],
+
+        'regex': [
+            (r'"', String.Regex, '#pop'),
+            (r'\\"', String.Regex),
+            (r'.|\s', String.Regex),
         ],
+
+        'tqregex': [
+            (r'"""', String.Regex, '#pop'),
+            (r'.|\s', String.Regex),
+        ],
+
+        'command': [
+            (r'`', String.Backtick, '#pop'),
+            (r'\$' + allowed_variable, String.Interpol),
+            (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
+            (r'.|\s', String.Backtick)
+        ],
+
         'in-intp': [
-            (r'[^()]+', String.Interpol),
-            (r'\(', String.Interpol, '#push'),
-            (r'\)', String.Interpol, '#pop'),
+            (r'\(', Punctuation, '#push'),
+            (r'\)', Punctuation, '#pop'),
+            include('root'),
         ]
     }
 
@@ -155,9 +281,6 @@
         return shebang_matches(text, r'julia')
 
 
-line_re  = re.compile('.*?\n')
-
-
 class JuliaConsoleLexer(Lexer):
     """
     For Julia console sessions. Modeled after MatlabSessionLexer.
@@ -169,27 +292,26 @@
 
     def get_tokens_unprocessed(self, text):
         jllexer = JuliaLexer(**self.options)
-
+        start = 0
         curcode = ''
         insertions = []
-
-        for match in line_re.finditer(text):
-            line = match.group()
-
-            if line.startswith('julia>'):
-                insertions.append((len(curcode),
-                                   [(0, Generic.Prompt, line[:6])]))
-                curcode += line[6:]
+        output = False
+        error = False
 
-            elif line.startswith('      '):
-
-                idx = len(curcode)
-
-                # without is showing error on same line as before...?
-                line = "\n" + line
-                token = (0, Generic.Traceback, line)
-                insertions.append((idx, [token]))
-
+        for line in text.splitlines(True):
+            if line.startswith('julia>'):
+                insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])]))
+                curcode += line[6:]
+                output = False
+                error = False
+            elif line.startswith('help?>') or line.startswith('shell>'):
+                yield start, Generic.Prompt, line[:6]
+                yield start + 6, Text, line[6:]
+                output = False
+                error = False
+            elif line.startswith('      ') and not output:
+                insertions.append((len(curcode), [(0, Text, line[:6])]))
+                curcode += line[6:]
             else:
                 if curcode:
                     for item in do_insertions(
@@ -197,10 +319,15 @@
                         yield item
                     curcode = ''
                     insertions = []
+                if line.startswith('ERROR: ') or error:
+                    yield start, Generic.Error, line
+                    error = True
+                else:
+                    yield start, Generic.Output, line
+                output = True
+            start += len(line)
 
-                yield match.start(), Generic.Output, line
-
-        if curcode:  # or item:
+        if curcode:
             for item in do_insertions(
                     insertions, jllexer.get_tokens_unprocessed(curcode)):
                 yield item

eric ide

mercurial