ThirdParty/Pygments/pygments/lexers/python.py

changeset 4172
4f20dba37ab6
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.python
4 ~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for Python and related languages.
7
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \
15 default, words, combined, do_insertions
16 from pygments.util import get_bool_opt, shebang_matches
17 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
18 Number, Punctuation, Generic, Other, Error
19 from pygments import unistring as uni
20
# Public names exported by ``from pygments.lexers.python import *``.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'Python3Lexer', 'Python3TracebackLexer', 'CythonLexer',
           'DgLexer', 'NumPyLexer']
24
# Matches one newline-terminated line at a time (the newline is included in
# the match).  A trailing fragment that lacks a final newline is not matched.
line_re = re.compile('.*?\n')
26
27
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.

    The ``tokens`` table maps a state name to a list of
    ``(regex, token type[, next state])`` rules.  ``'root'`` is the entry
    state; the string states (``dqs``, ``sqs``, ``tdqs``, ``tsqs``) are
    entered from the string-opening rules in ``'root'`` and left with
    ``'#pop'`` when the closing quote is seen.
    """

    name = 'Python'
    aliases = ['python', 'py', 'sage']
    filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage']
    mimetypes = ['text/x-python', 'application/x-python']

    tokens = {
        'root': [
            (r'\n', Text),
            # triple-quoted strings that start a line are docstrings
            (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            # backslash line continuation, then a stray backslash
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            # these keywords switch state so the following identifier gets
            # a more specific token type
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('builtins'),
            include('backtick'),
            # raw strings: entered without the 'stringescape' rules
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # ordinary strings: escape rules are combined with the string state
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
                'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
                'print', 'raise', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
        ],
        'builtins': [
            # the (?<!\.) prefix keeps attribute accesses such as ``x.len``
            # from being highlighted as builtins
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
                'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
                'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
                'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
                'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
                'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
                'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
                'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
                'unichr', 'unicode', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError',
                'OSError', 'OverflowError', 'OverflowWarning', 'PendingDeprecationWarning',
                'ReferenceError', 'RuntimeError', 'RuntimeWarning', 'StandardError',
                'StopIteration', 'SyntaxError', 'SyntaxWarning', 'SystemError',
                'SystemExit', 'TabError', 'TypeError', 'UnboundLocalError',
                'UnicodeDecodeError', 'UnicodeEncodeError', 'UnicodeError',
                'UnicodeTranslateError', 'UnicodeWarning', 'UserWarning',
                'ValueError', 'VMSError', 'Warning', 'WindowsError',
                'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style format specifiers
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }

    def analyse_text(text):
        # Truthy when the shebang names python / pythonw (optionally 2 or
        # 2.x), or when 'import ' occurs in the first 1000 characters.
        return shebang_matches(text, r'pythonw?(2(\.\d)?)?') or \
            'import ' in text[:1000]
195
196
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    Starts from a shallow copy of ``PythonLexer.tokens`` and replaces the
    states whose rules differ for Python 3 (keywords, builtins, numbers,
    names, import handling and string contents).

    .. versionadded:: 0.10
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    # identifier pattern built from the XID_Start / XID_Continue classes
    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    # shallow copy: states are replaced wholesale below, never mutated in
    # place, so PythonLexer.tokens itself is left untouched
    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (words((
            'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
            'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise',
            'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as',
            'with', 'True', 'False', 'None'), suffix=r'\b'),
         Keyword),
    ]
    tokens['builtins'] = [
        (words((
            '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray', 'bytes',
            'chr', 'classmethod', 'cmp', 'compile', 'complex', 'delattr', 'dict',
            'dir', 'divmod', 'enumerate', 'eval', 'filter', 'float', 'format',
            'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
            'input', 'int', 'isinstance', 'issubclass', 'iter', 'len', 'list',
            'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object', 'oct',
            'open', 'ord', 'pow', 'print', 'property', 'range', 'repr', 'reversed',
            'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str',
            'sum', 'super', 'tuple', 'type', 'vars', 'zip'), prefix=r'(?<!\.)',
            suffix=r'\b'),
         Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
        (words((
            'ArithmeticError', 'AssertionError', 'AttributeError',
            'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
            'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
            'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
            'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
            'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
            'NotImplementedError', 'OSError', 'OverflowError',
            'PendingDeprecationWarning', 'ReferenceError',
            'RuntimeError', 'RuntimeWarning', 'StopIteration',
            'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
            'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
            'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
            'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
            'WindowsError', 'ZeroDivisionError',
            # new builtin exceptions from PEP 3151
            'BlockingIOError', 'ChildProcessError', 'ConnectionError',
            'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
            'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
            'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
            'PermissionError', 'ProcessLookupError', 'TimeoutError'),
            prefix=r'(?<!\.)', suffix=r'\b'),
         Name.Exception),
    ]
    # Differences from the Python 2 rules: no ``L`` suffix, and octal needs
    # the ``0o`` prefix.  Exponent-only floats (``1e10``) and the imaginary
    # ``j`` suffix are still valid in Python 3 and are therefore kept, in
    # line with PythonLexer's 'numbers' state.
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
        (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+j?', Number.Integer)
    ]
    # no rules: backtick expressions are not lexed as strings here
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@\w+', Name.Decorator),
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        default('#pop')  # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        default('#pop'),
    ]
    # don't highlight "%s" substitutions
    tokens['strings'] = [
        (r'[^\\\'"%\n]+', String),
        # quotes, percents and backslashes must be parsed one at a time
        (r'[\'"\\]', String),
        # unhandled string formatting sign
        (r'%', String)
        # newlines are an error (use "nl" state)
    ]

    def analyse_text(text):
        # Only a shebang naming python3 / pythonw3 (optionally 3.x) counts.
        return shebang_matches(text, r'pythonw?3(\.\d)?')
303
304
class PythonConsoleLexer(Lexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print a
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``False``.

        .. versionadded:: 1.0
    """
    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` triples for a console session.

        Lines starting with a ``>>> `` or ``... `` prompt are accumulated
        and lexed as code (with the prompts re-inserted as
        ``Generic.Prompt``), traceback blocks are accumulated and handed to
        the traceback lexer, and every other line is ``Generic.Output``.
        """
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''      # code collected since the last flush, prompts removed
        insertions = []   # prompt tokens to splice back into the lexed code
        curtb = ''        # traceback text collected so far
        tbindex = 0       # offset in ``text`` at which ``curtb`` starts
        tb = 0            # non-zero while inside a traceback block
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith(u'>>> ') or line.startswith(u'... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == u'...' and not tb:
                # only a new >>> prompt can end an exception block
                # otherwise an ellipsis in place of the traceback frames
                # will be mishandled
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, u'...')]))
                curcode += line[3:]
            else:
                # a non-prompt line ends the pending code block: flush it
                if curcode:
                    for item in do_insertions(
                            insertions, pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                # Traceback frame lines are indented by exactly two spaces;
                # a bare frame line (no "Traceback" header) also starts a
                # traceback block.
                if (line.startswith(u'Traceback (most recent call last):') or
                        re.match(u'  File "[^"]+", line \\d+\\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    # the first unindented, non-ellipsis line closes the
                    # traceback (it is included in the text handed over)
                    if not (line.startswith(' ') or line.strip() == u'...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                        curtb = ''
                else:
                    yield match.start(), Generic.Output, line
        # flush whatever is still pending at end of input
        if curcode:
            for item in do_insertions(insertions,
                                      pylexer.get_tokens_unprocessed(curcode)):
                yield item
        if curtb:
            for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                yield tbindex+i, t, v
391
392
class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    .. versionadded:: 0.7
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    # Python prints traceback frame lines indented by two spaces
    # ('  File "...", line N, in f') and the quoted source line below each
    # frame indented by four; the patterns rely on those exact widths.
    tokens = {
        'root': [
            (r'^Traceback \(most recent call last\):\n',
             Generic.Traceback, 'intb'),
            # SyntaxError starts with this.
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            (r'^.*\n', Other),
        ],
        'intb': [
            # frame line with a function name
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
            # frame line without ", in <name>"
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            # quoted source line, lexed as Python code
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),  # for doctests...
            # final "ExceptionName: message" line ends the traceback
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Text), '#pop'),
            # final line consisting of the exception name only
            (r'^([a-zA-Z_]\w*)(:?\n)',
             bygroups(Generic.Error, Text), '#pop')
        ],
    }
428
429
class Python3TracebackLexer(RegexLexer):
    """
    For Python 3.0 tracebacks, with support for chained exceptions.

    .. versionadded:: 1.0
    """

    name = 'Python 3.0 Traceback'
    aliases = ['py3tb']
    filenames = ['*.py3tb']
    mimetypes = ['text/x-python3-traceback']

    # Python prints traceback frame lines indented by two spaces
    # ('  File "...", line N, in f') and the quoted source line below each
    # frame indented by four; the patterns rely on those exact widths.
    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # separators printed between chained exceptions
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
            # a bare frame line (no "Traceback" header) also enters 'intb'
            (r'^(?=  File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
        ],
        'intb': [
            # frame line with a function name
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
            # frame line without ", in <name>"
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            # quoted source line, lexed as Python 3 code
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python3Lexer), Text)),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),  # for doctests...
            # final "ExceptionName: message" line ends the traceback
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Text), '#pop'),
            # final line consisting of the exception name only
            (r'^([a-zA-Z_]\w*)(:?\n)',
             bygroups(Generic.Error, Text), '#pop')
        ],
    }
467
468
class CythonLexer(RegexLexer):
    """
    For Pyrex and `Cython <http://cython.org>`_ source code.

    .. versionadded:: 1.1
    """

    name = 'Cython'
    aliases = ['cython', 'pyx', 'pyrex']
    filenames = ['*.pyx', '*.pxd', '*.pxi']
    mimetypes = ['text/x-cython', 'application/x-cython']

    tokens = {
        'root': [
            (r'\n', Text),
            # triple-quoted strings that start a line are docstrings
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            # <type> cast
            (r'(<)([a-zA-Z0-9.?]+)(>)',
             bygroups(Punctuation, Keyword.Type, Punctuation)),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
            (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
             bygroups(Keyword, Number.Integer, Operator, Name, Operator,
                      Name, Punctuation)),
            include('keywords'),
            (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
            (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
            (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
            include('builtins'),
            include('backtick'),
            # raw strings: entered without the 'stringescape' rules
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif',
                'else', 'except', 'except?', 'exec', 'finally', 'for', 'gil',
                'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print',
                'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'),
             Keyword),
            # compile-time DEF / IF / ELIF / ELSE
            (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
        ],
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr',
                'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr',
                'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit',
                'file', 'filter', 'float', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max',
                'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property',
                'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
                'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
                'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode',
                'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit', 'IOError',
                'ImportError', 'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplemented', 'NotImplementedError', 'OSError', 'OverflowError',
                'OverflowWarning', 'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
                'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
                'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'numbers': [
            # A float needs a decimal point or an exponent.  The former
            # pattern (\d+\.?\d*) also matched bare digit runs, so every
            # integer was tagged Number.Float and the Oct/Hex/Long/Integer
            # rules below could never fire.
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@\w+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        'funcname': [
            (r'[a-zA-Z_]\w*', Name.Function, '#pop')
        ],
        'cdef': [
            (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
            (r'(struct|enum|union|class)\b', Keyword),
            # identifier followed by ( : # = or end of line: the declared name
            (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)',
             bygroups(Name.Function, Text), '#pop'),
            (r'([a-zA-Z_]\w*)(\s*)(,)',
             bygroups(Name.Function, Text, Punctuation)),
            (r'from\b', Keyword, '#pop'),
            (r'as\b', Keyword),
            (r':', Punctuation, '#pop'),
            (r'(?=["\'])', Text, '#pop'),
            # any other identifier is taken to be a type name
            (r'[a-zA-Z_]\w*', Keyword.Type),
            (r'.', Text),
        ],
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            default('#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'),
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style format specifiers
            (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }
643
644
class DgLexer(RegexLexer):
    """
    Lexer for `dg <http://pyos.github.com/dg>`_,
    a functional and object-oriented programming language
    running on the CPython 3 VM.

    .. versionadded:: 1.6
    """
    name = 'dg'
    aliases = ['dg']
    filenames = ['*.dg']
    mimetypes = ['text/x-dg']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*?$', Comment.Single),

            (r'(?i)0b[01]+', Number.Bin),
            (r'(?i)0o[0-7]+', Number.Oct),
            (r'(?i)0x[0-9a-f]+', Number.Hex),
            (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float),
            (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float),
            (r'(?i)[+-]?[0-9]+j?', Number.Integer),

            # string openers; the closing-quote state is combined with the
            # escape and content rules
            (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')),
            (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')),
            (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')),
            (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')),

            # a name in backticks is used as an operator
            (r"`\w+'*`", Operator),
            (r'\b(and|in|is|or|where)\b', Operator.Word),
            (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),

            # names may carry trailing apostrophes, hence the (?![\'\w])
            # suffix instead of \b
            (words((
                'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
                'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
                'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super',
                'tuple', 'tuple\'', 'type'), prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            # words() escapes its entries, so regex syntax such as
            # 'foldl1?' would only match the literal text "foldl1?".
            # Both spellings are therefore listed explicitly.
            (words((
                '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
                'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
                'eval', 'exhaust', 'filter', 'flip', 'foldl', 'foldl1', 'format', 'fst',
                'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init', 'input',
                'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len', 'locals',
                'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow', 'print', 'repr',
                'reversed', 'round', 'setattr', 'scanl', 'scanl1', 'snd', 'sorted', 'sum',
                'tail', 'take', 'takewhile', 'vars', 'zip'),
                prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
             Name.Builtin),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
             Name.Builtin.Pseudo),

            # any capitalised name ending in Error / Exception / Warning
            (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
             Name.Exception),
            (r"(?<!\.)(Exception|GeneratorExit|KeyboardInterrupt|StopIteration|"
             r"SystemExit)(?!['\w])", Name.Exception),

            (r"(?<![\w.])(except|finally|for|if|import|not|otherwise|raise|"
             r"subclass|while|with|yield)(?!['\w])", Keyword.Reserved),

            (r"[A-Z_]+'*(?!['\w])", Name),
            (r"[A-Z]\w+'*(?!['\w])", Keyword.Type),
            (r"\w+'*", Name),

            (r'[()]', Punctuation),
            (r'.', Error),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'string': [
            # %-style format specifiers
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String),
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop')
        ],
        'sqs': [
            (r"'", String, '#pop')
        ],
        'tdqs': [
            (r'"""', String, '#pop')
        ],
        'tsqs': [
            (r"'''", String, '#pop')
        ],
    }
740
741
class NumPyLexer(PythonLexer):
    """
    A Python lexer recognizing Numerical Python builtins.

    Lexes exactly like :class:`PythonLexer`, then re-tags plain ``Name``
    tokens whose text is in ``EXTRA_KEYWORDS`` as ``Keyword.Pseudo``.

    .. versionadded:: 0.10
    """

    name = 'NumPy'
    aliases = ['numpy']

    # override the mimetypes to not inherit them from python
    mimetypes = []
    filenames = []

    # names that get re-tagged as Keyword.Pseudo
    EXTRA_KEYWORDS = set([
        'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
        'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
        'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
        'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
        'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
        'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
        'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
        'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
        'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
        'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
        'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
        'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
        'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
        'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
        'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
        'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
        'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
        'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
        'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
        'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
        'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
        'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
        'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
        'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
        'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
        'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
        'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
        'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
        'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
        'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
        'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
        'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
        'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
        'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
        'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
        'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
        'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
        'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
        'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
        'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
        'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
        'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
        'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
        'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
        'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
        'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
        'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
        'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
        'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
        'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
        'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
        'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
        'set_numeric_ops', 'set_printoptions', 'set_string_function',
        'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
        'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
        'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
        'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
        'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
        'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
        'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
        'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
        'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
        'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
        'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
    ])

    def get_tokens_unprocessed(self, text):
        """Yield PythonLexer's tokens, re-tagging the extra keywords."""
        extra_names = self.EXTRA_KEYWORDS
        base_stream = PythonLexer.get_tokens_unprocessed(self, text)
        for position, kind, chunk in base_stream:
            # only plain Name tokens are candidates (identity check, so
            # subtypes of Name are left alone)
            if kind is Name and chunk in extra_names:
                kind = Keyword.Pseudo
            yield position, kind, chunk

    def analyse_text(text):
        # Must look like Python (same test PythonLexer uses) *and* mention
        # a numpy import somewhere in the text.
        looks_like_python = (shebang_matches(text, r'pythonw?(2(\.\d)?)?') or
                             'import ' in text[:1000])
        return looks_like_python and \
            ('import numpy' in text or 'from numpy import' in text)

eric ide

mercurial