|
# -*- coding: utf-8 -*-
"""
    pygments.lexers.agile
    ~~~~~~~~~~~~~~~~~~~~~

    Lexers for agile languages.

    :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
|
11 |
|
12 import re |
|
13 try: |
|
14 set |
|
15 except NameError: |
|
16 from sets import Set as set |
|
17 |
|
18 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ |
|
19 LexerContext, include, combined, do_insertions, bygroups, using |
|
20 from pygments.token import Error, Text, \ |
|
21 Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation |
|
22 from pygments.util import get_bool_opt, get_list_opt, shebang_matches |
|
23 from pygments import unistring as uni |
|
24 |
|
25 |
|
26 __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', |
|
27 'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', |
|
28 'MiniDLexer', 'IoLexer', 'TclLexer', 'ClojureLexer', |
|
29 'Python3Lexer', 'Python3TracebackLexer'] |
|
30 |
|
31 # b/w compatibility |
|
32 from pygments.lexers.functional import SchemeLexer |
|
33 |
|
# Matches one newline-terminated line at a time (non-greedy up to and
# including the "\n"); a trailing fragment without a newline is not matched.
line_re = re.compile(r'.*?\n')
|
35 |
|
36 |
|
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.
    """

    name = 'Python'
    aliases = ['python', 'py']
    filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript']
    mimetypes = ['text/x-python', 'application/x-python']

    # Lexer state table: state name -> list of (regex, token[, new state]).
    tokens = {
        'root': [
            (r'\n', Text),
            # triple-quoted strings that begin a line are docstrings
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('builtins'),
            include('backtick'),
            # raw strings: entered without the 'stringescape' state
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            # ordinary strings: escape sequences are highlighted
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (r'(assert|break|continue|del|elif|else|except|exec|'
             r'finally|for|global|if|lambda|pass|print|raise|'
             r'return|try|while|yield|as|with)\b', Keyword),
        ],
        'builtins': [
            (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
             r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
             r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
             r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
             r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
             r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
             r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
             r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
             r'vars|xrange|zip)\b', Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
             r')\b', Name.Builtin.Pseudo),
            # "NotImplemented" is not repeated here: the pseudo-builtin rule
            # above always matches it first, so the alternative was dead.
            (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
             r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
             r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
             r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
             r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
             r'NotImplementedError|OSError|OverflowError|'
             r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
             r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
             r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
             r'TypeError|UnboundLocalError|UnicodeDecodeError|'
             r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
             r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
             r'WindowsError|ZeroDivisionError)\b', Name.Exception),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'((?:\s|\\\s)+)(as)((?:\s|\\\s)+)',
             bygroups(Text, Keyword.Namespace, Text)),
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            (r'', Text, '#pop')  # all else: go back
        ],
        'fromimport': [
            (r'((?:\s|\\\s)+)(import)\b', bygroups(Text, Keyword.Namespace),
             '#pop'),
            (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            # included here again for raw strings
            (r'\\\\|\\"|\\\n', String.Escape),
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            # included here again for raw strings
            (r"\\\\|\\'|\\\n", String.Escape),
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }

    def analyse_text(text):
        """
        Score `text` by its shebang line via `shebang_matches`.

        Accepts ``python``/``pythonw`` optionally followed by ``2`` or
        ``2.x``; the previous pattern required the minor version and so
        rejected a plain ``python2`` interpreter name.
        """
        return shebang_matches(text, r'pythonw?(2(\.\d)?)?')
|
181 |
|
182 |
|
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    *New in Pygments 0.10.*
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    # identifier pattern built from the Unicode XID_Start/XID_Continue sets
    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    # Start from the Python 2 state table (shallow copy) and replace the
    # states that differ; untouched states are shared with PythonLexer.
    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        # "nonlocal" is a Python 3 keyword and was missing from this list
        (r'(assert|break|continue|del|elif|else|except|'
         r'finally|for|global|if|lambda|nonlocal|pass|raise|'
         r'return|try|while|yield|as|with|True|False|None)\b', Keyword),
    ]
    tokens['builtins'] = [
        (r'(?<!\.)(__import__|abs|all|any|bin|bool|bytearray|bytes|'
         r'chr|classmethod|cmp|compile|complex|delattr|dict|dir|'
         r'divmod|enumerate|eval|filter|float|format|frozenset|getattr|'
         r'globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|'
         r'iter|len|list|locals|map|max|memoryview|min|next|object|oct|'
         r'open|ord|pow|print|property|range|repr|reversed|round|'
         r'set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|'
         r'vars|zip)\b', Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
        (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
         r'BaseException|BufferError|BytesWarning|DeprecationWarning|'
         r'EOFError|EnvironmentError|Exception|FloatingPointError|'
         r'FutureWarning|GeneratorExit|IOError|ImportError|'
         r'ImportWarning|IndentationError|IndexError|KeyError|'
         r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
         r'NotImplementedError|OSError|OverflowError|'
         r'PendingDeprecationWarning|ReferenceError|'
         r'RuntimeError|RuntimeWarning|StopIteration|'
         r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
         r'TypeError|UnboundLocalError|UnicodeDecodeError|'
         r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
         r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
         r'WindowsError|ZeroDivisionError)\b', Name.Exception),
    ]
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        # exponent without a decimal point (e.g. 1e10); without this rule
        # the literal was split into an integer followed by a name
        (r'\d+[eE][+-]?[0-9]+', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    # no backtick repr syntax in this lexer
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@[a-zA-Z0-9_]+', Name.Decorator),
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        (r'', Text, '#pop')  # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
    ]
    # don't highlight "%s" substitutions
    tokens['strings'] = [
        (r'[^\\\'"%\n]+', String),
        # quotes, percents and backslashes must be parsed one at a time
        (r'[\'"\\]', String),
        # unhandled string formatting sign
        (r'%', String)
        # newlines are an error (use "nl" state)
    ]

    def analyse_text(text):
        """
        Score `text` by its shebang line via `shebang_matches`: accepts
        ``python3``/``pythonw3`` with an optional ``.x`` minor version.
        """
        return shebang_matches(text, r'pythonw?3(\.\d)?')
|
272 |
|
273 |
|
class PythonConsoleLexer(Lexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print a
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``False``.
        *New in Pygments 1.0.*
    """
    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for a console session.

        Lines starting with ``>>> ``/``... `` (or a bare ``...``) are
        collected and lexed as code with the prompts re-inserted; traceback
        lines are collected and handed to the traceback lexer; a lone
        ``KeyboardInterrupt`` line is a class name; everything else is
        output.
        """
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''        # code collected from consecutive prompt lines
        insertions = []     # prompt tokens to splice back into curcode
        curtb = ''          # traceback text collected so far
        tbindex = 0         # offset in `text` where curtb starts
        tb = 0              # true while inside a traceback
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                prompt = line[:4]
            elif line.rstrip() == '...':
                prompt = '...'
            else:
                prompt = None

            if prompt is not None:
                if curtb:
                    # a prompt cut the traceback short: emit what was
                    # collected instead of silently dropping it
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex+i, t, v
                    curtb = ''
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, prompt)]))
                curcode += line[len(prompt):]
                continue

            # a non-prompt line ends the current run of code
            if curcode:
                for item in do_insertions(insertions,
                        pylexer.get_tokens_unprocessed(curcode)):
                    yield item
                curcode = ''
                insertions = []

            starts_tb = line.startswith('Traceback (most recent call last):')
            if not starts_tb and not tb:
                # a SyntaxError report has no header and begins with the
                # two-space indented frame line; inside a running traceback
                # such a line is just part of it
                starts_tb = re.match(r'  File "[^"]+", line \d+\n$',
                                     line) is not None
            if starts_tb:
                if curtb:
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex+i, t, v
                tb = 1
                curtb = line
                tbindex = match.start()
            elif tb:
                curtb += line
                # the first unindented line (the exception line) ends it
                if not (line.startswith(' ') or line.strip() == '...'):
                    tb = 0
                    for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                        yield tbindex+i, t, v
                    curtb = ''
            elif line == 'KeyboardInterrupt\n':
                yield match.start(), Name.Class, line
            else:
                yield match.start(), Generic.Output, line

        if curcode:
            for item in do_insertions(insertions,
                    pylexer.get_tokens_unprocessed(curcode)):
                yield item
        if curtb:
            # input ended in the middle of a traceback
            for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                yield tbindex+i, t, v
|
353 |
|
354 |
|
class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    # Python prints frame lines indented by two spaces and the quoted source
    # line by four; the patterns below must use exactly those widths.
    tokens = {
        'root': [
            (r'^Traceback \(most recent call last\):\n',
             Generic.Traceback, 'intb'),
            # SyntaxError starts with this.
            (r'^(?=  File "[^"]+", line \d+\n)', Generic.Traceback, 'intb'),
        ],
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text,
                      Name.Identifier, Text)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            # for doctests: a literal "..." (dots escaped so that arbitrary
            # three-character lines are not taken for an ellipsis)
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),
            # "ExceptionName: message" closes the traceback
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            # bare exception name without a message
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }
|
388 |
|
389 |
|
class Python3TracebackLexer(RegexLexer):
    """
    For Python 3.0 tracebacks, with support for chained exceptions.

    *New in Pygments 1.0.*
    """

    name = 'Python 3.0 Traceback'
    aliases = ['py3tb']
    filenames = ['*.py3tb']
    mimetypes = ['text/x-python3-traceback']

    # Python prints frame lines indented by two spaces and the quoted source
    # line by four; the patterns below must use exactly those widths.
    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n',
             Generic.Traceback, 'intb'),
            # separators printed between chained exceptions
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
        ],
        'intb': [
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text,
                      Name.Identifier, Text)),
            # frame line without ", in <name>", as in PythonTracebackLexer
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python3Lexer), Text)),
            # for doctests: a literal "..." (dots escaped so that arbitrary
            # three-character lines are not taken for an ellipsis)
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),
            # "ExceptionName: message" closes the traceback
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            # bare exception name without a message
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }
|
424 |
|
425 |
|
class RubyLexer(ExtendedRegexLexer):
    """
    For `Ruby <http://www.ruby-lang.org>`_ source code.
    """

    name = 'Ruby'
    aliases = ['rb', 'ruby']
    filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx']
    mimetypes = ['text/x-ruby', 'application/x-ruby']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        """
        Lex a heredoc opener, the rest of its line, and the heredoc bodies.

        Groups of `match`: 1 = ``<<`` or ``<<-``, 2 = opening quote (may be
        empty), 3 = heredoc name, 4 = closing quote, 5 = rest of the line.
        """
        # okay, this is the hardest part of parsing Ruby...
        yield match.start(1), Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), Name.Constant, match.group(3)   # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        # heredocs opened on this line, kept on the context object so that
        # nested invocations of this callback share the same list
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not heredocstack
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # lex the remainder of the opening line; this may find other heredocs
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        for index, ttype, value in self.get_tokens_unprocessed(context=ctx):
            yield index, ttype, value
        ctx.pos = match.end()

        if not outermost:
            return

        # this is the outer heredoc again, now we can process them all
        for tolerant, hdname in heredocstack:
            body = []
            for line_match in line_re.finditer(ctx.text, ctx.pos):
                # "<<-" allows the terminator to be indented
                if tolerant:
                    check = line_match.group().strip()
                else:
                    check = line_match.group().rstrip()
                if check != hdname:
                    body.append(line_match)
                    continue
                for body_line in body:
                    yield (body_line.start(), String.Heredoc,
                           body_line.group())
                yield line_match.start(), Name.Constant, line_match.group()
                ctx.pos = line_match.end()
                break
            else:
                # end of heredoc not found -- error!
                for body_line in body:
                    yield body_line.start(), Error, body_line.group()
        ctx.end = len(ctx.text)
        del heredocstack[:]

    def gen_rubystrings_rules():
        """
        Build and return the string-related states as a dict
        (``'strings'`` plus the per-delimiter helper states).
        """
        def intp_regex_callback(self, match, ctx):
            # opener, then the body lexed in its own context, then closer
            yield match.start(1), String.Regex, match.group(1)    # begin
            inner = LexerContext(match.group(3), 0, ['interpolated-regex'])
            offset = match.start(3)
            for i, t, v in self.get_tokens_unprocessed(context=inner):
                yield offset+i, t, v
            # end[mixounse]*
            yield match.start(4), String.Regex, match.group(4)
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # same scheme as above, for interpolated strings
            yield match.start(1), String.Other, match.group(1)
            inner = LexerContext(match.group(3), 0, ['interpolated-string'])
            offset = match.start(3)
            for i, t, v in self.get_tokens_unprocessed(context=inner):
                yield offset+i, t, v
            yield match.start(4), String.Other, match.group(4)    # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        simple_kinds = (
            ('string', String.Double, '"'),
            ('sym', String.Symbol, '"'),
            ('backtick', String.Backtick, '`'),
        )
        for name, ttype, end in simple_kinds:
            states['simple-'+name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        brace_kinds = (
            ('\\{', '\\}', 'cb'),
            ('\\[', '\\]', 'sb'),
            ('\\(', '\\)', 'pa'),
            ('<', '>', 'ab'),
        )
        for lbrace, rbrace, name in brace_kinds:
            # pattern fragments shared by the three states of this delimiter
            escaped_delim = r'\\[\\' + lbrace + rbrace + ']'
            opener = r'(?<!\\)' + lbrace
            closer = r'(?<!\\)' + rbrace
            special_char = r'[\\#' + lbrace + rbrace + ']'
            plain_run = r'[^\\#' + lbrace + rbrace + ']+'

            states[name+'-intp-string'] = [
                (escaped_delim, String.Other),
                (opener, String.Other, '#push'),
                (closer, String.Other, '#pop'),
                include('string-intp-escaped'),
                (special_char, String.Other),
                (plain_run, String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (escaped_delim, String.Other),
                (opener, String.Other, '#push'),
                (closer, String.Other, '#pop'),
                (special_char, String.Other),
                (plain_run, String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (escaped_delim, String.Regex),
                (opener, String.Regex, '#push'),
                (closer + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (special_char, String.Regex),
                (plain_run, String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([^a-zA-Z0-9]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])([^\1\\]*(?:\\.[^\1\\]*)*)\1',
             String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:[^\3\\]*(?:\\.[^\3\\]*)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:[^\3\\]*(?:\\.[^\3\\]*)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            (r'=begin\s.*?\n=end', Comment.Multiline),
            # keywords
            (r'(BEGIN|END|alias|begin|break|case|defined\?|'
             r'do|else|elsif|end|ensure|for|if|in|next|redo|'
             r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
             r'while|yield)\b', Keyword),
            # start of function, class and module names
            (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # special methods
            (r'(initialize|new|loop|include|extend|raise|attr_reader|'
             r'attr_writer|attr_accessor|attr|catch|throw|private|'
             r'module_function|public|protected|true|false|nil)\b',
             Keyword.Pseudo),
            (r'(not|and|or)\b', Operator.Word),
            (r'(autoload|block_given|const_defined|eql|equal|frozen|include|'
             r'instance_of|is_a|iterator|kind_of|method_defined|nil|'
             r'private_method_defined|protected_method_defined|'
             r'public_method_defined|respond_to|tainted)\?', Name.Builtin),
            (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
            (r'(?<!\.)(Array|Float|Integer|String|__id__|__send__|abort|ancestors|'
             r'at_exit|autoload|binding|callcc|caller|'
             r'catch|chomp|chop|class_eval|class_variables|'
             r'clone|const_defined\?|const_get|const_missing|const_set|constants|'
             r'display|dup|eval|exec|exit|extend|fail|fork|'
             r'format|freeze|getc|gets|global_variables|gsub|'
             r'hash|id|included_modules|inspect|instance_eval|'
             r'instance_method|instance_methods|'
             r'instance_variable_get|instance_variable_set|instance_variables|'
             r'lambda|load|local_variables|loop|'
             r'method|method_missing|methods|module_eval|name|'
             r'object_id|open|p|print|printf|private_class_method|'
             r'private_instance_methods|'
             r'private_methods|proc|protected_instance_methods|'
             r'protected_methods|public_class_method|'
             r'public_instance_methods|public_methods|'
             r'putc|puts|raise|rand|readline|readlines|require|'
             r'scan|select|self|send|set_trace_func|singleton_methods|sleep|'
             r'split|sprintf|srand|sub|syscall|system|taint|'
             r'test|throw|to_a|to_s|trace_var|trap|type|untaint|untrace_var|'
             r'warn)\b', Name.Builtin),
            (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!])|'
                 r'(?<=(?:\s|;)when\s)|'
                 r'(?<=(?:\s|;)or\s)|'
                 r'(?<=(?:\s|;)and\s)|'
                 r'(?<=(?:\s|;|\.)index\s)|'
                 r'(?<=(?:\s|;|\.)scan\s)|'
                 r'(?<=(?:\s|;|\.)sub\s)|'
                 r'(?<=(?:\s|;|\.)sub!\s)|'
                 r'(?<=(?:\s|;|\.)gsub\s)|'
                 r'(?<=(?:\s|;|\.)gsub!\s)|'
                 r'(?<=(?:\s|;|\.)match\s)|'
                 r'(?<=(?:\s|;)if\s)|'
                 r'(?<=(?:\s|;)elsif\s)|'
                 r'(?<=^when\s)|'
                 r'(?<=^index\s)|'
                 r'(?<=^scan\s)|'
                 r'(?<=^sub\s)|'
                 r'(?<=^gsub\s)|'
                 r'(?<=^sub!\s)|'
                 r'(?<=^gsub!\s)|'
                 r'(?<=^match\s)|'
                 r'(?<=^if\s)|'
                 r'(?<=^elsif\s)'
             r')(\s*)(/)(?!=)', bygroups(Text, String.Regex),
             'multiline-regex'),
            # multiline regex (in method calls)
            (r'(?<=\(|,)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/[^\s=])', String.Regex, 'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
            (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
            (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
            # this is needed because ruby attributes can look
            # like keywords (class) or like this: ` ?!?
            (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
             bygroups(Operator, Name)),
            (r'[a-zA-Z_][\w_]*[\!\?]?', Name),
            (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        'funcname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
             r'([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            (r'', Text, '#pop')
        ],
        'classname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'<<', Operator, '#pop'),
            (r'[A-Z_][\w_]*', Name.Class, '#pop'),
            (r'', Text, '#pop')
        ],
        'defexpr': [
            (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
            (r'\(', Operator, '#push'),
            include('root')
        ],
        'in-intp': [
            ('}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#{', String.Interpol, 'in-intp'),
            (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
            (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})',
             String.Escape)
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[mixounse]*', String.Regex, '#pop'),
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }
    # merge in the generated string states
    tokens.update(gen_rubystrings_rules())

    def analyse_text(text):
        """
        Score `text` by its shebang line via `shebang_matches`: accepts
        ``ruby`` with an optional ``1.x`` version suffix.
        """
        return shebang_matches(text, r'ruby(1\.\d)?')
|
763 |
|
764 |
|
class RubyConsoleLexer(Lexer):
    """
    For Ruby interactive console (**irb**) output like:

    .. sourcecode:: rbcon

        irb(main):001:0> a = 1
        => 1
        irb(main):002:0> puts a
        1
        => nil
    """
    name = 'Ruby irb session'
    aliases = ['rbcon', 'irb']
    mimetypes = ['text/x-ruby-shellsession']

    # recognised prompts: "irb(name):NNN:N" followed by one of > * " '
    # and a space, or the short forms ">> " and "?> "
    _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
                            '|>> |\?> ')

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(index, tokentype, value)`` tuples for an irb session.

        Consecutive prompt lines are joined (minus their prompts) and lexed
        as Ruby with the prompts re-inserted; every other line is output.
        """
        ruby = RubyLexer(**self.options)

        pending_code = ''
        prompts = []
        for line_match in line_re.finditer(text):
            line = line_match.group()
            found = self._prompt_re.match(line)
            if found is None:
                # an output line: first emit the code gathered so far
                if pending_code:
                    code_tokens = ruby.get_tokens_unprocessed(pending_code)
                    for token in do_insertions(prompts, code_tokens):
                        yield token
                    pending_code = ''
                    prompts = []
                yield line_match.start(), Generic.Output, line
            else:
                cut = found.end()
                prompts.append((len(pending_code),
                                [(0, Generic.Prompt, line[:cut])]))
                pending_code += line[cut:]
        # session ended on a prompt line
        if pending_code:
            code_tokens = ruby.get_tokens_unprocessed(pending_code)
            for token in do_insertions(prompts, code_tokens):
                yield token
|
809 |
|
810 |
|
class PerlLexer(RegexLexer):
    """
    For `Perl <http://www.perl.org>`_ source code.
    """

    name = 'Perl'
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm']
    mimetypes = ['text/x-perl', 'application/x-perl']

    # DOTALL lets ``.`` match newlines (needed by the POD and heredoc rules);
    # MULTILINE makes ``^`` and ``$`` match at every line boundary.
    flags = re.DOTALL | re.MULTILINE
    # TODO: give this a perl guy who knows how to parse perl...
    tokens = {
        # Entered after the pattern half of a substitution written with
        # balanced delimiters (``s{...}``); matches the replacement half,
        # whatever delimiter it uses, and returns to the previous state.
        'balanced-regex': [
            (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'{(\\\\|\\}|[^}])*}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\\)|[^\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\#.*?$', Comment.Single),
            # POD block: from a ``=word`` line up to the next ``=cut``
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
             r'next|our|redo|reset|then|unless|until|while|use|'
             r'print|new|BEGIN|END|return)\b', Keyword),
            (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\/|[^/])*/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\@|[^@])*@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex),
            (r's%(\\\\|\\%|[^%])*%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex),
            # balanced delimiters
            (r's{(\\\\|\\}|[^}])*}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
            (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'),

            (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[gcimosx]*', String.Regex),
            (r'\s+', Text),
            (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
             r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
             r'continue|cos|crypt|dbmclose|dbmopen|defined|delete|die|'
             r'dump|each|endgrent|endhostent|endnetent|endprotoent|'
             r'endpwent|endservent|eof|eval|exec|exists|exit|exp|fcntl|'
             r'fileno|flock|fork|format|formline|getc|getgrent|getgrgid|'
             r'getgrnam|gethostbyaddr|gethostbyname|gethostent|getlogin|'
             r'getnetbyaddr|getnetbyname|getnetent|getpeername|getpgrp|'
             r'getppid|getpriority|getprotobyname|getprotobynumber|'
             r'getprotoent|getpwent|getpwnam|getpwuid|getservbyname|'
             r'getservbyport|getservent|getsockname|getsockopt|glob|gmtime|'
             r'goto|grep|hex|import|index|int|ioctl|join|keys|kill|last|'
             r'lc|lcfirst|length|link|listen|local|localtime|log|lstat|'
             r'map|mkdir|msgctl|msgget|msgrcv|msgsnd|my|next|no|oct|open|'
             r'opendir|ord|our|pack|package|pipe|pop|pos|printf|'
             r'prototype|push|quotemeta|rand|read|readdir|'
             r'readline|readlink|readpipe|recv|redo|ref|rename|require|'
             r'reverse|rewinddir|rindex|rmdir|scalar|seek|seekdir|'
             r'select|semctl|semget|semop|send|setgrent|sethostent|setnetent|'
             r'setpgrp|setpriority|setprotoent|setpwent|setservent|'
             r'setsockopt|shift|shmctl|shmget|shmread|shmwrite|shutdown|'
             r'sin|sleep|socket|socketpair|sort|splice|split|sprintf|sqrt|'
             r'srand|stat|study|substr|symlink|syscall|sysopen|sysread|'
             r'sysseek|system|syswrite|tell|telldir|tie|tied|time|times|tr|'
             r'truncate|uc|ucfirst|umask|undef|unlink|unpack|unshift|untie|'
             r'utime|values|vec|wait|waitpid|wantarray|warn|write'
             r')\b', Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            # heredoc: ``<<TAG`` (optionally quoted) up to a line holding TAG
            (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
            (r'__END__', Comment.Preproc, 'end-part'),
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'\d+', Number.Integer),
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'`(\\\\|\\`|[^`])*`', String.Backtick),
            # was ``String.Regexp``; every other rule uses ``String.Regex``
            (r'<([^\s>]+)>', String.Regex),
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            # Quote-like operator with an arbitrary delimiter: group 2
            # captures the delimiter and the string runs to its next
            # occurrence.  (The previous pattern back-referenced the operator
            # name and only allowed literal dots/newlines in between.)  Word
            # characters and whitespace are not accepted as delimiters so
            # that ordinary words starting with "q" are left alone.
            (r'(q|qq|qw|qr|qx)([^\w\s])(.|\n)*?\2', String.Other),
            (r'package\s+', Keyword, 'modulename'),
            (r'sub\s+', Keyword, 'funcname'),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
                                                      # of punctuation in Perl!
            # zero-width: hand any remaining word over to the 'name' state
            (r'(?=\w)', Name, 'name'),
        ],
        # Body of a ``format NAME =`` declaration; a line holding only "."
        # terminates it.
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        # What follows a sigil ($, @, %, #).
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'), # hash syntax?
            (r'\)|,', Punctuation, '#pop'), # argument specifier
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
        ],
        'name': [
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name, '#pop'),
            (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
            (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
        ],
        'modulename': [
            (r'[a-zA-Z_][\w_]*', Name.Namespace, '#pop')
        ],
        'funcname': [
            (r'[a-zA-Z_][\w_]*[\!\?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r'.*?{', Punctuation, '#pop'),
            (r';', Punctuation, '#pop'),
        ],
        # The four bracketed quote states below share one shape: escaped
        # delimiter, lone backslash, nested opener (push), closer (pop),
        # plain text.  The plain-text class excludes the backslash in all of
        # them so that the escape rules actually get a chance to match.
        'cb-string': [
            (r'\\[\{\}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^\{\}\\]+', String.Other)
        ],
        'rb-string': [
            (r'\\[\(\)\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^\(\)\\]+', String.Other)
        ],
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]\\]+', String.Other)
        ],
        'lt-string': [
            (r'\\[\<\>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            # the character class used to be followed by a stray "]", which
            # stopped ordinary text inside q<...> from matching
            (r'[^\<\>\\]+', String.Other)
        ],
        # Everything after ``__END__`` (``.`` spans newlines under DOTALL).
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }

    def analyse_text(text):
        """
        Rate how likely *text* is Perl: certain for a perl shebang, 0.9 if
        it contains ``my $``, and 0.1 otherwise.
        """
        if shebang_matches(text, r'perl(\d\.\d\.\d)?'):
            return True
        if 'my $' in text:
            return 0.9
        return 0.1 # who knows, might still be perl!
|
983 |
|
984 |
|
class LuaLexer(RegexLexer):
    """
    For `Lua <http://www.lua.org>`_ source code.

    Additional options accepted:

    `func_name_highlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabled_modules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted.

        To get a list of allowed modules have a look into the
        `_luabuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._luabuiltins import MODULES
            >>> MODULES.keys()
            ['string', 'coroutine', 'modules', 'io', 'basic', ...]
    """

    name = 'Lua'
    aliases = ['lua']
    filenames = ['*.lua']
    mimetypes = ['text/x-lua', 'application/x-lua']

    tokens = {
        'root': [
            # long comment: --[[ ... ]] with matching "=" levels
            (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline),
            ('--.*$', Comment.Single),

            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            ('(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),

            (r'\n', Text),
            (r'[^\S\n]', Text),
            # long string: [[ ... ]] with matching "=" levels.  This must stay
            # ahead of the punctuation rule, which would otherwise take "[".
            (r'(?s)\[(=*)\[.*?\]\1\]', String.Multiline),

            # Operators are tried before punctuation, and "..." before "..",
            # so that concatenation and varargs are not split into single
            # "." punctuation tokens.
            (r'(==|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#])', Operator),
            (r'[\[\]\{\}\(\)\.,:;]', Punctuation),
            (r'(and|or|not)\b', Operator.Word),

            ('(break|do|else|elseif|end|for|if|in|repeat|return|then|until|'
             r'while)\b', Keyword),
            (r'(local)\b', Keyword.Declaration),
            (r'(true|false|nil)\b', Keyword.Constant),

            (r'(function)(\s+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),

            # a name, optionally with one ".member" part; such dotted names
            # are split up again in get_tokens_unprocessed()
            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),

            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],

        'funcname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Function, '#pop'),
            # inline function
            (r'\(', Punctuation, '#pop'),
        ],

        'classname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Class, '#pop')
        ],

        # if I understand correctly, every character is valid in a lua string,
        # so this state is only for later corrections
        'string': [
            ('.', String)
        ],

        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],

        'sqs': [
            ("'", String, '#pop'),
            include('string')
        ],

        'dqs': [
            ('"', String, '#pop'),
            include('string')
        ]
    }

    def __init__(self, **options):
        """
        Read the `func_name_highlighting` and `disabled_modules` options and
        collect the builtin function names that should be highlighted.
        """
        self.func_name_highlighting = get_bool_opt(
            options, 'func_name_highlighting', True)
        self.disabled_modules = get_list_opt(options, 'disabled_modules', [])

        self._functions = set()
        if self.func_name_highlighting:
            from pygments.lexers._luabuiltins import MODULES
            # .items() rather than .iteritems(): available on Python 2 and 3
            for mod, func in MODULES.items():
                if mod not in self.disabled_modules:
                    self._functions.update(func)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Post-process the regex lexer's tokens: known builtin names become
        ``Name.Builtin``, and any other dotted name is split into
        name / ``.`` / name tokens.
        """
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
                elif '.' in value:
                    # the Name pattern allows at most one dot, so this
                    # always unpacks into exactly two parts
                    a, b = value.split('.')
                    yield index, Name, a
                    yield index + len(a), Punctuation, u'.'
                    yield index + len(a) + 1, Name, b
                    continue
            yield index, token, value
|
1105 |
|
1106 |
|
class MiniDLexer(RegexLexer):
    """
    For `MiniD <http://www.dsource.org/projects/minid>`_ (a D-like scripting
    language) source.
    """
    name = 'MiniD'
    filenames = ['*.md']
    aliases = ['minid']
    mimetypes = ['text/x-minidsrc']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # Keywords
            (r'(as|assert|break|case|catch|class|continue|coroutine|default'
             r'|do|else|finally|for|foreach|function|global|namespace'
             r'|if|import|in|is|local|module|return|super|switch'
             r'|this|throw|try|vararg|while|with|yield)\b', Keyword),
            (r'(false|true|null)\b', Keyword.Constant),
            # FloatLiteral
            (r'([0-9][0-9_]*)?\.[0-9_]+([eE][+\-]?[0-9_]+)?', Number.Float),
            # IntegerLiteral
            # -- Binary
            (r'0[Bb][01_]+', Number),
            # -- Octal
            (r'0[Cc][0-7_]+', Number.Oct),
            # -- Hexadecimal
            (r'0[xX][0-9a-fA-F_]+', Number.Hex),
            # -- Decimal
            (r'(0|[1-9][0-9_]*)', Number.Integer),
            # CharacterLiteral
            (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-9]{1,3}"""
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""",
             String.Char
            ),
            # StringLiteral
            # The two wysiwyg forms stop at the first quote character that
            # is not doubled.  They used to be written with a greedy ".",
            # which ran on to the last quote on the line and merged
            # neighbouring strings into one token.
            # -- WysiwygString
            (r'@"(""|[^"])*"', String),
            # -- AlternateWysiwygString
            (r'`(``|[^`])*`', String),
            # -- DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Tokens
            (
             r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>'
             r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)'
             r'|[-/.&$@|\+<>!()\[\]{}?,;:=*%^~#\\]', Punctuation
            ),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        # /+ ... +/ comments nest: every opener pushes this state again and
        # every closer pops one level.
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ],
    }
|
1170 |
|
1171 |
|
class IoLexer(RegexLexer):
    """
    For `Io <http://iolanguage.com/>`_ (a small, prototype-based
    programming language) source.

    *New in Pygments 0.10.*
    """
    name = 'Io'
    filenames = ['*.io']
    aliases = ['io']
    mimetypes = ['text/x-iosrc']
    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'#(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Operators
            (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}',
             Operator),
            # keywords
            (r'(clone|do|doFile|doString|method|for|if|else|elseif|then)\b',
             Keyword),
            # constants
            (r'(nil|false|true)\b', Name.Constant),
            # names
            # (must be a raw string: in a plain string "\b" is a backspace
            # character rather than a word boundary, so the rule never fired)
            (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b',
             Name.Builtin),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            # numbers
            # A float needs a decimal point or an exponent; the dot used to
            # be optional, which made every plain integer a Number.Float and
            # left the integer rule unreachable.
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'\d+', Number.Integer)
        ],
        # /+ ... +/ comments nest: every opener pushes this state again and
        # every closer pops one level.
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ]
    }
|
1217 |
|
1218 |
|
class TclLexer(RegexLexer):
    """
    For Tcl source code.

    *New in Pygments 0.10.*
    """

    keyword_cmds_re = (
        r'\b(after|apply|array|break|catch|continue|elseif|else|error|'
        r'eval|expr|for|foreach|global|if|namespace|proc|rename|return|'
        r'set|switch|then|trace|unset|update|uplevel|upvar|variable|'
        r'vwait|while)\b'
        )

    builtin_cmds_re = (
        r'\b(append|bgerror|binary|cd|chan|clock|close|concat|dde|dict|'
        r'encoding|eof|exec|exit|fblocked|fconfigure|fcopy|file|'
        r'fileevent|flush|format|gets|glob|history|http|incr|info|interp|'
        r'join|lappend|lassign|lindex|linsert|list|llength|load|loadTk|'
        r'lrange|lrepeat|lreplace|lreverse|lsearch|lset|lsort|mathfunc|'
        r'mathop|memory|msgcat|open|package|pid|pkg::create|pkg_mkIndex|'
        r'platform|platform::shell|puts|pwd|re_syntax|read|refchan|'
        r'regexp|registry|regsub|scan|seek|socket|source|split|string|'
        r'subst|tell|time|tm|unknown|unload)\b'
        )

    name = 'Tcl'
    aliases = ['tcl']
    filenames = ['*.tcl']
    mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl']

    def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""):
        """
        Build the rules that recognise the first word of a command.

        Called as a plain function while the class body is evaluated.
        *context* is appended to ``'params'`` to name the state entered
        after the command word ('' / '-in-brace' / '-in-bracket' /
        '-in-paren').
        """
        return [
            (keyword_cmds_re, Keyword, 'params' + context),
            (builtin_cmds_re, Name.Builtin, 'params' + context),
            (r'([\w\.\-]+)', Name.Variable, 'params' + context),
            (r'#', Comment, 'comment'),
        ]

    tokens = {
        'root': [
            include('command'),
            include('basic'),
            include('data'),
        ],
        'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re),
        'command-in-brace': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-brace"),
        'command-in-bracket': _gen_command_rules(keyword_cmds_re,
                                                 builtin_cmds_re,
                                                 "-in-bracket"),
        'command-in-paren': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-paren"),
        'basic': [
            (r'\(', Keyword, 'paren'),
            (r'\[', Keyword, 'bracket'),
            (r'\{', Keyword, 'brace'),
            (r'"', String.Double, 'string'),
            (r'(eq|ne|in|ni)\b', Operator.Word),
            (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator),
        ],
        'data': [
            (r'\s+', Text),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'0[0-7]+', Number.Oct),
            (r'\d+\.\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'\$([\w\.\-\:]+)', Name.Variable),
            (r'([\w\.\-\:]+)', Text),
        ],
        # Arguments of a command; the command ends at ";" or a newline.
        'params': [
            (r';', Keyword, '#pop'),
            (r'\n', Text, '#pop'),
            # "\b" keeps "elseif" in one piece (without it "else" matched
            # first and left "if" behind) and stops prefixes of longer
            # words from being coloured as keywords
            (r'(else|elseif|then)\b', Keyword),
            include('basic'),
            include('data'),
        ],
        # In the nested variants the closing delimiter leaves both the
        # params state and the enclosing brace/paren/bracket state.
        'params-in-brace': [
            (r'}', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-paren': [
            (r'\)', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-bracket': [
            (r'\]', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'string': [
            (r'\[', String.Double, 'string-square'),
            (r'(\\\\|\\[0-7]+|\\.|[^"])', String.Double),
            (r'"', String.Double, '#pop')
        ],
        'string-square': [
            (r'\[', String.Double, 'string-square'),
            (r'(\\\\|\\[0-7]+|\\.|[^\]])', String.Double),
            (r'\]', String.Double, '#pop')
        ],
        'brace': [
            (r'}', Keyword, '#pop'),
            include('command-in-brace'),
            include('basic'),
            include('data'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('command-in-paren'),
            include('basic'),
            include('data'),
        ],
        'bracket': [
            (r'\]', Keyword, '#pop'),
            include('command-in-bracket'),
            include('basic'),
            include('data'),
        ],
        # Entered right after the "#" of a comment.
        'comment': [
            # An empty comment ("#" directly followed by the newline) ends
            # here; the two patterns below both need at least one character
            # in front of the line's newline and cannot match at this point.
            (r'\n', Comment, '#pop'),
            (r'.*[^\\]\n', Comment, '#pop'),
            # a trailing backslash continues the comment on the next line
            (r'.*\\\n', Comment),
        ],
    }

    def analyse_text(text):
        """Recognise Tcl by a shebang line that mentions ``tcl``."""
        return shebang_matches(text, r'(tcl)')
|
1346 |
|
1347 |
|
class ClojureLexer(RegexLexer):
    """
    Lexer for `Clojure <http://clojure.org/>`_ source code.

    *New in Pygments 0.11.*
    """
    name = 'Clojure'
    aliases = ['clojure', 'clj']
    filenames = ['*.clj']
    mimetypes = ['text/x-clojure', 'application/x-clojure']

    # Special forms and definition macros, highlighted as keywords.
    keywords = [
        'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
        'defstruct',
        'if', 'cond',
        'let', 'for'
    ]
    # Core functions and macros, highlighted as builtins when they are the
    # first element of a list.
    builtins = [
        '.', '..',
        '*', '+', '-', '->', '..', '/', '<', '<=', '=', '==', '>', '>=',
        'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
        'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
        'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
        'aset-int', 'aset-long', 'aset-short', 'assert', 'assoc', 'await',
        'await-for', 'bean', 'binding', 'bit-and', 'bit-not', 'bit-or',
        'bit-shift-left', 'bit-shift-right', 'bit-xor', 'boolean', 'branch?',
        'butlast', 'byte', 'cast', 'char', 'children', 'class',
        'clear-agent-errors', 'comment', 'commute', 'comp', 'comparator',
        'complement', 'concat', 'conj', 'cons', 'constantly',
        'construct-proxy', 'contains?', 'count', 'create-ns', 'create-struct',
        'cycle', 'dec', 'deref', 'difference', 'disj', 'dissoc', 'distinct',
        'doall', 'doc', 'dorun', 'doseq', 'dosync', 'dotimes', 'doto',
        'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
        'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
        'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
        'fnseq', 'frest', 'gensym', 'get', 'get-proxy-class',
        'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
        'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
        'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
        'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
        'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
        'lefts', 'line-seq', 'list', 'list*', 'load', 'load-file',
        'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
        'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
        'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
        'min-key', 'name', 'namespace', 'neg?', 'new', 'newline', 'next',
        'nil?', 'node', 'not', 'not-any?', 'not-every?', 'not=', 'ns-imports',
        'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers',
        'ns-resolve', 'ns-unmap', 'nth', 'nthrest', 'or', 'parse', 'partial',
        'path', 'peek', 'pop', 'pos?', 'pr', 'pr-str', 'print', 'print-str',
        'println', 'println-str', 'prn', 'prn-str', 'project', 'proxy',
        'proxy-mappings', 'quot', 'rand', 'rand-int', 'range', 're-find',
        're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq',
        'read', 'read-line', 'reduce', 'ref', 'ref-set', 'refer', 'rem',
        'remove', 'remove-method', 'remove-ns', 'rename', 'rename-keys',
        'repeat', 'replace', 'replicate', 'resolve', 'rest', 'resultset-seq',
        'reverse', 'rfirst', 'right', 'rights', 'root', 'rrest', 'rseq',
        'second', 'select', 'select-keys', 'send', 'send-off', 'seq',
        'seq-zip', 'seq?', 'set', 'short', 'slurp', 'some', 'sort',
        'sort-by', 'sorted-map', 'sorted-map-by', 'sorted-set',
        'special-symbol?', 'split-at', 'split-with', 'str', 'string?',
        'struct', 'struct-map', 'subs', 'subvec', 'symbol', 'symbol?',
        'sync', 'take', 'take-nth', 'take-while', 'test', 'time', 'to-array',
        'to-array-2d', 'tree-seq', 'true?', 'union', 'up', 'update-proxy',
        'val', 'vals', 'var-get', 'var-set', 'var?', 'vector', 'vector-zip',
        'vector?', 'when', 'when-first', 'when-let', 'when-not',
        'with-local-vars', 'with-meta', 'with-open', 'with-out-str',
        'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper']

    # Characters accepted in an identifier.  Strictly speaking a name must
    # not consist of digits only, but this approximation is good enough
    # because the number rules are tried first.
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+'

    tokens = {
        'root': [
            # line comment: from a semicolon to the end of the line
            (r';.*$', Comment.Single),

            # whitespace carries no meaning
            (r'\s+', Text),

            # numeric literals
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # TODO: the less common number prefixes are not handled yet;
            # their exact meaning still has to be worked out
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # string literals, quoted symbols and character literals
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"\\([()/'\".'_!§$%& ?;=+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # boolean constants
            (r'(#t|#f)', Name.Constant),

            # reader/quoting operators
            (r"('|#|`|,@|,|\.)", Operator),

            # keywords: each escaped entry must be followed by one space,
            # which becomes part of the token
            ('(' + '|'.join([
                re.escape(entry) + ' ' for entry in keywords]) + ')',
             Keyword),

            # the first name inside a quoted or dispatch list, e.g.
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # builtins: only directly after an opening parenthesis and,
            # like the keywords, followed by one space
            (r'(?<=\()(' + '|'.join([
                re.escape(entry) + ' ' for entry in builtins]) + ')',
             Name.Builtin),

            # any other name in call position is a function ...
            (r'(?<=\()' + valid_name, Name.Function),
            # ... and every remaining name is a variable
            (valid_name, Name.Variable),

            # vector brackets
            (r'(\[|\])', Punctuation),

            # map braces
            (r'(\{|\})', Punctuation),

            # and, of course, the parentheses
            (r'(\(|\))', Punctuation),
        ],
    }
|
1480 } |