ThirdParty/Pygments/pygments/lexers/agile.py

changeset 0
de9c2efb9d02
child 12
1d8dd9706f46
equal deleted inserted replaced
-1:000000000000 0:de9c2efb9d02
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.agile
4 ~~~~~~~~~~~~~~~~~~~~~
5
6 Lexers for agile languages.
7
8 :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13 try:
14 set
15 except NameError:
16 from sets import Set as set
17
18 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
19 LexerContext, include, combined, do_insertions, bygroups, using
20 from pygments.token import Error, Text, \
21 Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
22 from pygments.util import get_bool_opt, get_list_opt, shebang_matches
23 from pygments import unistring as uni
24
25
# Public lexer classes exported by this module.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'RubyLexer', 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer',
           'MiniDLexer', 'IoLexer', 'TclLexer', 'ClojureLexer',
           'Python3Lexer', 'Python3TracebackLexer']

# b/w compatibility
from pygments.lexers.functional import SchemeLexer

# Matches a single line including its trailing newline; used by the
# console lexers to iterate over input line by line.
line_re = re.compile('.*?\n')
35
36
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.
    """

    name = 'Python'
    aliases = ['python', 'py']
    filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript']
    mimetypes = ['text/x-python', 'application/x-python']

    tokens = {
        'root': [
            (r'\n', Text),
            # triple-quoted strings at the start of a line are highlighted
            # as docstrings
            (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            # explicit line continuation
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            # def/class/from/import push helper states that consume the
            # following identifier(s)
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'),
            include('builtins'),
            include('backtick'),
            # raw strings: no 'stringescape' state combined in, so
            # backslash escapes are not highlighted inside them
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (r'(assert|break|continue|del|elif|else|except|exec|'
             r'finally|for|global|if|lambda|pass|print|raise|'
             r'return|try|while|yield|as|with)\b', Keyword),
        ],
        'builtins': [
            # builtin functions; the lookbehind avoids highlighting
            # attribute accesses like "obj.open"
            (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
             r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
             r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
             r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
             r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
             r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
             r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
             r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
             r'vars|xrange|zip)\b', Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
             r')\b', Name.Builtin.Pseudo),
            (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
             r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
             r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
             r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
             r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
             r'NotImplemented|NotImplementedError|OSError|OverflowError|'
             r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
             r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
             r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
             r'TypeError|UnboundLocalError|UnicodeDecodeError|'
             r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
             r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
             r'WindowsError|ZeroDivisionError)\b', Name.Exception),
        ],
        'numbers': [
            # floats must come before integers so "1.5" is not split
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+', Number.Float),
            (r'0\d+', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+', Number.Integer)
        ],
        'backtick': [
            # Python 2 repr-backticks
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'((?:\s|\\\s)+)(as)((?:\s|\\\s)+)',
             bygroups(Text, Keyword.Namespace, Text)),
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            (r'', Text, '#pop') # all else: go back
        ],
        'fromimport': [
            (r'((?:\s|\\\s)+)(import)\b', bygroups(Text, Keyword.Namespace), '#pop'),
            (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style string formatting placeholders
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }

    def analyse_text(text):
        # recognize a python shebang line, e.g. "#!/usr/bin/python2.5"
        return shebang_matches(text, r'pythonw?(2\.\d)?')
181
182
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    *New in Pygments 0.10.*
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    # Python 3 identifiers may contain any XID_Start/XID_Continue character
    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    # start from the Python 2 table and override the states that changed
    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (r'(assert|break|continue|del|elif|else|except|'
         r'finally|for|global|if|lambda|pass|raise|'
         r'return|try|while|yield|as|with|True|False|None)\b', Keyword),
    ]
    tokens['builtins'] = [
        (r'(?<!\.)(__import__|abs|all|any|bin|bool|bytearray|bytes|'
         r'chr|classmethod|cmp|compile|complex|delattr|dict|dir|'
         r'divmod|enumerate|eval|filter|float|format|frozenset|getattr|'
         r'globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|'
         r'iter|len|list|locals|map|max|memoryview|min|next|object|oct|'
         r'open|ord|pow|print|property|range|repr|reversed|round|'
         r'set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|'
         r'vars|zip)\b', Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
        (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
         r'BaseException|BufferError|BytesWarning|DeprecationWarning|'
         r'EOFError|EnvironmentError|Exception|FloatingPointError|'
         r'FutureWarning|GeneratorExit|IOError|ImportError|'
         r'ImportWarning|IndentationError|IndexError|KeyError|'
         r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
         r'NotImplementedError|OSError|OverflowError|'
         r'PendingDeprecationWarning|ReferenceError|'
         r'RuntimeError|RuntimeWarning|StopIteration|'
         r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
         r'TypeError|UnboundLocalError|UnicodeDecodeError|'
         r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
         r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
         r'WindowsError|ZeroDivisionError)\b', Name.Exception),
    ]
    tokens['numbers'] = [
        # Python 3 dropped octal "0777" and long "L" literals, added 0o/0b
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    # repr-backticks no longer exist in Python 3
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@[a-zA-Z0-9_]+', Name.Decorator),
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        (r'', Text, '#pop') # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
    ]
    # don't highlight "%s" substitutions
    tokens['strings'] = [
        (r'[^\\\'"%\n]+', String),
        # quotes, percents and backslashes must be parsed one at a time
        (r'[\'"\\]', String),
        # unhandled string formatting sign
        (r'%', String)
        # newlines are an error (use "nl" state)
    ]

    def analyse_text(text):
        # recognize a python3 shebang line, e.g. "#!/usr/bin/python3.0"
        return shebang_matches(text, r'pythonw?3(\.\d)?')
272
273
class PythonConsoleLexer(Lexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print a
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``False``.
        *New in Pygments 1.0.*
    """
    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # choose code and traceback sublexers according to `python3`
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''      # source accumulated from prompt lines
        insertions = []   # prompt tokens spliced back in via do_insertions()
        curtb = ''        # traceback text accumulated so far
        tbindex = 0       # start offset of the current traceback in `text`
        tb = 0            # flag: currently inside a traceback block
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...':
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                # Fix: the "File" line of a traceback is indented by TWO
                # spaces ('  File "..."'); the collapsed single-space
                # pattern could never match an actual traceback line.
                if (line.startswith('Traceback (most recent call last):') or
                    re.match(r'  File "[^"]+", line \d+\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    # a non-indented line ends the traceback block
                    if not (line.startswith(' ') or line.strip() == '...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                            pylexer.get_tokens_unprocessed(curcode)):
                yield item
353
354
class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    tokens = {
        'root': [
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # SyntaxError starts with this.
            # (two-space indent restored: real traceback "File" lines are
            # indented by exactly two spaces)
            (r'^(?=  File "[^"]+", line \d+\n)', Generic.Traceback, 'intb'),
        ],
        'intb': [
            # '  File "name", line N, in func' frame header
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name.Identifier, Text)),
            (r'^(  File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            # the offending source line, indented four spaces
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            # literal ellipsis marker; the previous '(...)' matched ANY
            # three characters instead of "..."
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)), # for doctests...
            # "ExceptionType: message" terminates the traceback
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            # exception without a message
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }
388
389
class Python3TracebackLexer(RegexLexer):
    """
    For Python 3.0 tracebacks, with support for chained exceptions.

    *New in Pygments 1.0.*
    """

    name = 'Python 3.0 Traceback'
    aliases = ['py3tb']
    filenames = ['*.py3tb']
    mimetypes = ['text/x-python3-traceback']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # chained-exception separators (PEP 3134)
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
        ],
        'intb': [
            # '  File "name", line N, in func' frame header (two-space
            # indent restored: single space never matches real tracebacks)
            (r'^(  File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name.Identifier, Text)),
            # the offending source line, indented four spaces
            (r'^(    )(.+)(\n)',
             bygroups(Text, using(Python3Lexer), Text)),
            # literal ellipsis marker; the previous '(...)' matched ANY
            # three characters instead of "..."
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)), # for doctests...
            # "ExceptionType: message" terminates the traceback
            (r'^(.+)(: )(.+)(\n)',
             bygroups(Name.Class, Text, Name.Identifier, Text), '#pop'),
            # exception without a message
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Name.Class, Text), '#pop')
        ],
    }
424
425
class RubyLexer(ExtendedRegexLexer):
    """
    For `Ruby <http://www.ruby-lang.org>`_ source code.
    """

    name = 'Ruby'
    aliases = ['rb', 'ruby']
    filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx']
    mimetypes = ['text/x-ruby', 'application/x-ruby']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        # okay, this is the hardest part of parsing Ruby...
        # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), Name.Constant, match.group(3)   # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        # remember pending heredocs on the lexer context; bodies are only
        # emitted once the outermost opener's line has been fully lexed
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        # '<<-' allows the terminator to be indented ("tolerant" mode)
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        # terminator found: emit body lines, then the name
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield match.start(), Name.Constant, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            ctx.end = len(ctx.text)
            del heredocstack[:]

    # Builds the 'strings' state plus the generated per-delimiter states.
    # Called once at class-construction time (note: no `self`).
    def gen_rubystrings_rules():
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            # lex the regex body in a fresh context so interpolations nest
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings: one state triple per bracket pair
        for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \
                                    ('\\[', '\\]', 'sb'), \
                                    ('\\(', '\\)', 'pa'), \
                                    ('<', '>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                # unescaped opening braces nest via #push
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
                (r'(?<!\\)' + lbrace, String.Regex, '#push'),
                (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + lbrace + rbrace + ']', String.Regex),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([^a-zA-Z0-9]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])([^\1\\]*(?:\\.[^\1\\]*)*)\1', String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:[^\3\\]*(?:\\.[^\3\\]*)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:[^\3\\]*(?:\\.[^\3\\]*)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))([^\2\\]*(?:\\.[^\2\\]*)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            (r'=begin\s.*?\n=end', Comment.Multiline),
            # keywords
            (r'(BEGIN|END|alias|begin|break|case|defined\?|'
             r'do|else|elsif|end|ensure|for|if|in|next|redo|'
             r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
             r'while|yield)\b', Keyword),
            # start of function, class and module names
            (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            # "def" directly followed by an operator method name
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # special methods
            (r'(initialize|new|loop|include|extend|raise|attr_reader|'
             r'attr_writer|attr_accessor|attr|catch|throw|private|'
             r'module_function|public|protected|true|false|nil)\b', Keyword.Pseudo),
            (r'(not|and|or)\b', Operator.Word),
            # predicate builtins ending in "?"
            (r'(autoload|block_given|const_defined|eql|equal|frozen|include|'
             r'instance_of|is_a|iterator|kind_of|method_defined|nil|'
             r'private_method_defined|protected_method_defined|'
             r'public_method_defined|respond_to|tainted)\?', Name.Builtin),
            # mutating builtins ending in "!"
            (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
            (r'(?<!\.)(Array|Float|Integer|String|__id__|__send__|abort|ancestors|'
             r'at_exit|autoload|binding|callcc|caller|'
             r'catch|chomp|chop|class_eval|class_variables|'
             r'clone|const_defined\?|const_get|const_missing|const_set|constants|'
             r'display|dup|eval|exec|exit|extend|fail|fork|'
             r'format|freeze|getc|gets|global_variables|gsub|'
             r'hash|id|included_modules|inspect|instance_eval|'
             r'instance_method|instance_methods|'
             r'instance_variable_get|instance_variable_set|instance_variables|'
             r'lambda|load|local_variables|loop|'
             r'method|method_missing|methods|module_eval|name|'
             r'object_id|open|p|print|printf|private_class_method|'
             r'private_instance_methods|'
             r'private_methods|proc|protected_instance_methods|'
             r'protected_methods|public_class_method|'
             r'public_instance_methods|public_methods|'
             r'putc|puts|raise|rand|readline|readlines|require|'
             r'scan|select|self|send|set_trace_func|singleton_methods|sleep|'
             r'split|sprintf|srand|sub|syscall|system|taint|'
             r'test|throw|to_a|to_s|trace_var|trap|type|untaint|untrace_var|'
             r'warn)\b', Name.Builtin),
            (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)', heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=(?:\s|;|\.)index\s)|'
             r'(?<=(?:\s|;|\.)scan\s)|'
             r'(?<=(?:\s|;|\.)sub\s)|'
             r'(?<=(?:\s|;|\.)sub!\s)|'
             r'(?<=(?:\s|;|\.)gsub\s)|'
             r'(?<=(?:\s|;|\.)gsub!\s)|'
             r'(?<=(?:\s|;|\.)match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)(?!=)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls)
            (r'(?<=\(|,)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/[^\s=])', String.Regex, 'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
            (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
            (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*' # modifiers
             r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
            # this is needed because ruby attributes can look
            # like keywords (class) or like this: ` ?!?
            (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
             bygroups(Operator, Name)),
            (r'[a-zA-Z_][\w_]*[\!\?]?', Name),
            (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        'funcname': [
            (r'\(', Punctuation, 'defexpr'),
            # optional receiver, then a method or operator name
            (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
             r'([a-zA-Z_][\w_]*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            (r'', Text, '#pop')
        ],
        'classname': [
            (r'\(', Punctuation, 'defexpr'),
            # singleton class syntax: class << obj
            (r'<<', Operator, '#pop'),
            (r'[A-Z_][\w_]*', Name.Class, '#pop'),
            (r'', Text, '#pop')
        ],
        'defexpr': [
            (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
            (r'\(', Operator, '#push'),
            include('root')
        ],
        'in-intp': [
            # inside #{...}: lex as regular Ruby until the closing brace
            ('}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#{', String.Interpol, 'in-intp'),
            # shorthand interpolation of class/instance/global variables
            (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
            (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[mixounse]*', String.Regex, '#pop'),
        ],
        'end-part': [
            # everything after __END__ is data, not code
            (r'.+', Comment.Preproc, '#pop')
        ]
    }
    tokens.update(gen_rubystrings_rules())

    def analyse_text(text):
        # recognize a ruby shebang line, e.g. "#!/usr/bin/ruby1.8"
        return shebang_matches(text, r'ruby(1\.\d)?')
763
764
class RubyConsoleLexer(Lexer):
    """
    For Ruby interactive console (**irb**) output like:

    .. sourcecode:: rbcon

        irb(main):001:0> a = 1
        => 1
        irb(main):002:0> puts a
        1
        => nil
    """
    name = 'Ruby irb session'
    aliases = ['rbcon', 'irb']
    mimetypes = ['text/x-ruby-shellsession']

    # Matches the classic irb prompt as well as the short ">> " / "?> " forms.
    _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
                            '|>> |\?> ')

    def get_tokens_unprocessed(self, text):
        ruby_lexer = RubyLexer(**self.options)

        code_buffer = ''   # Ruby source gathered from prompt lines
        prompt_toks = []   # prompt tokens to re-insert via do_insertions()
        for line_match in line_re.finditer(text):
            line = line_match.group()
            prompt = self._prompt_re.match(line)
            if prompt is None:
                # A non-prompt line: first flush any pending code...
                if code_buffer:
                    for tok in do_insertions(prompt_toks,
                                ruby_lexer.get_tokens_unprocessed(code_buffer)):
                        yield tok
                    code_buffer = ''
                    prompt_toks = []
                # ...then emit the line itself as program output.
                yield line_match.start(), Generic.Output, line
            else:
                cut = prompt.end()
                prompt_toks.append((len(code_buffer),
                                    [(0, Generic.Prompt, line[:cut])]))
                code_buffer += line[cut:]
        # flush code left over at end of input
        if code_buffer:
            for tok in do_insertions(prompt_toks,
                        ruby_lexer.get_tokens_unprocessed(code_buffer)):
                yield tok
809
810
811 class PerlLexer(RegexLexer):
812 """
813 For `Perl <http://www.perl.org>`_ source code.
814 """
815
816 name = 'Perl'
817 aliases = ['perl', 'pl']
818 filenames = ['*.pl', '*.pm']
819 mimetypes = ['text/x-perl', 'application/x-perl']
820
821 flags = re.DOTALL | re.MULTILINE
822 # TODO: give this a perl guy who knows how to parse perl...
823 tokens = {
824 'balanced-regex': [
825 (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
826 (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
827 (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
828 (r'{(\\\\|\\}|[^}])*}[egimosx]*', String.Regex, '#pop'),
829 (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
830 (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
831 (r'\((\\\\|\\\)|[^\)])*\)[egimosx]*', String.Regex, '#pop'),
832 (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'),
833 (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'),
834 (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'),
835 (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
836 ],
837 'root': [
838 (r'\#.*?$', Comment.Single),
839 (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
840 (r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
841 r'next|our|redo|reset|then|unless|until|while|use|'
842 r'print|new|BEGIN|END|return)\b', Keyword),
843 (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
844 bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
845 (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
846 # common delimiters
847 (r's/(\\\\|\\/|[^/])*/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex),
848 (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
849 (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
850 (r's@(\\\\|\\@|[^@])*@(\\\\|\\@|[^@])*@[egimosx]*', String.Regex),
851 (r's%(\\\\|\\%|[^%])*%(\\\\|\\%|[^%])*%[egimosx]*', String.Regex),
852 # balanced delimiters
853 (r's{(\\\\|\\}|[^}])*}\s*', String.Regex, 'balanced-regex'),
854 (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
855 (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
856 (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'),
857
858 (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
859 (r'((?<==~)|(?<=\())\s*/(\\\\|\\/|[^/])*/[gcimosx]*', String.Regex),
860 (r'\s+', Text),
861 (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
862 r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
863 r'continue|cos|crypt|dbmclose|dbmopen|defined|delete|die|'
864 r'dump|each|endgrent|endhostent|endnetent|endprotoent|'
865 r'endpwent|endservent|eof|eval|exec|exists|exit|exp|fcntl|'
866 r'fileno|flock|fork|format|formline|getc|getgrent|getgrgid|'
867 r'getgrnam|gethostbyaddr|gethostbyname|gethostent|getlogin|'
868 r'getnetbyaddr|getnetbyname|getnetent|getpeername|getpgrp|'
869 r'getppid|getpriority|getprotobyname|getprotobynumber|'
870 r'getprotoent|getpwent|getpwnam|getpwuid|getservbyname|'
871 r'getservbyport|getservent|getsockname|getsockopt|glob|gmtime|'
872 r'goto|grep|hex|import|index|int|ioctl|join|keys|kill|last|'
873 r'lc|lcfirst|length|link|listen|local|localtime|log|lstat|'
874 r'map|mkdir|msgctl|msgget|msgrcv|msgsnd|my|next|no|oct|open|'
875 r'opendir|ord|our|pack|package|pipe|pop|pos|printf|'
876 r'prototype|push|quotemeta|rand|read|readdir|'
877 r'readline|readlink|readpipe|recv|redo|ref|rename|require|'
878 r'reverse|rewinddir|rindex|rmdir|scalar|seek|seekdir|'
879 r'select|semctl|semget|semop|send|setgrent|sethostent|setnetent|'
880 r'setpgrp|setpriority|setprotoent|setpwent|setservent|'
881 r'setsockopt|shift|shmctl|shmget|shmread|shmwrite|shutdown|'
882 r'sin|sleep|socket|socketpair|sort|splice|split|sprintf|sqrt|'
883 r'srand|stat|study|substr|symlink|syscall|sysopen|sysread|'
884 r'sysseek|system|syswrite|tell|telldir|tie|tied|time|times|tr|'
885 r'truncate|uc|ucfirst|umask|undef|unlink|unpack|unshift|untie|'
886 r'utime|values|vec|wait|waitpid|wantarray|warn|write'
887 r')\b', Name.Builtin),
888 (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
889 (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
890 (r'__END__', Comment.Preproc, 'end-part'),
891 (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
892 (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
893 (r'[$@%#]+', Name.Variable, 'varname'),
894 (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
895 (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
896 (r'0b[01]+(_[01]+)*', Number.Bin),
897 (r'\d+', Number.Integer),
898 (r"'(\\\\|\\'|[^'])*'", String),
899 (r'"(\\\\|\\"|[^"])*"', String),
900 (r'`(\\\\|\\`|[^`])*`', String.Backtick),
901 (r'<([^\s>]+)>', String.Regexp),
902 (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
903 (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
904 (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
905 (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
906 (r'(q|qq|qw|qr|qx)(.)[.\n]*?\1', String.Other),
907 (r'package\s+', Keyword, 'modulename'),
908 (r'sub\s+', Keyword, 'funcname'),
909 (r'(\[\]|\*\*|::|<<|>>|>=|<=|<=>|={3}|!=|=~|'
910 r'!~|&&?|\|\||\.{1,3})', Operator),
911 (r'[-+/*%=<>&^|!\\~]=?', Operator),
912 (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
913 # of punctuation in Perl!
914 (r'(?=\w)', Name, 'name'),
915 ],
916 'format': [
917 (r'\.\n', String.Interpol, '#pop'),
918 (r'[^\n]*\n', String.Interpol),
919 ],
920 'varname': [
921 (r'\s+', Text),
922 (r'\{', Punctuation, '#pop'), # hash syntax?
923 (r'\)|,', Punctuation, '#pop'), # argument specifier
924 (r'[a-zA-Z0-9_]+::', Name.Namespace),
925 (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
926 ],
927 'name': [
928 (r'[a-zA-Z0-9_]+::', Name.Namespace),
929 (r'[a-zA-Z0-9_:]+', Name, '#pop'),
930 (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
931 (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
932 ],
933 'modulename': [
934 (r'[a-zA-Z_][\w_]*', Name.Namespace, '#pop')
935 ],
936 'funcname': [
937 (r'[a-zA-Z_][\w_]*[\!\?]?', Name.Function),
938 (r'\s+', Text),
939 # argument declaration
940 (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
941 (r'.*?{', Punctuation, '#pop'),
942 (r';', Punctuation, '#pop'),
943 ],
944 'cb-string': [
945 (r'\\[\{\}\\]', String.Other),
946 (r'\\', String.Other),
947 (r'\{', String.Other, 'cb-string'),
948 (r'\}', String.Other, '#pop'),
949 (r'[^\{\}\\]+', String.Other)
950 ],
951 'rb-string': [
952 (r'\\[\(\)\\]', String.Other),
953 (r'\\', String.Other),
954 (r'\(', String.Other, 'rb-string'),
955 (r'\)', String.Other, '#pop'),
956 (r'[^\(\)]+', String.Other)
957 ],
958 'sb-string': [
959 (r'\\[\[\]\\]', String.Other),
960 (r'\\', String.Other),
961 (r'\[', String.Other, 'sb-string'),
962 (r'\]', String.Other, '#pop'),
963 (r'[^\[\]]+', String.Other)
964 ],
965 'lt-string': [
966 (r'\\[\<\>\\]', String.Other),
967 (r'\\', String.Other),
968 (r'\<', String.Other, 'lt-string'),
969 (r'\>', String.Other, '#pop'),
970 (r'[^\<\>]]+', String.Other)
971 ],
972 'end-part': [
973 (r'.+', Comment.Preproc, '#pop')
974 ]
975 }
976
977 def analyse_text(text):
978 if shebang_matches(text, r'perl(\d\.\d\.\d)?'):
979 return True
980 if 'my $' in text:
981 return 0.9
982 return 0.1 # who knows, might still be perl!
983
984
class LuaLexer(RegexLexer):
    """
    For `Lua <http://www.lua.org>`_ source code.

    Additional options accepted:

    `func_name_highlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabled_modules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted.

        To get a list of allowed modules have a look into the
        `_luabuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._luabuiltins import MODULES
            >>> MODULES.keys()
            ['string', 'coroutine', 'modules', 'io', 'basic', ...]
    """

    name = 'Lua'
    aliases = ['lua']
    filenames = ['*.lua']
    mimetypes = ['text/x-lua', 'application/x-lua']

    tokens = {
        'root': [
            # long comments --[==[ ... ]==] (level markers must match)
            (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline),
            ('--.*$', Comment.Single),

            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            ('(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),

            (r'\n', Text),
            (r'[^\S\n]', Text),
            # long strings [==[ ... ]==]
            (r'(?s)\[(=*)\[.*?\]\1\]', String.Multiline),
            (r'[\[\]\{\}\(\)\.,:;]', Punctuation),

            (r'(==|~=|<=|>=|\.\.|\.\.\.|[=+\-*/%^<>#])', Operator),
            (r'(and|or|not)\b', Operator.Word),

            ('(break|do|else|elseif|end|for|if|in|repeat|return|then|until|'
             r'while)\b', Keyword),
            (r'(local)\b', Keyword.Declaration),
            (r'(true|false|nil)\b', Keyword.Constant),

            (r'(function)(\s+)', bygroups(Keyword, Text), 'funcname'),
            # NOTE(review): plain Lua has no 'class' keyword; presumably
            # kept for class-like dialects/frameworks -- confirm.
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),

            # plain name, optionally dotted once (a.b); split again in
            # get_tokens_unprocessed below
            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),

            # multiline strings
            (r'(?s)\[(=*)\[(.*?)\]\1\]', String),
            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],

        'funcname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Function, '#pop'),
            # inline function
            ('\(', Punctuation, '#pop'),
        ],

        'classname': [
            ('[A-Za-z_][A-Za-z0-9_]*', Name.Class, '#pop')
        ],

        # if I understand correctly, every character is valid in a lua string,
        # so this state is only for later corrections
        'string': [
            ('.', String)
        ],

        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],

        'sqs': [
            ("'", String, '#pop'),
            include('string')
        ],

        'dqs': [
            ('"', String, '#pop'),
            include('string')
        ]
    }

    def __init__(self, **options):
        # Collect the set of builtin function names to highlight,
        # honoring the `disabled_modules` option.
        self.func_name_highlighting = get_bool_opt(
            options, 'func_name_highlighting', True)
        self.disabled_modules = get_list_opt(options, 'disabled_modules', [])

        self._functions = set()
        if self.func_name_highlighting:
            from pygments.lexers._luabuiltins import MODULES
            for mod, func in MODULES.iteritems():
                if mod not in self.disabled_modules:
                    self._functions.update(func)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        # Post-process Name tokens: promote known builtins to
        # Name.Builtin and split dotted names (a.b) into
        # Name / Punctuation / Name at the correct offsets.
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
                elif '.' in value:
                    # the root-state regex allows at most one dot,
                    # so a two-way split is safe here
                    a, b = value.split('.')
                    yield index, Name, a
                    yield index + len(a), Punctuation, u'.'
                    yield index + len(a) + 1, Name, b
                    continue
            yield index, token, value
1105
1106
class MiniDLexer(RegexLexer):
    """
    For `MiniD <http://www.dsource.org/projects/minid>`_ (a D-like scripting
    language) source.
    """
    name = 'MiniD'
    filenames = ['*.md']
    aliases = ['minid']
    mimetypes = ['text/x-minidsrc']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            # D-style /+ ... +/ comments nest; handled in 'nestedcomment'
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # Keywords
            (r'(as|assert|break|case|catch|class|continue|coroutine|default'
             r'|do|else|finally|for|foreach|function|global|namespace'
             r'|if|import|in|is|local|module|return|super|switch'
             r'|this|throw|try|vararg|while|with|yield)\b', Keyword),
            (r'(false|true|null)\b', Keyword.Constant),
            # FloatLiteral (underscores allowed as digit separators)
            (r'([0-9][0-9_]*)?\.[0-9_]+([eE][+\-]?[0-9_]+)?', Number.Float),
            # IntegerLiteral
            # -- Binary
            (r'0[Bb][01_]+', Number),
            # -- Octal
            (r'0[Cc][0-7_]+', Number.Oct),
            # -- Hexadecimal
            (r'0[xX][0-9a-fA-F_]+', Number.Hex),
            # -- Decimal
            (r'(0|[1-9][0-9_]*)', Number.Integer),
            # CharacterLiteral
            (r"""'(\\['"?\\abfnrtv]|\\x[0-9a-fA-F]{2}|\\[0-9]{1,3}"""
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""",
             String.Char
            ),
            # StringLiteral
            # -- WysiwygString ("" is the escaped quote)
            (r'@"(""|.)*"', String),
            # -- AlternateWysiwygString
            (r'`(``|.)*`', String),
            # -- DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Tokens (multi-char operators first, then single punctuation)
            (
                r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>'
                r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)'
                r'|[-/.&$@|\+<>!()\[\]{}?,;:=*%^~#\\]', Punctuation
            ),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        # nesting comments: '#push'/'#pop' track the nesting depth
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ],
    }
1170
1171
class IoLexer(RegexLexer):
    """
    For `Io <http://iolanguage.com/>`_ (a small, prototype-based
    programming language) source.

    *New in Pygments 0.10.*
    """
    name = 'Io'
    filenames = ['*.io']
    aliases = ['io']
    mimetypes = ['text/x-iosrc']
    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'#(.*?)\n', Comment.Single),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            # NOTE(review): /+ ... +/ nesting comments look copied from the
            # MiniD lexer -- confirm Io actually supports them.
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Operators
            (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}',
             Operator),
            # keywords
            (r'(clone|do|doFile|doString|method|for|if|else|elseif|then)\b',
             Keyword),
            # constants
            (r'(nil|false|true)\b', Name.Constant),
            # names
            # FIX: this pattern must be a raw string -- in a plain string
            # literal '\b' is a backspace character (\x08), so the builtin
            # names would only match when followed by a literal backspace.
            (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b',
             Name.Builtin),
            (r'[a-zA-Z_][a-zA-Z0-9_]*', Name),
            # numbers
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer)
        ],
        # nesting comments: '#push'/'#pop' track the nesting depth
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ]
    }
1217
1218
class TclLexer(RegexLexer):
    """
    For Tcl source code.

    *New in Pygments 0.10.*
    """

    # commands that are language keywords
    keyword_cmds_re = (
        r'\b(after|apply|array|break|catch|continue|elseif|else|error|'
        r'eval|expr|for|foreach|global|if|namespace|proc|rename|return|'
        r'set|switch|then|trace|unset|update|uplevel|upvar|variable|'
        r'vwait|while)\b'
    )

    # commands provided by the Tcl library
    builtin_cmds_re = (
        r'\b(append|bgerror|binary|cd|chan|clock|close|concat|dde|dict|'
        r'encoding|eof|exec|exit|fblocked|fconfigure|fcopy|file|'
        r'fileevent|flush|format|gets|glob|history|http|incr|info|interp|'
        r'join|lappend|lassign|lindex|linsert|list|llength|load|loadTk|'
        r'lrange|lrepeat|lreplace|lreverse|lsearch|lset|lsort|mathfunc|'
        r'mathop|memory|msgcat|open|package|pid|pkg::create|pkg_mkIndex|'
        r'platform|platform::shell|puts|pwd|re_syntax|read|refchan|'
        r'regexp|registry|regsub|scan|seek|socket|source|split|string|'
        r'subst|tell|time|tm|unknown|unload)\b'
    )

    name = 'Tcl'
    aliases = ['tcl']
    filenames = ['*.tcl']
    mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl']

    # Helper run at class-definition time (no self): builds the rule list
    # for a 'command*' state; `context` selects the matching 'params*'
    # state so a command inside {}, [] or () returns to the right place.
    def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""):
        return [
            (keyword_cmds_re, Keyword, 'params' + context),
            (builtin_cmds_re, Name.Builtin, 'params' + context),
            (r'([\w\.\-]+)', Name.Variable, 'params' + context),
            (r'#', Comment, 'comment'),
        ]

    tokens = {
        'root': [
            include('command'),
            include('basic'),
            include('data'),
        ],
        # one command state per bracketing context
        'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re),
        'command-in-brace': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-brace"),
        'command-in-bracket': _gen_command_rules(keyword_cmds_re,
                                                 builtin_cmds_re,
                                                 "-in-bracket"),
        'command-in-paren': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-paren"),
        'basic': [
            (r'\(', Keyword, 'paren'),
            (r'\[', Keyword, 'bracket'),
            (r'\{', Keyword, 'brace'),
            (r'"', String.Double, 'string'),
            (r'(eq|ne|in|ni)\b', Operator.Word),
            (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator),
        ],
        'data': [
            (r'\s+', Text),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'0[0-7]+', Number.Oct),
            (r'\d+\.\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'\$([\w\.\-\:]+)', Name.Variable),
            (r'([\w\.\-\:]+)', Text),
        ],
        # arguments of a command; ';' or newline ends the command
        'params': [
            (r';', Keyword, '#pop'),
            (r'\n', Text, '#pop'),
            (r'(else|elseif|then)', Keyword),
            include('basic'),
            include('data'),
        ],
        # closing delimiter pops both the params and the enclosing
        # bracketing state, hence the double '#pop'
        'params-in-brace': [
            (r'}', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-paren': [
            (r'\)', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-bracket': [
            (r'\]', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'string': [
            # [ starts command substitution inside a quoted string
            (r'\[', String.Double, 'string-square'),
            (r'(\\\\|\\[0-7]+|\\.|[^"])', String.Double),
            (r'"', String.Double, '#pop')
        ],
        'string-square': [
            (r'\[', String.Double, 'string-square'),
            (r'(\\\\|\\[0-7]+|\\.|[^\]])', String.Double),
            (r'\]', String.Double, '#pop')
        ],
        'brace': [
            (r'}', Keyword, '#pop'),
            include('command-in-brace'),
            include('basic'),
            include('data'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('command-in-paren'),
            include('basic'),
            include('data'),
        ],
        'bracket': [
            (r'\]', Keyword, '#pop'),
            include('command-in-bracket'),
            include('basic'),
            include('data'),
        ],
        # comment runs to end of line; a trailing backslash continues it
        'comment': [
            (r'.*[^\\]\n', Comment, '#pop'),
            (r'.*\\\n', Comment),
        ],
    }

    def analyse_text(text):
        return shebang_matches(text, r'(tcl)')
1346
1347
class ClojureLexer(RegexLexer):
    """
    Lexer for `Clojure <http://clojure.org/>`_ source code.

    *New in Pygments 0.11.*
    """
    name = 'Clojure'
    aliases = ['clojure', 'clj']
    filenames = ['*.clj']
    mimetypes = ['text/x-clojure', 'application/x-clojure']

    # special forms / defining macros highlighted as Keyword
    keywords = [
        'fn', 'def', 'defn', 'defmacro', 'defmethod', 'defmulti', 'defn-',
        'defstruct',
        'if', 'cond',
        'let', 'for'
    ]
    # core functions/macros highlighted as Name.Builtin when they
    # appear in call position (right after an opening paren)
    builtins = [
        '.', '..',
        '*', '+', '-', '->', '..', '/', '<', '<=', '=', '==', '>', '>=',
        'accessor', 'agent', 'agent-errors', 'aget', 'alength', 'all-ns',
        'alter', 'and', 'append-child', 'apply', 'array-map', 'aset',
        'aset-boolean', 'aset-byte', 'aset-char', 'aset-double', 'aset-float',
        'aset-int', 'aset-long', 'aset-short', 'assert', 'assoc', 'await',
        'await-for', 'bean', 'binding', 'bit-and', 'bit-not', 'bit-or',
        'bit-shift-left', 'bit-shift-right', 'bit-xor', 'boolean', 'branch?',
        'butlast', 'byte', 'cast', 'char', 'children', 'class',
        'clear-agent-errors', 'comment', 'commute', 'comp', 'comparator',
        'complement', 'concat', 'conj', 'cons', 'constantly',
        'construct-proxy', 'contains?', 'count', 'create-ns', 'create-struct',
        'cycle', 'dec', 'deref', 'difference', 'disj', 'dissoc', 'distinct',
        'doall', 'doc', 'dorun', 'doseq', 'dosync', 'dotimes', 'doto',
        'double', 'down', 'drop', 'drop-while', 'edit', 'end?', 'ensure',
        'eval', 'every?', 'false?', 'ffirst', 'file-seq', 'filter', 'find',
        'find-doc', 'find-ns', 'find-var', 'first', 'float', 'flush',
        'fnseq', 'frest', 'gensym', 'get', 'get-proxy-class',
        'hash-map', 'hash-set', 'identical?', 'identity', 'if-let', 'import',
        'in-ns', 'inc', 'index', 'insert-child', 'insert-left', 'insert-right',
        'inspect-table', 'inspect-tree', 'instance?', 'int', 'interleave',
        'intersection', 'into', 'into-array', 'iterate', 'join', 'key', 'keys',
        'keyword', 'keyword?', 'last', 'lazy-cat', 'lazy-cons', 'left',
        'lefts', 'line-seq', 'list', 'list*', 'load', 'load-file',
        'locking', 'long', 'loop', 'macroexpand', 'macroexpand-1',
        'make-array', 'make-node', 'map', 'map-invert', 'map?', 'mapcat',
        'max', 'max-key', 'memfn', 'merge', 'merge-with', 'meta', 'min',
        'min-key', 'name', 'namespace', 'neg?', 'new', 'newline', 'next',
        'nil?', 'node', 'not', 'not-any?', 'not-every?', 'not=', 'ns-imports',
        'ns-interns', 'ns-map', 'ns-name', 'ns-publics', 'ns-refers',
        'ns-resolve', 'ns-unmap', 'nth', 'nthrest', 'or', 'parse', 'partial',
        'path', 'peek', 'pop', 'pos?', 'pr', 'pr-str', 'print', 'print-str',
        'println', 'println-str', 'prn', 'prn-str', 'project', 'proxy',
        'proxy-mappings', 'quot', 'rand', 'rand-int', 'range', 're-find',
        're-groups', 're-matcher', 're-matches', 're-pattern', 're-seq',
        'read', 'read-line', 'reduce', 'ref', 'ref-set', 'refer', 'rem',
        'remove', 'remove-method', 'remove-ns', 'rename', 'rename-keys',
        'repeat', 'replace', 'replicate', 'resolve', 'rest', 'resultset-seq',
        'reverse', 'rfirst', 'right', 'rights', 'root', 'rrest', 'rseq',
        'second', 'select', 'select-keys', 'send', 'send-off', 'seq',
        'seq-zip', 'seq?', 'set', 'short', 'slurp', 'some', 'sort',
        'sort-by', 'sorted-map', 'sorted-map-by', 'sorted-set',
        'special-symbol?', 'split-at', 'split-with', 'str', 'string?',
        'struct', 'struct-map', 'subs', 'subvec', 'symbol', 'symbol?',
        'sync', 'take', 'take-nth', 'take-while', 'test', 'time', 'to-array',
        'to-array-2d', 'tree-seq', 'true?', 'union', 'up', 'update-proxy',
        'val', 'vals', 'var-get', 'var-set', 'var?', 'vector', 'vector-zip',
        'vector?', 'when', 'when-first', 'when-let', 'when-not',
        'with-local-vars', 'with-meta', 'with-open', 'with-out-str',
        'xml-seq', 'xml-zip', 'zero?', 'zipmap', 'zipper']

    # valid names for identifiers
    # well, names can only not consist fully of numbers
    # but this should be good enough for now
    valid_name = r'[a-zA-Z0-9!$%&*+,/:<=>?@^_~-]+'

    tokens = {
        'root' : [
            # the comments - always starting with semicolon
            # and going to the end of the line
            (r';.*$', Comment.Single),

            # whitespaces - usually not relevant
            (r'\s+', Text),

            # numbers
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),
            # support for uncommon kinds of numbers -
            # have to figure out what the characters mean
            #(r'(#e|#i|#b|#o|#d|#x)[\d.]+', Number),

            # strings, symbols and characters
            (r'"(\\\\|\\"|[^"])*"', String),
            (r"'" + valid_name, String.Symbol),
            (r"\\([()/'\".'_!§$%& ?;=+-]{1}|[a-zA-Z0-9]+)", String.Char),

            # constants
            # NOTE(review): #t/#f are Scheme constants, presumably
            # inherited from the Scheme lexer this was based on.
            (r'(#t|#f)', Name.Constant),

            # special operators
            (r"('|#|`|,@|,|\.)", Operator),

            # highlight the keywords; the trailing space added to each
            # escaped entry keeps e.g. 'defn' from matching inside 'defn-'
            ('(%s)' % '|'.join([
                re.escape(entry) + ' ' for entry in keywords]),
                Keyword
            ),

            # first variable in a quoted string like
            # '(this is syntactic sugar)
            (r"(?<='\()" + valid_name, Name.Variable),
            (r"(?<=#\()" + valid_name, Name.Variable),

            # highlight the builtins (only in call position)
            ("(?<=\()(%s)" % '|'.join([
                re.escape(entry) + ' ' for entry in builtins]),
                Name.Builtin
            ),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),
            # find the remaining variables
            (valid_name, Name.Variable),

            # Clojure accepts vector notation
            (r'(\[|\])', Punctuation),

            # Clojure accepts map notation
            (r'(\{|\})', Punctuation),

            # the famous parentheses!
            (r'(\(|\))', Punctuation),
        ],
    }
