ThirdParty/Pygments/pygments/lexers/agile.py

changeset 4172
4f20dba37ab6
parent 3145
a9de05d4a22f
child 4697
c2e9bf425554
equal deleted inserted replaced
4170:8bc578136279 4172:4f20dba37ab6
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 """ 2 """
3 pygments.lexers.agile 3 pygments.lexers.agile
4 ~~~~~~~~~~~~~~~~~~~~~ 4 ~~~~~~~~~~~~~~~~~~~~~
5 5
6 Lexers for agile languages. 6 Just export lexer classes previously contained in this module.
7 7
8 :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. 8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details. 9 :license: BSD, see LICENSE for details.
10 """ 10 """
11 11
12 from __future__ import unicode_literals 12 from pygments.lexers.lisp import SchemeLexer
13 from pygments.lexers.jvm import IokeLexer, ClojureLexer
14 from pygments.lexers.python import PythonLexer, PythonConsoleLexer, \
15 PythonTracebackLexer, Python3Lexer, Python3TracebackLexer, DgLexer
16 from pygments.lexers.ruby import RubyLexer, RubyConsoleLexer, FancyLexer
17 from pygments.lexers.perl import PerlLexer, Perl6Lexer
18 from pygments.lexers.d import CrocLexer, MiniDLexer
19 from pygments.lexers.iolang import IoLexer
20 from pygments.lexers.tcl import TclLexer
21 from pygments.lexers.factor import FactorLexer
22 from pygments.lexers.scripting import LuaLexer, MoonScriptLexer
13 23
14 import re 24 __all__ = []
15
16 from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \
17 LexerContext, include, combined, do_insertions, bygroups, using
18 from pygments.token import Error, Text, Other, \
19 Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation
20 from pygments.util import get_bool_opt, get_list_opt, shebang_matches
21 from pygments import unistring as uni
22
23
24 __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
25 'Python3Lexer', 'Python3TracebackLexer', 'RubyLexer',
26 'RubyConsoleLexer', 'PerlLexer', 'LuaLexer', 'MoonScriptLexer',
27 'CrocLexer', 'MiniDLexer', 'IoLexer', 'TclLexer', 'FactorLexer',
28 'FancyLexer', 'DgLexer']
29
30 # b/w compatibility
31 from pygments.lexers.functional import SchemeLexer
32 from pygments.lexers.jvm import IokeLexer, ClojureLexer
33
# Splits input into individual lines, each including its trailing newline.
line_re = re.compile(r'.*?\n')
35
36
class PythonLexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code.

    NOTE(review): rule order within each state is significant -- earlier
    patterns win, so e.g. raw-string prefixes are tried before plain ones.
    """

    name = 'Python'
    aliases = ['python', 'py', 'sage']
    filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage']
    mimetypes = ['text/x-python', 'application/x-python']

    tokens = {
        'root': [
            (r'\n', Text),
            # docstrings: an optional prefix then a triple-quoted string
            # at the start of a line
            (r'^(\s*)([rRuU]{,2}"""(?:.|\n)*?""")', bygroups(Text, String.Doc)),
            (r"^(\s*)([rRuU]{,2}'''(?:.|\n)*?''')", bygroups(Text, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'#.*$', Comment),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            # definition/import headers push a sub-state to lex the name
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('builtins'),
            include('backtick'),
            # raw strings first, so the 'stringescape' state is not entered
            ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
            ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
            ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
            ('[uU]?"""', String, combined('stringescape', 'tdqs')),
            ("[uU]?'''", String, combined('stringescape', 'tsqs')),
            ('[uU]?"', String, combined('stringescape', 'dqs')),
            ("[uU]?'", String, combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (r'(assert|break|continue|del|elif|else|except|exec|'
             r'finally|for|global|if|lambda|pass|print|raise|'
             r'return|try|while|yield(\s+from)?|as|with)\b', Keyword),
        ],
        'builtins': [
            # (?<!\.) keeps attribute accesses like foo.open from matching
            (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|'
             r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|'
             r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|'
             r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|'
             r'input|int|intern|isinstance|issubclass|iter|len|list|locals|'
             r'long|map|max|min|next|object|oct|open|ord|pow|property|range|'
             r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|'
             r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|'
             r'vars|xrange|zip)\b', Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True'
             r')\b', Name.Builtin.Pseudo),
            (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
             r'BaseException|DeprecationWarning|EOFError|EnvironmentError|'
             r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|'
             r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|'
             r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
             r'NotImplemented|NotImplementedError|OSError|OverflowError|'
             r'OverflowWarning|PendingDeprecationWarning|ReferenceError|'
             r'RuntimeError|RuntimeWarning|StandardError|StopIteration|'
             r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
             r'TypeError|UnboundLocalError|UnicodeDecodeError|'
             r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
             r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
             r'WindowsError|ZeroDivisionError)\b', Name.Exception),
        ],
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        'backtick': [
            # Python 2 repr-backticks
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[a-zA-Z0-9_.]+', Name.Decorator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
        ],
        'funcname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop')
        ],
        'classname': [
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop')
        ],
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace),
            (r'', Text, '#pop')  # all else: go back
        ],
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            (r'', Text, '#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings': [
            # %-style format specifiers inside string literals
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String)
            # newlines are an error (use "nl" state)
        ],
        'nl': [
            (r'\n', String)
        ],
        'dqs': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings')
        ],
        'sqs': [
            (r"'", String, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings')
        ],
        'tdqs': [
            (r'"""', String, '#pop'),
            include('strings'),
            include('nl')
        ],
        'tsqs': [
            (r"'''", String, '#pop'),
            include('strings'),
            include('nl')
        ],
    }

    def analyse_text(text):
        # recognize "#!/usr/bin/python", "python2", "python2.7" shebangs etc.
        return shebang_matches(text, r'pythonw?(2(\.\d)?)?')
191
192
class Python3Lexer(RegexLexer):
    """
    For `Python <http://www.python.org>`_ source code (version 3.0).

    *New in Pygments 0.10.*

    NOTE(review): starts from a copy of ``PythonLexer.tokens`` and
    overrides only the states whose rules differ in Python 3.
    """

    name = 'Python 3'
    aliases = ['python3', 'py3']
    filenames = []  # Nothing until Python 3 gets widespread
    mimetypes = ['text/x-python3', 'application/x-python3']

    flags = re.MULTILINE | re.UNICODE

    # Python 3 identifiers follow the Unicode XID properties
    uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue)

    tokens = PythonLexer.tokens.copy()
    tokens['keywords'] = [
        (r'(assert|break|continue|del|elif|else|except|'
         r'finally|for|global|if|lambda|pass|raise|nonlocal|'
         r'return|try|while|yield(\s+from)?|as|with|True|False|None)\b',
         Keyword),
    ]
    tokens['builtins'] = [
        (r'(?<!\.)(__import__|abs|all|any|bin|bool|bytearray|bytes|'
         r'chr|classmethod|cmp|compile|complex|delattr|dict|dir|'
         r'divmod|enumerate|eval|filter|float|format|frozenset|getattr|'
         r'globals|hasattr|hash|hex|id|input|int|isinstance|issubclass|'
         r'iter|len|list|locals|map|max|memoryview|min|next|object|oct|'
         r'open|ord|pow|print|property|range|repr|reversed|round|'
         r'set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|'
         r'vars|zip)\b', Name.Builtin),
        (r'(?<!\.)(self|Ellipsis|NotImplemented)\b', Name.Builtin.Pseudo),
        (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|'
         r'BaseException|BufferError|BytesWarning|DeprecationWarning|'
         r'EOFError|EnvironmentError|Exception|FloatingPointError|'
         r'FutureWarning|GeneratorExit|IOError|ImportError|'
         r'ImportWarning|IndentationError|IndexError|KeyError|'
         r'KeyboardInterrupt|LookupError|MemoryError|NameError|'
         r'NotImplementedError|OSError|OverflowError|'
         r'PendingDeprecationWarning|ReferenceError|'
         r'RuntimeError|RuntimeWarning|StopIteration|'
         r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|'
         r'TypeError|UnboundLocalError|UnicodeDecodeError|'
         r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|'
         r'UnicodeWarning|UserWarning|ValueError|VMSError|Warning|'
         r'WindowsError|ZeroDivisionError)\b', Name.Exception),
    ]
    tokens['numbers'] = [
        (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
        (r'0[oO][0-7]+', Number.Oct),
        (r'0[bB][01]+', Number.Bin),
        (r'0[xX][a-fA-F0-9]+', Number.Hex),
        (r'\d+', Number.Integer)
    ]
    # repr-backticks were removed in Python 3
    tokens['backtick'] = []
    tokens['name'] = [
        (r'@[a-zA-Z0-9_]+', Name.Decorator),
        (uni_name, Name),
    ]
    tokens['funcname'] = [
        (uni_name, Name.Function, '#pop')
    ]
    tokens['classname'] = [
        (uni_name, Name.Class, '#pop')
    ]
    tokens['import'] = [
        (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
        (r'', Text, '#pop')  # all else: go back
    ]
    tokens['fromimport'] = [
        (r'(\s+)(import)\b', bygroups(Text, Keyword), '#pop'),
        (r'\.', Name.Namespace),
        (uni_name, Name.Namespace),
        (r'', Text, '#pop'),
    ]
    # don't highlight "%s" substitutions
    tokens['strings'] = [
        (r'[^\\\'"%\n]+', String),
        # quotes, percents and backslashes must be parsed one at a time
        (r'[\'"\\]', String),
        # unhandled string formatting sign
        (r'%', String)
        # newlines are an error (use "nl" state)
    ]

    def analyse_text(text):
        # recognize "python3", "python3.2" shebangs etc.
        return shebang_matches(text, r'pythonw?3(\.\d)?')
284
285
class PythonConsoleLexer(Lexer):
    """
    For Python console output or doctests, such as:

    .. sourcecode:: pycon

        >>> a = 'foo'
        >>> print a
        foo
        >>> 1 / 0
        Traceback (most recent call last):
          File "<stdin>", line 1, in <module>
        ZeroDivisionError: integer division or modulo by zero

    Additional options:

    `python3`
        Use Python 3 lexer for code.  Default is ``False``.
        *New in Pygments 1.0.*
    """
    name = 'Python console session'
    aliases = ['pycon']
    mimetypes = ['text/x-python-doctest']

    def __init__(self, **options):
        # `python3` selects the Python3Lexer/Python3TracebackLexer pair
        self.python3 = get_bool_opt(options, 'python3', False)
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Split the session into prompt, code, output and traceback parts.

        Code after ``>>> ``/``... `` prompts is buffered and handed to the
        Python lexer via `do_insertions`; traceback lines are buffered and
        handed to the traceback lexer; everything else is plain output.
        """
        if self.python3:
            pylexer = Python3Lexer(**self.options)
            tblexer = Python3TracebackLexer(**self.options)
        else:
            pylexer = PythonLexer(**self.options)
            tblexer = PythonTracebackLexer(**self.options)

        curcode = ''      # buffered source code (prompts stripped)
        insertions = []   # prompt tokens to re-insert into the code stream
        curtb = ''        # buffered traceback text
        tbindex = 0       # offset of the traceback start in `text`
        tb = 0            # 1 while inside a traceback block
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>>> ') or line.startswith('... '):
                tb = 0
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:4])]))
                curcode += line[4:]
            elif line.rstrip() == '...' and not tb:
                # only a new >>> prompt can end an exception block
                # otherwise an ellipsis in place of the traceback frames
                # will be mishandled
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, '...')]))
                curcode += line[3:]
            else:
                if curcode:
                    for item in do_insertions(insertions,
                                    pylexer.get_tokens_unprocessed(curcode)):
                        yield item
                    curcode = ''
                    insertions = []
                # NOTE(review): the diff viewer may have collapsed leading
                # spaces in this pattern (upstream uses two spaces before
                # "File") -- verify against the repository.
                if (line.startswith('Traceback (most recent call last):') or
                        re.match(r' File "[^"]+", line \d+\n$', line)):
                    tb = 1
                    curtb = line
                    tbindex = match.start()
                elif line == 'KeyboardInterrupt\n':
                    yield match.start(), Name.Class, line
                elif tb:
                    curtb += line
                    if not (line.startswith(' ') or line.strip() == '...'):
                        tb = 0
                        for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                            yield tbindex+i, t, v
                        # reset so the end-of-input flush below cannot
                        # re-emit an already-flushed traceback
                        curtb = ''
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            for item in do_insertions(insertions,
                            pylexer.get_tokens_unprocessed(curcode)):
                yield item
        if curtb:
            # BUGFIX: a traceback that runs to the very end of the input
            # was previously dropped because only `curcode` was flushed here.
            for i, t, v in tblexer.get_tokens_unprocessed(curtb):
                yield tbindex+i, t, v
367
368
class PythonTracebackLexer(RegexLexer):
    """
    For Python tracebacks.

    *New in Pygments 0.7.*

    NOTE(review): the diff viewer this source was recovered from may have
    collapsed runs of leading spaces inside the patterns below (upstream
    uses two spaces before "File" and four before source lines) -- verify.
    """

    name = 'Python Traceback'
    aliases = ['pytb']
    filenames = ['*.pytb']
    mimetypes = ['text/x-python-traceback']

    tokens = {
        'root': [
            (r'^Traceback \(most recent call last\):\n',
             Generic.Traceback, 'intb'),
            # SyntaxError starts with this.
            (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
            # anything before the traceback header is passed through
            (r'^.*\n', Other),
        ],
        'intb': [
            # frame line with function name
            (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
            # frame line without function name (e.g. SyntaxError)
            (r'^( File )("[^"]+")(, line )(\d+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text)),
            # the offending source line, lexed as Python
            (r'^( )(.+)(\n)',
             bygroups(Text, using(PythonLexer), Text)),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),  # for doctests...
            # the final "ExcType: message" line ends the traceback
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Text), '#pop'),
            # an exception with no message
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Generic.Error, Text), '#pop')
        ],
    }
404
405
class Python3TracebackLexer(RegexLexer):
    """
    For Python 3.0 tracebacks, with support for chained exceptions.

    *New in Pygments 1.0.*

    NOTE(review): leading-space runs inside the patterns below may have been
    collapsed by the diff viewer this source came from -- verify upstream.
    """

    name = 'Python 3.0 Traceback'
    aliases = ['py3tb']
    filenames = ['*.py3tb']
    mimetypes = ['text/x-python3-traceback']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
            # PEP 3134 chained-exception separators
            (r'^During handling of the above exception, another '
             r'exception occurred:\n\n', Generic.Traceback),
            (r'^The above exception was the direct cause of the '
             r'following exception:\n\n', Generic.Traceback),
        ],
        'intb': [
            # frame line with function name
            (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
             bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)),
            # the offending source line, lexed as Python 3
            (r'^( )(.+)(\n)',
             bygroups(Text, using(Python3Lexer), Text)),
            (r'^([ \t]*)(\.\.\.)(\n)',
             bygroups(Text, Comment, Text)),  # for doctests...
            # the final "ExcType: message" line ends the traceback
            (r'^([^:]+)(: )(.+)(\n)',
             bygroups(Generic.Error, Text, Name, Text), '#pop'),
            # an exception with no message
            (r'^([a-zA-Z_][a-zA-Z0-9_]*)(:?\n)',
             bygroups(Generic.Error, Text), '#pop')
        ],
    }
440
441
class RubyLexer(ExtendedRegexLexer):
    """
    For `Ruby <http://www.ruby-lang.org>`_ source code.

    Uses `ExtendedRegexLexer` because heredocs and interpolated literals
    need callbacks that re-enter the lexer with a modified context.
    """

    name = 'Ruby'
    aliases = ['rb', 'ruby', 'duby']
    filenames = ['*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec',
                 '*.rbx', '*.duby']
    mimetypes = ['text/x-ruby', 'application/x-ruby']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        # okay, this is the hardest part of parsing Ruby...
        # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)  # quote ", ', `
        yield match.start(3), Name.Constant, match.group(3)   # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)  # quote again

        # stack of pending heredocs; bodies only start after the current
        # physical line, so they are collected and emitted later
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for match in line_re.finditer(ctx.text, ctx.pos):
                    # <<- allows the terminator to be indented
                    if tolerant:
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield match.start(), Name.Constant, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            ctx.end = len(ctx.text)
            del heredocstack[:]


    def gen_rubystrings_rules():
        """Build the 'strings' state plus the per-delimiter sub-states for
        Ruby's %-literals (%q, %Q, %w, %r, ...)."""
        def intp_regex_callback(self, match, ctx):
            yield match.start(1), String.Regex, match.group(1)  # begin
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[mixounse]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            # easy ones
            (r'\:@{0,2}([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)', String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r':"', String.Symbol, 'simple-sym'),
            (r'([a-zA-Z_][a-zA-Z0-9]*)(:)',
             bygroups(String.Symbol, Punctuation)),  # Since Ruby 1.9
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, name in ('\\{', '\\}', 'cb'), \
                                    ('\\[', '\\]', 'sb'), \
                                    ('\\(', '\\)', 'pa'), \
                                    ('<', '>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                # nested delimiters push/pop so %Q{a {b} c} balances
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[QWx]?' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Other),
                (r'(?<!\\)' + lbrace, String.Other, '#push'),
                (r'(?<!\\)' + rbrace, String.Other, '#pop'),
                (r'[\\#' + lbrace + rbrace + ']', String.Other),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Other),
            ]
            states['strings'].append((r'%[qsw]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + lbrace + rbrace + ']', String.Regex),
                (r'(?<!\\)' + lbrace, String.Regex, '#push'),
                (r'(?<!\\)' + rbrace + '[mixounse]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + lbrace + rbrace + ']', String.Regex),
                (r'[^\\#' + lbrace + rbrace + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2[mixounse]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'%[qsw]([^a-zA-Z0-9])((?:\\\1|(?!\1).)*)\1', String.Other),
            (r'(%[QWx]([^a-zA-Z0-9]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([^a-zA-Z0-9\s]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            (r'=begin\s.*?\n=end.*?$', Comment.Multiline),
            # keywords
            (r'(BEGIN|END|alias|begin|break|case|defined\?|'
             r'do|else|elsif|end|ensure|for|if|in|next|redo|'
             r'rescue|raise|retry|return|super|then|undef|unless|until|when|'
             r'while|yield)\b', Keyword),
            # start of function, class and module names
            (r'(module)(\s+)([a-zA-Z_][a-zA-Z0-9_]*'
             r'(?:::[a-zA-Z_][a-zA-Z0-9_]*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def)(\s+)', bygroups(Keyword, Text), 'funcname'),
            # operator-method definitions like "def <=>"
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
            # special methods
            (r'(initialize|new|loop|include|extend|raise|attr_reader|'
             r'attr_writer|attr_accessor|attr|catch|throw|private|'
             r'module_function|public|protected|true|false|nil)\b',
             Keyword.Pseudo),
            (r'(not|and|or)\b', Operator.Word),
            (r'(autoload|block_given|const_defined|eql|equal|frozen|include|'
             r'instance_of|is_a|iterator|kind_of|method_defined|nil|'
             r'private_method_defined|protected_method_defined|'
             r'public_method_defined|respond_to|tainted)\?', Name.Builtin),
            (r'(chomp|chop|exit|gsub|sub)!', Name.Builtin),
            (r'(?<!\.)(Array|Float|Integer|String|__id__|__send__|abort|'
             r'ancestors|at_exit|autoload|binding|callcc|caller|'
             r'catch|chomp|chop|class_eval|class_variables|'
             r'clone|const_defined\?|const_get|const_missing|const_set|'
             r'constants|display|dup|eval|exec|exit|extend|fail|fork|'
             r'format|freeze|getc|gets|global_variables|gsub|'
             r'hash|id|included_modules|inspect|instance_eval|'
             r'instance_method|instance_methods|'
             r'instance_variable_get|instance_variable_set|instance_variables|'
             r'lambda|load|local_variables|loop|'
             r'method|method_missing|methods|module_eval|name|'
             r'object_id|open|p|print|printf|private_class_method|'
             r'private_instance_methods|'
             r'private_methods|proc|protected_instance_methods|'
             r'protected_methods|public_class_method|'
             r'public_instance_methods|public_methods|'
             r'putc|puts|raise|rand|readline|readlines|require|'
             r'scan|select|self|send|set_trace_func|singleton_methods|sleep|'
             r'split|sprintf|srand|sub|syscall|system|taint|'
             r'test|throw|to_a|to_s|trace_var|trap|untaint|untrace_var|'
             r'warn)\b', Name.Builtin),
            (r'__(FILE|LINE)__\b', Name.Builtin.Pseudo),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=(?:\s|;|\.)index\s)|'
             r'(?<=(?:\s|;|\.)scan\s)|'
             r'(?<=(?:\s|;|\.)sub\s)|'
             r'(?<=(?:\s|;|\.)sub!\s)|'
             r'(?<=(?:\s|;|\.)gsub\s)|'
             r'(?<=(?:\s|;|\.)gsub!\s)|'
             r'(?<=(?:\s|;|\.)match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
             'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0_?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
            (r'@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
            (r'\$[a-zA-Z0-9_]+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][a-zA-Z0-9_]+', Name.Constant),
            # this is needed because ruby attributes can look
            # like keywords (class) or like this: ` ?!?
            (r'(\.|::)([a-zA-Z_]\w*[\!\?]?|[*%&^`~+-/\[<>=])',
             bygroups(Operator, Name)),
            (r'[a-zA-Z_]\w*[\!\?]?', Name),
            (r'(\[|\]|\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        'funcname': [
            (r'\(', Punctuation, 'defexpr'),
            (r'(?:([a-zA-Z_][a-zA-Z0-9_]*)(\.))?'
             r'([a-zA-Z_]\w*[\!\?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            (r'', Text, '#pop')
        ],
        'classname': [
            (r'\(', Punctuation, 'defexpr'),
            # singleton classes: "class << self"
            (r'<<', Operator, '#pop'),
            (r'[A-Z_]\w*', Name.Class, '#pop'),
            (r'', Text, '#pop')
        ],
        'defexpr': [
            (r'(\))(\.|::)?', bygroups(Punctuation, Operator), '#pop'),
            (r'\(', Operator, '#push'),
            include('root')
        ],
        'in-intp': [
            # inside "#{...}" interpolation: full Ruby until the closing brace
            ('}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#{', String.Interpol, 'in-intp'),
            (r'#@@?[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol),
            (r'#\$[a-zA-Z_][a-zA-Z0-9_]*', String.Interpol)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})',
             String.Escape)
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[mixounse]*', String.Regex, '#pop'),
        ],
        'end-part': [
            # everything after __END__ is data, not code
            (r'.+', Comment.Preproc, '#pop')
        ]
    }
    # merge in the generated %-literal string states
    tokens.update(gen_rubystrings_rules())

    def analyse_text(text):
        # recognize "#!/usr/bin/ruby", "ruby1.8" shebangs etc.
        return shebang_matches(text, r'ruby(1\.\d)?')
787
788
class RubyConsoleLexer(Lexer):
    """
    Lexer for interactive Ruby (**irb**) session transcripts, e.g.:

    .. sourcecode:: rbcon

        irb(main):001:0> a = 1
        => 1
        irb(main):002:0> puts a
        1
        => nil
    """
    name = 'Ruby irb session'
    aliases = ['rbcon', 'irb']
    mimetypes = ['text/x-ruby-shellsession']

    # matches the classic irb prompt as well as the short ">> " / "?> " forms
    _prompt_re = re.compile('irb\([a-zA-Z_][a-zA-Z0-9_]*\):\d{3}:\d+[>*"\'] '
                            '|>> |\?> ')

    def get_tokens_unprocessed(self, text):
        rblexer = RubyLexer(**self.options)

        buffered = ''   # Ruby source accumulated across prompt lines
        prompts = []    # prompt tokens to re-insert via do_insertions
        for match in line_re.finditer(text):
            line = match.group()
            prompt = self._prompt_re.match(line)
            if prompt is None:
                # plain output line: flush any buffered code first so the
                # Ruby tokens appear before this output
                if buffered:
                    for item in do_insertions(
                            prompts, rblexer.get_tokens_unprocessed(buffered)):
                        yield item
                    buffered = ''
                    prompts = []
                yield match.start(), Generic.Output, line
                continue
            cut = prompt.end()
            prompts.append((len(buffered),
                            [(0, Generic.Prompt, line[:cut])]))
            buffered += line[cut:]
        # flush code left over at the end of the session
        if buffered:
            for item in do_insertions(
                    prompts, rblexer.get_tokens_unprocessed(buffered)):
                yield item
833
834
835 class PerlLexer(RegexLexer):
836 """
837 For `Perl <http://www.perl.org>`_ source code.
838 """
839
840 name = 'Perl'
    # Registration metadata: names, filename globs and MIME types used by
    # Pygments to look this lexer up.
    aliases = ['perl', 'pl']
    filenames = ['*.pl', '*.pm']
    mimetypes = ['text/x-perl', 'application/x-perl']

    # DOTALL so '.' spans newlines inside heredocs/PODs; MULTILINE so ^/$
    # anchor per line.
    flags = re.DOTALL | re.MULTILINE
    # TODO: give this to a perl guy who knows how to parse perl...
    tokens = {
        # Continuation state for m{...} / s{...}{...}-style constructs whose
        # opening operator was consumed in 'root'; one alternative per
        # delimiter pair, each popping back when the regex body is consumed.
        'balanced-regex': [
            (r'/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\[^\\]|[^\\!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'{(\\\\|\\[^\\]|[^\\}])*}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\[^\\]|[^\\>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\[^\\]|[^\\\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\[^\\]|[^\\\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\[^\\]|[^\\\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\[^\\]|[^\\\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        # Main state; rule order matters (e.g. s/// forms must be tried
        # before bare operators, numbers before names).
        'root': [
            (r'\#.*?$', Comment.Single),
            # POD block: =word ... =cut (DOTALL lets .*? span lines).
            (r'^=[a-zA-Z0-9]+\s+.*?\n=cut', Comment.Multiline),
            (r'(case|continue|do|else|elsif|for|foreach|if|last|my|'
             r'next|our|redo|reset|then|unless|until|while|use|'
             r'print|new|BEGIN|CHECK|INIT|END|return)\b', Keyword),
            # Format declaration: 'format NAME =' introduces the 'format'
            # state, which runs until a lone '.' line.
            (r'(format)(\s+)([a-zA-Z0-9_]+)(\s*)(=)(\s*\n)',
             bygroups(Keyword, Text, Name, Text, Punctuation, Text), 'format'),
            (r'(eq|lt|gt|le|ge|ne|not|and|or|cmp)\b', Operator.Word),
            # common delimiters
            (r's/(\\\\|\\[^\\]|[^\\/])*/(\\\\|\\[^\\]|[^\\/])*/[egimosx]*',
             String.Regex),
            (r's!(\\\\|\\!|[^!])*!(\\\\|\\!|[^!])*![egimosx]*', String.Regex),
            (r's\\(\\\\|[^\\])*\\(\\\\|[^\\])*\\[egimosx]*', String.Regex),
            (r's@(\\\\|\\[^\\]|[^\\@])*@(\\\\|\\[^\\]|[^\\@])*@[egimosx]*',
             String.Regex),
            (r's%(\\\\|\\[^\\]|[^\\%])*%(\\\\|\\[^\\]|[^\\%])*%[egimosx]*',
             String.Regex),
            # balanced delimiters
            (r's{(\\\\|\\[^\\]|[^\\}])*}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\[^\\]|[^\\>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\[^\\]|[^\\\]])*\]\s*', String.Regex,
             'balanced-regex'),
            (r's\((\\\\|\\[^\\]|[^\\\)])*\)\s*', String.Regex,
             'balanced-regex'),

            (r'm?/(\\\\|\\[^\\]|[^\\/\n])*/[gcimosx]*', String.Regex),
            # m with any delimiter: only consume the 'm', then let
            # 'balanced-regex' handle the delimited body.
            (r'm(?=[/!\\{<\[\(@%\$])', String.Regex, 'balanced-regex'),
            # Bare /regex/ only when preceded by =~ or '(' to avoid
            # mistaking division for a match.
            (r'((?<==~)|(?<=\())\s*/(\\\\|\\[^\\]|[^\\/])*/[gcimosx]*',
             String.Regex),
            (r'\s+', Text),
            (r'(abs|accept|alarm|atan2|bind|binmode|bless|caller|chdir|'
             r'chmod|chomp|chop|chown|chr|chroot|close|closedir|connect|'
             r'continue|cos|crypt|dbmclose|dbmopen|defined|delete|die|'
             r'dump|each|endgrent|endhostent|endnetent|endprotoent|'
             r'endpwent|endservent|eof|eval|exec|exists|exit|exp|fcntl|'
             r'fileno|flock|fork|format|formline|getc|getgrent|getgrgid|'
             r'getgrnam|gethostbyaddr|gethostbyname|gethostent|getlogin|'
             r'getnetbyaddr|getnetbyname|getnetent|getpeername|getpgrp|'
             r'getppid|getpriority|getprotobyname|getprotobynumber|'
             r'getprotoent|getpwent|getpwnam|getpwuid|getservbyname|'
             r'getservbyport|getservent|getsockname|getsockopt|glob|gmtime|'
             r'goto|grep|hex|import|index|int|ioctl|join|keys|kill|last|'
             r'lc|lcfirst|length|link|listen|local|localtime|log|lstat|'
             r'map|mkdir|msgctl|msgget|msgrcv|msgsnd|my|next|no|oct|open|'
             r'opendir|ord|our|pack|package|pipe|pop|pos|printf|'
             r'prototype|push|quotemeta|rand|read|readdir|'
             r'readline|readlink|readpipe|recv|redo|ref|rename|require|'
             r'reverse|rewinddir|rindex|rmdir|scalar|seek|seekdir|'
             r'select|semctl|semget|semop|send|setgrent|sethostent|setnetent|'
             r'setpgrp|setpriority|setprotoent|setpwent|setservent|'
             r'setsockopt|shift|shmctl|shmget|shmread|shmwrite|shutdown|'
             r'sin|sleep|socket|socketpair|sort|splice|split|sprintf|sqrt|'
             r'srand|stat|study|substr|symlink|syscall|sysopen|sysread|'
             r'sysseek|system|syswrite|tell|telldir|tie|tied|time|times|tr|'
             r'truncate|uc|ucfirst|umask|undef|unlink|unpack|unshift|untie|'
             r'utime|values|vec|wait|waitpid|wantarray|warn|write'
             r')\b', Name.Builtin),
            (r'((__(DATA|DIE|WARN)__)|(STD(IN|OUT|ERR)))\b', Name.Builtin.Pseudo),
            # Heredoc: <<TAG ... TAG (optionally quoted tag; \1 re-matches
            # the quote, \2 the tag).
            (r'<<([\'"]?)([a-zA-Z_][a-zA-Z0-9_]*)\1;?\n.*?\n\2\n', String),
            (r'__END__', Comment.Preproc, 'end-part'),
            # Special $^X variables, then punctuation variables like $_ $@.
            (r'\$\^[ADEFHILMOPSTWX]', Name.Variable.Global),
            (r"\$[\\\"\[\]'&`+*.,;=%~?@$!<>(^|/-](?!\w)", Name.Variable.Global),
            (r'[$@%#]+', Name.Variable, 'varname'),
            (r'0_?[0-7]+(_[0-7]+)*', Number.Oct),
            (r'0x[0-9A-Fa-f]+(_[0-9A-Fa-f]+)*', Number.Hex),
            (r'0b[01]+(_[01]+)*', Number.Bin),
            (r'(?i)(\d*(_\d*)*\.\d+(_\d*)*|\d+(_\d*)*\.\d+(_\d*)*)(e[+-]?\d+)?',
             Number.Float),
            (r'(?i)\d+(_\d*)*e[+-]?\d+(_\d*)*', Number.Float),
            (r'\d+(_\d+)*', Number.Integer),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r'`(\\\\|\\[^\\]|[^`\\])*`', String.Backtick),
            (r'<([^\s>]+)>', String.Regex),
            # q/qq/qw/qr/qx quote-like operators with bracketing delimiters;
            # each gets a dedicated state that tracks nesting.
            (r'(q|qq|qw|qr|qx)\{', String.Other, 'cb-string'),
            (r'(q|qq|qw|qr|qx)\(', String.Other, 'rb-string'),
            (r'(q|qq|qw|qr|qx)\[', String.Other, 'sb-string'),
            (r'(q|qq|qw|qr|qx)\<', String.Other, 'lt-string'),
            # ... and with an arbitrary non-alphanumeric delimiter (\2).
            (r'(q|qq|qw|qr|qx)([^a-zA-Z0-9])(.|\n)*?\2', String.Other),
            (r'package\s+', Keyword, 'modulename'),
            (r'sub\s+', Keyword, 'funcname'),
            (r'(\[\]|\*\*|::|<<|>>|>=|<=>|<=|={3}|!=|=~|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&^|!\\~]=?', Operator),
            (r'[\(\)\[\]:;,<>/\?\{\}]', Punctuation), # yes, there's no shortage
                                                      # of punctuation in Perl!
            # Zero-width match: hand any word over to the 'name' state.
            (r'(?=\w)', Name, 'name'),
        ],
        # Body of a 'format' declaration, terminated by a lone '.' line.
        'format': [
            (r'\.\n', String.Interpol, '#pop'),
            (r'[^\n]*\n', String.Interpol),
        ],
        # After a sigil ($, @, %, #): pick up the variable name.
        'varname': [
            (r'\s+', Text),
            (r'\{', Punctuation, '#pop'), # hash syntax?
            (r'\)|,', Punctuation, '#pop'), # argument specifier
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name.Variable, '#pop'),
        ],
        # Bareword: namespace-qualified names, constants, or plain names.
        'name': [
            (r'[a-zA-Z0-9_]+::', Name.Namespace),
            (r'[a-zA-Z0-9_:]+', Name, '#pop'),
            (r'[A-Z_]+(?=[^a-zA-Z0-9_])', Name.Constant, '#pop'),
            (r'(?=[^a-zA-Z0-9_])', Text, '#pop'),
        ],
        'modulename': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop')
        ],
        # After 'sub': function name, optional prototype, up to '{' or ';'.
        'funcname': [
            (r'[a-zA-Z_]\w*[\!\?]?', Name.Function),
            (r'\s+', Text),
            # argument declaration
            (r'(\([$@%]*\))(\s*)', bygroups(Punctuation, Text)),
            (r'.*?{', Punctuation, '#pop'),
            (r';', Punctuation, '#pop'),
        ],
        # q{...}: curly-brace quoted string, nesting tracked via #push/#pop.
        'cb-string': [
            (r'\\[\{\}\\]', String.Other),
            (r'\\', String.Other),
            (r'\{', String.Other, 'cb-string'),
            (r'\}', String.Other, '#pop'),
            (r'[^\{\}\\]+', String.Other)
        ],
        # q(...): round-bracket quoted string.
        'rb-string': [
            (r'\\[\(\)\\]', String.Other),
            (r'\\', String.Other),
            (r'\(', String.Other, 'rb-string'),
            (r'\)', String.Other, '#pop'),
            (r'[^\(\)]+', String.Other)
        ],
        # q[...]: square-bracket quoted string.
        'sb-string': [
            (r'\\[\[\]\\]', String.Other),
            (r'\\', String.Other),
            (r'\[', String.Other, 'sb-string'),
            (r'\]', String.Other, '#pop'),
            (r'[^\[\]]+', String.Other)
        ],
        # q<...>: angle-bracket quoted string.
        'lt-string': [
            (r'\\[\<\>\\]', String.Other),
            (r'\\', String.Other),
            (r'\<', String.Other, 'lt-string'),
            (r'\>', String.Other, '#pop'),
            (r'[^\<\>]+', String.Other)
        ],
        # Everything after __END__ is data (DOTALL makes .+ consume it all).
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ]
    }
1009
1010 def analyse_text(text):
1011 if shebang_matches(text, r'perl'):
1012 return True
1013 if 'my $' in text:
1014 return 0.9
1015 return 0.1 # who knows, might still be perl!
1016
1017
class LuaLexer(RegexLexer):
    """
    For `Lua <http://www.lua.org>`_ source code.

    Additional options accepted:

    `func_name_highlighting`
        If given and ``True``, highlight builtin function names
        (default: ``True``).
    `disabled_modules`
        If given, must be a list of module names whose function names
        should not be highlighted. By default all modules are highlighted.

        To get a list of allowed modules have a look into the
        `_luabuiltins` module:

        .. sourcecode:: pycon

            >>> from pygments.lexers._luabuiltins import MODULES
            >>> MODULES.keys()
            ['string', 'coroutine', 'modules', 'io', 'basic', ...]
    """

    name = 'Lua'
    aliases = ['lua']
    filenames = ['*.lua', '*.wlua']
    mimetypes = ['text/x-lua', 'application/x-lua']

    tokens = {
        'root': [
            # lua allows a file to start with a shebang
            (r'#!(.*?)$', Comment.Preproc),
            # Zero-width match: everything else is handled in 'base'.
            (r'', Text, 'base'),
        ],
        'base': [
            # Long comments --[=*[ ... ]=*] with matching '=' levels (\1).
            (r'(?s)--\[(=*)\[.*?\]\1\]', Comment.Multiline),
            ('--.*$', Comment.Single),

            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            ('(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),

            (r'\n', Text),
            (r'[^\S\n]', Text),
            # multiline strings
            (r'(?s)\[(=*)\[.*?\]\1\]', String),

            (r'(==|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#])', Operator),
            (r'[\[\]\{\}\(\)\.,:;]', Punctuation),
            (r'(and|or|not)\b', Operator.Word),

            ('(break|do|else|elseif|end|for|if|in|repeat|return|then|until|'
             r'while)\b', Keyword),
            (r'(local)\b', Keyword.Declaration),
            (r'(true|false|nil)\b', Keyword.Constant),

            (r'(function)\b', Keyword, 'funcname'),

            # Plain or one-level dotted name (e.g. string.format).
            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),

            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],

        'funcname': [
            (r'\s+', Text),
            # FIX: these two patterns contained regex escapes (\., \() in
            # non-raw strings, which is a DeprecationWarning in Python >= 3.6
            # and will eventually be a SyntaxError; raw strings are
            # byte-identical as patterns.
            (r'(?:([A-Za-z_][A-Za-z0-9_]*)(\.))?([A-Za-z_][A-Za-z0-9_]*)',
             bygroups(Name.Class, Punctuation, Name.Function), '#pop'),
            # inline function
            (r'\(', Punctuation, '#pop'),
        ],

        # if I understand correctly, every character is valid in a lua string,
        # so this state is only for later corrections
        'string': [
            ('.', String)
        ],

        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],

        'sqs': [
            ("'", String, '#pop'),
            include('string')
        ],

        'dqs': [
            ('"', String, '#pop'),
            include('string')
        ]
    }

    def __init__(self, **options):
        """Read the builtin-highlighting options, then delegate to RegexLexer.

        ``func_name_highlighting`` toggles builtin-name detection;
        ``disabled_modules`` suppresses the builtins of the listed modules.
        """
        self.func_name_highlighting = get_bool_opt(
            options, 'func_name_highlighting', True)
        self.disabled_modules = get_list_opt(options, 'disabled_modules', [])

        self._functions = set()
        if self.func_name_highlighting:
            from pygments.lexers._luabuiltins import MODULES
            for mod, func in MODULES.items():
                if mod not in self.disabled_modules:
                    self._functions.update(func)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Post-process Name tokens: promote known builtins and split
        dotted names into name/punctuation/name triples."""
        for index, token, value in \
            RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self._functions:
                    yield index, Name.Builtin, value
                    continue
                elif '.' in value:
                    # The Name pattern admits at most one dot, so a
                    # two-way split is safe here.
                    a, b = value.split('.')
                    yield index, Name, a
                    yield index + len(a), Punctuation, '.'
                    yield index + len(a) + 1, Name, b
                    continue
            yield index, token, value
1139
1140
class MoonScriptLexer(LuaLexer):
    """
    For `MoonScript <http://moonscript.org>`_ source code.

    Inherits the builtin-name post-processing from :class:`LuaLexer` and
    replaces the token table with MoonScript syntax.

    *New in Pygments 1.5.*
    """

    name = "MoonScript"
    aliases = ["moon", "moonscript"]
    filenames = ["*.moon"]
    mimetypes = ['text/x-moonscript', 'application/x-moonscript']

    tokens = {
        'root': [
            # MoonScript files may start with a shebang.
            (r'#!(.*?)$', Comment.Preproc),
            # Zero-width match: everything else is handled in 'base'.
            (r'', Text, 'base'),
        ],
        'base': [
            ('--.*$', Comment.Single),
            (r'(?i)(\d*\.\d+|\d+\.\d*)(e[+-]?\d+)?', Number.Float),
            (r'(?i)\d+e[+-]?\d+', Number.Float),
            (r'(?i)0x[0-9a-f]*', Number.Hex),
            (r'\d+', Number.Integer),
            (r'\n', Text),
            (r'[^\S\n]+', Text),
            # Lua-style long strings [=*[ ... ]=*].
            (r'(?s)\[(=*)\[.*?\]\1\]', String),
            # Arrow function literals.
            (r'(->|=>)', Name.Function),
            (r':[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable),
            (r'(==|!=|~=|<=|>=|\.\.\.|\.\.|[=+\-*/%^<>#!.\\:])', Operator),
            (r'[;,]', Punctuation),
            (r'[\[\]\{\}\(\)]', Keyword.Type),
            # Table key (name followed by ':').
            (r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Variable),
            (r"(class|extends|if|then|super|do|with|import|export|"
             r"while|elseif|return|for|in|from|when|using|else|"
             r"and|or|not|switch|break)\b", Keyword),
            (r'(true|false|nil)\b', Keyword.Constant),
            (r'(and|or|not)\b', Operator.Word),
            (r'(self)\b', Name.Builtin.Pseudo),
            # @ivar / @@cvar references.
            (r'@@?([a-zA-Z_][a-zA-Z0-9_]*)?', Name.Variable.Class),
            (r'[A-Z]\w*', Name.Class), # proper name
            (r'[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?', Name),
            ("'", String.Single, combined('stringescape', 'sqs')),
            ('"', String.Double, combined('stringescape', 'dqs'))
        ],
        'stringescape': [
            (r'''\\([abfnrtv\\"']|\d{1,3})''', String.Escape)
        ],
        'sqs': [
            ("'", String.Single, '#pop'),
            (".", String)
        ],
        'dqs': [
            ('"', String.Double, '#pop'),
            (".", String)
        ]
    }

    def get_tokens_unprocessed(self, text):
        # set . as Operator instead of Punctuation
        for index, token, value in \
            LuaLexer.get_tokens_unprocessed(self, text):
            if token == Punctuation and value == ".":
                token = Operator
            yield index, token, value
1205
1206
class CrocLexer(RegexLexer):
    """
    For `Croc <http://jfbillingsley.com/croc>`_ source.
    """
    name = 'Croc'
    filenames = ['*.croc']
    aliases = ['croc']
    mimetypes = ['text/x-crocsrc']

    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            # /* ... */ comments nest in Croc; handled by a push/pop state.
            (r'/\*', Comment.Multiline, 'nestedcomment'),
            # Keywords
            (r'(as|assert|break|case|catch|class|continue|default'
             r'|do|else|finally|for|foreach|function|global|namespace'
             r'|if|import|in|is|local|module|return|scope|super|switch'
             r'|this|throw|try|vararg|while|with|yield)\b', Keyword),
            (r'(false|true|null)\b', Keyword.Constant),
            # FloatLiteral
            (r'([0-9][0-9_]*)(?=[.eE])(\.[0-9][0-9_]*)?([eE][+\-]?[0-9_]+)?',
             Number.Float),
            # IntegerLiteral
            # -- Binary
            (r'0[bB][01][01_]*', Number),
            # -- Hexadecimal
            (r'0[xX][0-9a-fA-F][0-9a-fA-F_]*', Number.Hex),
            # -- Decimal
            (r'([0-9][0-9_]*)(?![.eE])', Number.Integer),
            # CharacterLiteral
            (r"""'(\\['"\\nrt]|\\x[0-9a-fA-F]{2}|\\[0-9]{1,3}"""
             r"""|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|.)'""",
             String.Char
            ),
            # StringLiteral
            # -- WysiwygString (raw strings; delimiter doubled to escape)
            (r'@"(""|[^"])*"', String),
            (r'@`(``|[^`])*`', String),
            (r"@'(''|[^'])*'", String),
            # -- DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Tokens
            (
                r'(~=|\^=|%=|\*=|==|!=|>>>=|>>>|>>=|>>|>=|<=>|\?=|-\>'
                r'|<<=|<<|<=|\+\+|\+=|--|-=|\|\||\|=|&&|&=|\.\.|/=)'
                r'|[-/.&$@|\+<>!()\[\]{}?,;:=*%^~#\\]', Punctuation
            ),
            # Identifier
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Nesting comment state: '#push' on each inner '/*'.
        'nestedcomment': [
            (r'[^*/]+', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline),
        ],
    }
1267
1268
class MiniDLexer(CrocLexer):
    """
    For MiniD source. MiniD is now known as Croc.

    Identical to :class:`CrocLexer` apart from the registration metadata.
    """
    name = 'MiniD'
    # NOTE(review): '*.md' clashes with the usual Markdown extension --
    # confirm filename-based detection is intended here.
    filenames = ['*.md']
    aliases = ['minid']
    mimetypes = ['text/x-minidsrc']
1277
1278
class IoLexer(RegexLexer):
    """
    For `Io <http://iolanguage.com/>`_ (a small, prototype-based
    programming language) source.

    *New in Pygments 0.10.*
    """
    name = 'Io'
    filenames = ['*.io']
    aliases = ['io']
    mimetypes = ['text/x-iosrc']
    tokens = {
        'root': [
            (r'\n', Text),
            (r'\s+', Text),
            # Comments
            (r'//(.*?)\n', Comment.Single),
            (r'#(.*?)\n', Comment.Single),
            # C-style block comment (with optional escaped newlines).
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
            # /+ ... +/ comments nest; handled by the push/pop state below.
            (r'/\+', Comment.Multiline, 'nestedcomment'),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # Operators
            (r'::=|:=|=|\(|\)|;|,|\*|-|\+|>|<|@|!|/|\||\^|\.|%|&|\[|\]|\{|\}',
             Operator),
            # keywords
            (r'(clone|do|doFile|doString|method|for|if|else|elseif|then)\b',
             Keyword),
            # constants
            (r'(nil|false|true)\b', Name.Constant),
            # names
            (r'(Object|list|List|Map|args|Sequence|Coroutine|File)\b',
             Name.Builtin),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            # numbers
            (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer)
        ],
        # Nesting comment state for /+ ... +/.
        'nestedcomment': [
            (r'[^+/]+', Comment.Multiline),
            (r'/\+', Comment.Multiline, '#push'),
            (r'\+/', Comment.Multiline, '#pop'),
            (r'[+/]', Comment.Multiline),
        ]
    }
1324
1325
class TclLexer(RegexLexer):
    """
    For Tcl source code.

    Command-position words are matched by rules generated from
    ``_gen_command_rules``; separate context-suffixed copies of the
    'command'/'params' states track whether we are inside braces,
    brackets or parens so the closing delimiter can pop two states
    at once.

    *New in Pygments 0.10.*
    """

    # Words highlighted as keywords when in command position.
    keyword_cmds_re = (
        r'\b(after|apply|array|break|catch|continue|elseif|else|error|'
        r'eval|expr|for|foreach|global|if|namespace|proc|rename|return|'
        r'set|switch|then|trace|unset|update|uplevel|upvar|variable|'
        r'vwait|while)\b'
    )

    # Words highlighted as builtin commands when in command position.
    builtin_cmds_re = (
        r'\b(append|bgerror|binary|cd|chan|clock|close|concat|dde|dict|'
        r'encoding|eof|exec|exit|fblocked|fconfigure|fcopy|file|'
        r'fileevent|flush|format|gets|glob|history|http|incr|info|interp|'
        r'join|lappend|lassign|lindex|linsert|list|llength|load|loadTk|'
        r'lrange|lrepeat|lreplace|lreverse|lsearch|lset|lsort|mathfunc|'
        r'mathop|memory|msgcat|open|package|pid|pkg::create|pkg_mkIndex|'
        r'platform|platform::shell|puts|pwd|re_syntax|read|refchan|'
        r'regexp|registry|regsub|scan|seek|socket|source|split|string|'
        r'subst|tell|time|tm|unknown|unload)\b'
    )

    name = 'Tcl'
    aliases = ['tcl']
    filenames = ['*.tcl']
    mimetypes = ['text/x-tcl', 'text/x-script.tcl', 'application/x-tcl']

    # Helper run at class-definition time (not a method): builds the rule
    # list for a command position, transitioning into 'params<context>'.
    def _gen_command_rules(keyword_cmds_re, builtin_cmds_re, context=""):
        return [
            (keyword_cmds_re, Keyword, 'params' + context),
            (builtin_cmds_re, Name.Builtin, 'params' + context),
            (r'([\w\.\-]+)', Name.Variable, 'params' + context),
            (r'#', Comment, 'comment'),
        ]

    tokens = {
        'root': [
            include('command'),
            include('basic'),
            include('data'),
            (r'}', Keyword), # HACK: somehow we miscounted our braces
        ],
        # One command state per bracketing context (see class docstring).
        'command': _gen_command_rules(keyword_cmds_re, builtin_cmds_re),
        'command-in-brace': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-brace"),
        'command-in-bracket': _gen_command_rules(keyword_cmds_re,
                                                 builtin_cmds_re,
                                                 "-in-bracket"),
        'command-in-paren': _gen_command_rules(keyword_cmds_re,
                                               builtin_cmds_re,
                                               "-in-paren"),
        'basic': [
            (r'\(', Keyword, 'paren'),
            (r'\[', Keyword, 'bracket'),
            (r'\{', Keyword, 'brace'),
            (r'"', String.Double, 'string'),
            (r'(eq|ne|in|ni)\b', Operator.Word),
            (r'!=|==|<<|>>|<=|>=|&&|\|\||\*\*|[-+~!*/%<>&^|?:]', Operator),
        ],
        'data': [
            (r'\s+', Text),
            (r'0x[a-fA-F0-9]+', Number.Hex),
            (r'0[0-7]+', Number.Oct),
            (r'\d+\.\d+', Number.Float),
            (r'\d+', Number.Integer),
            (r'\$([\w\.\-\:]+)', Name.Variable),
            (r'([\w\.\-\:]+)', Text),
        ],
        # Arguments of a command; ';' or newline ends the command.
        'params': [
            (r';', Keyword, '#pop'),
            (r'\n', Text, '#pop'),
            (r'(else|elseif|then)\b', Keyword),
            include('basic'),
            include('data'),
        ],
        # Context variants: the closing delimiter pops both the params
        # state and the enclosing brace/paren/bracket state.
        'params-in-brace': [
            (r'}', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-paren': [
            (r'\)', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'params-in-bracket': [
            (r'\]', Keyword, ('#pop', '#pop')),
            include('params')
        ],
        'string': [
            (r'\[', String.Double, 'string-square'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|[^"\\])', String.Double),
            (r'"', String.Double, '#pop')
        ],
        # Bracketed substitution inside a double-quoted string.
        'string-square': [
            (r'\[', String.Double, 'string-square'),
            (r'(?s)(\\\\|\\[0-7]+|\\.|\\\n|[^\]\\])', String.Double),
            (r'\]', String.Double, '#pop')
        ],
        'brace': [
            (r'}', Keyword, '#pop'),
            include('command-in-brace'),
            include('basic'),
            include('data'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('command-in-paren'),
            include('basic'),
            include('data'),
        ],
        'bracket': [
            (r'\]', Keyword, '#pop'),
            include('command-in-bracket'),
            include('basic'),
            include('data'),
        ],
        # '#' comments; a trailing backslash continues the comment.
        'comment': [
            (r'.*[^\\]\n', Comment, '#pop'),
            (r'.*\\\n', Comment),
        ],
    }

    def analyse_text(text):
        # A tcl shebang is the only heuristic used.
        return shebang_matches(text, r'(tcl)')
1454
1455
class FactorLexer(RegexLexer):
    """
    Lexer for the `Factor <http://factorcode.org>`_ language.

    Factor words are whitespace-delimited, so every builtin pattern below
    ends in ``\s`` and the vocabularies are matched as big alternations.

    NOTE(review): inside these *raw* strings, sequences like ``wrapper\\?``
    compile to "``wrapper`` followed by an optional literal backslash",
    not the literal word ``wrapper?`` that was presumably intended
    (that would be ``\?``).  The double escaping looks systematic --
    confirm against the Factor vocabulary names before relying on
    builtin highlighting.

    *New in Pygments 1.4.*
    """
    name = 'Factor'
    aliases = ['factor']
    filenames = ['*.factor']
    mimetypes = ['text/x-factor']

    flags = re.MULTILINE | re.UNICODE

    # Words from the 'kernel' vocabulary.
    builtin_kernel = (
        r'(?:or|2bi|2tri|while|wrapper|nip|4dip|wrapper\\?|bi\\*|'
        r'callstack>array|both\\?|hashcode|die|dupd|callstack|'
        r'callstack\\?|3dup|tri@|pick|curry|build|\\?execute|3bi|'
        r'prepose|>boolean|\\?if|clone|eq\\?|tri\\*|\\?|=|swapd|'
        r'2over|2keep|3keep|clear|2dup|when|not|tuple\\?|dup|2bi\\*|'
        r'2tri\\*|call|tri-curry|object|bi@|do|unless\\*|if\\*|loop|'
        r'bi-curry\\*|drop|when\\*|assert=|retainstack|assert\\?|-rot|'
        r'execute|2bi@|2tri@|boa|with|either\\?|3drop|bi|curry\\?|'
        r'datastack|until|3dip|over|3curry|tri-curry\\*|tri-curry@|swap|'
        r'and|2nip|throw|bi-curry|\\(clone\\)|hashcode\\*|compose|2dip|if|3tri|'
        r'unless|compose\\?|tuple|keep|2curry|equal\\?|assert|tri|2drop|'
        r'most|<wrapper>|boolean\\?|identity-hashcode|identity-tuple\\?|'
        r'null|new|dip|bi-curry@|rot|xor|identity-tuple|boolean)\s'
    )

    # Words from the 'assocs' vocabulary.
    builtin_assocs = (
        r'(?:\\?at|assoc\\?|assoc-clone-like|assoc=|delete-at\\*|'
        r'assoc-partition|extract-keys|new-assoc|value\\?|assoc-size|'
        r'map>assoc|push-at|assoc-like|key\\?|assoc-intersect|'
        r'assoc-refine|update|assoc-union|assoc-combine|at\\*|'
        r'assoc-empty\\?|at\\+|set-at|assoc-all\\?|assoc-subset\\?|'
        r'assoc-hashcode|change-at|assoc-each|assoc-diff|zip|values|'
        r'value-at|rename-at|inc-at|enum\\?|at|cache|assoc>map|<enum>|'
        r'assoc|assoc-map|enum|value-at\\*|assoc-map-as|>alist|'
        r'assoc-filter-as|clear-assoc|assoc-stack|maybe-set-at|'
        r'substitute|assoc-filter|2cache|delete-at|assoc-find|keys|'
        r'assoc-any\\?|unzip)\s'
    )

    # Words from the 'combinators' vocabulary.
    builtin_combinators = (
        r'(?:case|execute-effect|no-cond|no-case\\?|3cleave>quot|2cleave|'
        r'cond>quot|wrong-values\\?|no-cond\\?|cleave>quot|no-case|'
        r'case>quot|3cleave|wrong-values|to-fixed-point|alist>quot|'
        r'case-find|cond|cleave|call-effect|2cleave>quot|recursive-hashcode|'
        r'linear-case-quot|spread|spread>quot)\s'
    )

    # Words from the 'math' vocabulary.
    builtin_math = (
        r'(?:number=|if-zero|next-power-of-2|each-integer|\\?1\\+|'
        r'fp-special\\?|imaginary-part|unless-zero|float>bits|number\\?|'
        r'fp-infinity\\?|bignum\\?|fp-snan\\?|denominator|fp-bitwise=|\\*|'
        r'\\+|power-of-2\\?|-|u>=|/|>=|bitand|log2-expects-positive|<|'
        r'log2|>|integer\\?|number|bits>double|2/|zero\\?|(find-integer)|'
        r'bits>float|float\\?|shift|ratio\\?|even\\?|ratio|fp-sign|bitnot|'
        r'>fixnum|complex\\?|/i|/f|byte-array>bignum|when-zero|sgn|>bignum|'
        r'next-float|u<|u>|mod|recip|rational|find-last-integer|>float|'
        r'(all-integers\\?)|2^|times|integer|fixnum\\?|neg|fixnum|sq|'
        r'bignum|(each-integer)|bit\\?|fp-qnan\\?|find-integer|complex|'
        r'<fp-nan>|real|double>bits|bitor|rem|fp-nan-payload|all-integers\\?|'
        r'real-part|log2-expects-positive\\?|prev-float|align|unordered\\?|'
        r'float|fp-nan\\?|abs|bitxor|u<=|odd\\?|<=|/mod|rational\\?|>integer|'
        r'real\\?|numerator)\s'
    )

    # Words from the 'sequences' vocabulary.
    builtin_sequences = (
        r'(?:member-eq\\?|append|assert-sequence=|find-last-from|trim-head-slice|'
        r'clone-like|3sequence|assert-sequence\\?|map-as|last-index-from|'
        r'reversed|index-from|cut\\*|pad-tail|remove-eq!|concat-as|'
        r'but-last|snip|trim-tail|nths|nth|2selector|sequence|slice\\?|'
        r'<slice>|partition|remove-nth|tail-slice|empty\\?|tail\\*|'
        r'if-empty|find-from|virtual-sequence\\?|member\\?|set-length|'
        r'drop-prefix|unclip|unclip-last-slice|iota|map-sum|'
        r'bounds-error\\?|sequence-hashcode-step|selector-for|'
        r'accumulate-as|map|start|midpoint@|\\(accumulate\\)|rest-slice|'
        r'prepend|fourth|sift|accumulate!|new-sequence|follow|map!|'
        r'like|first4|1sequence|reverse|slice|unless-empty|padding|'
        r'virtual@|repetition\\?|set-last|index|4sequence|max-length|'
        r'set-second|immutable-sequence|first2|first3|replicate-as|'
        r'reduce-index|unclip-slice|supremum|suffix!|insert-nth|'
        r'trim-tail-slice|tail|3append|short|count|suffix|concat|'
        r'flip|filter|sum|immutable\\?|reverse!|2sequence|map-integers|'
        r'delete-all|start\\*|indices|snip-slice|check-slice|sequence\\?|'
        r'head|map-find|filter!|append-as|reduce|sequence=|halves|'
        r'collapse-slice|interleave|2map|filter-as|binary-reduce|'
        r'slice-error\\?|product|bounds-check\\?|bounds-check|harvest|'
        r'immutable|virtual-exemplar|find|produce|remove|pad-head|last|'
        r'replicate|set-fourth|remove-eq|shorten|reversed\\?|'
        r'map-find-last|3map-as|2unclip-slice|shorter\\?|3map|find-last|'
        r'head-slice|pop\\*|2map-as|tail-slice\\*|but-last-slice|'
        r'2map-reduce|iota\\?|collector-for|accumulate|each|selector|'
        r'append!|new-resizable|cut-slice|each-index|head-slice\\*|'
        r'2reverse-each|sequence-hashcode|pop|set-nth|\\?nth|'
        r'<flat-slice>|second|join|when-empty|collector|'
        r'immutable-sequence\\?|<reversed>|all\\?|3append-as|'
        r'virtual-sequence|subseq\\?|remove-nth!|push-either|new-like|'
        r'length|last-index|push-if|2all\\?|lengthen|assert-sequence|'
        r'copy|map-reduce|move|third|first|3each|tail\\?|set-first|'
        r'prefix|bounds-error|any\\?|<repetition>|trim-slice|exchange|'
        r'surround|2reduce|cut|change-nth|min-length|set-third|produce-as|'
        r'push-all|head\\?|delete-slice|rest|sum-lengths|2each|head\\*|'
        r'infimum|remove!|glue|slice-error|subseq|trim|replace-slice|'
        r'push|repetition|map-index|trim-head|unclip-last|mismatch)\s'
    )

    # Words from the 'namespaces' vocabulary.
    builtin_namespaces = (
        r'(?:global|\\+@|change|set-namestack|change-global|init-namespaces|'
        r'on|off|set-global|namespace|set|with-scope|bind|with-variable|'
        r'inc|dec|counter|initialize|namestack|get|get-global|make-assoc)\s'
    )

    # Words from the 'arrays' vocabulary.
    builtin_arrays = (
        r'(?:<array>|2array|3array|pair|>array|1array|4array|pair\\?|'
        r'array|resize-array|array\\?)\s'
    )

    # Words from the 'io' vocabulary.
    builtin_io = (
        r'(?:\\+character\\+|bad-seek-type\\?|readln|each-morsel|stream-seek|'
        r'read|print|with-output-stream|contents|write1|stream-write1|'
        r'stream-copy|stream-element-type|with-input-stream|'
        r'stream-print|stream-read|stream-contents|stream-tell|'
        r'tell-output|bl|seek-output|bad-seek-type|nl|stream-nl|write|'
        r'flush|stream-lines|\\+byte\\+|stream-flush|read1|'
        r'seek-absolute\\?|stream-read1|lines|stream-readln|'
        r'stream-read-until|each-line|seek-end|with-output-stream\\*|'
        r'seek-absolute|with-streams|seek-input|seek-relative\\?|'
        r'input-stream|stream-write|read-partial|seek-end\\?|'
        r'seek-relative|error-stream|read-until|with-input-stream\\*|'
        r'with-streams\\*|tell-input|each-block|output-stream|'
        r'stream-read-partial|each-stream-block|each-stream-line)\s'
    )

    # Words from the 'strings' vocabulary.
    builtin_strings = (
        r'(?:resize-string|>string|<string>|1string|string|string\\?)\s'
    )

    # Words from the 'vectors' vocabulary.
    builtin_vectors = (
        r'(?:vector\\?|<vector>|\\?push|vector|>vector|1vector)\s'
    )

    # Words from the 'continuations' vocabulary.
    builtin_continuations = (
        r'(?:with-return|restarts|return-continuation|with-datastack|'
        r'recover|rethrow-restarts|<restart>|ifcc|set-catchstack|'
        r'>continuation<|cleanup|ignore-errors|restart\\?|'
        r'compute-restarts|attempt-all-error|error-thread|continue|'
        r'<continuation>|attempt-all-error\\?|condition\\?|'
        r'<condition>|throw-restarts|error|catchstack|continue-with|'
        r'thread-error-hook|continuation|rethrow|callcc1|'
        r'error-continuation|callcc0|attempt-all|condition|'
        r'continuation\\?|restart|return)\s'
    )

    tokens = {
        'root': [
            # TODO: (( inputs -- outputs ))
            # TODO: << ... >>

            # defining words
            (r'(\s*)(:|::|MACRO:|MEMO:)(\s+)(\S+)',
             bygroups(Text, Keyword, Text, Name.Function)),
            (r'(\s*)(M:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Text, Keyword, Text, Name.Class, Text, Name.Function)),
            (r'(\s*)(GENERIC:)(\s+)(\S+)',
             bygroups(Text, Keyword, Text, Name.Function)),
            (r'(\s*)(HOOK:|GENERIC#)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Text, Keyword, Text, Name.Function, Text, Name.Function)),
            # Stack-effect declaration '( inputs -- outputs )'.
            (r'(\()(\s+)', bygroups(Name.Function, Text), 'stackeffect'),
            (r'\;\s', Keyword),

            # imports and namespaces
            (r'(USING:)((?:\s|\\\s)+)',
             bygroups(Keyword.Namespace, Text), 'import'),
            (r'(USE:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(UNUSE:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(QUALIFIED:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(QUALIFIED-WITH:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(FROM:|EXCLUDE:)(\s+)(\S+)(\s+)(=>)',
             bygroups(Keyword.Namespace, Text, Name.Namespace, Text, Text)),
            (r'(IN:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Namespace)),
            (r'(?:ALIAS|DEFER|FORGET|POSTPONE):', Keyword.Namespace),

            # tuples and classes
            (r'(TUPLE:)(\s+)(\S+)(\s+<\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class), 'slots'),
            (r'(TUPLE:)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class), 'slots'),
            (r'(UNION:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)),
            (r'(INTERSECTION:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Class)),
            (r'(PREDICATE:)(\s+)(\S+)(\s+<\s+)(\S+)',
             bygroups(Keyword, Text, Name.Class, Text, Name.Class)),
            (r'(C:)(\s+)(\S+)(\s+)(\S+)',
             bygroups(Keyword, Text, Name.Function, Text, Name.Class)),
            (r'INSTANCE:', Keyword),
            (r'SLOT:', Keyword),
            (r'MIXIN:', Keyword),
            (r'(?:SINGLETON|SINGLETONS):', Keyword),

            # other syntax
            (r'CONSTANT:', Keyword),
            (r'(?:SYMBOL|SYMBOLS):', Keyword),
            (r'ERROR:', Keyword),
            (r'SYNTAX:', Keyword),
            (r'(HELP:)(\s+)(\S+)', bygroups(Keyword, Text, Name.Function)),
            (r'(MAIN:)(\s+)(\S+)',
             bygroups(Keyword.Namespace, Text, Name.Function)),
            (r'(?:ALIEN|TYPEDEF|FUNCTION|STRUCT):', Keyword),

            # vocab.private
            # TODO: words inside vocab.private should have red names?
            (r'(?:<PRIVATE|PRIVATE>)', Keyword.Namespace),

            # strings
            (r'"""\s+(?:.|\n)*?\s+"""', String),
            (r'"(?:\\\\|\\"|[^"])*"', String),
            (r'CHAR:\s+(\\[\\abfnrstv]*|\S)\s', String.Char),

            # comments
            (r'\!\s+.*$', Comment),
            (r'#\!\s+.*$', Comment),

            # boolean constants
            (r'(t|f)\s', Name.Constant),

            # numbers
            (r'-?\d+\.\d+\s', Number.Float),
            (r'-?\d+\s', Number.Integer),
            (r'HEX:\s+[a-fA-F\d]+\s', Number.Hex),
            (r'BIN:\s+[01]+\s', Number.Integer),
            (r'OCT:\s+[0-7]+\s', Number.Oct),

            # operators
            (r'[-+/*=<>^]\s', Operator),

            # keywords
            (r'(?:deprecated|final|foldable|flushable|inline|recursive)\s',
             Keyword),

            # builtins
            (builtin_kernel, Name.Builtin),
            (builtin_assocs, Name.Builtin),
            (builtin_combinators, Name.Builtin),
            (builtin_math, Name.Builtin),
            (builtin_sequences, Name.Builtin),
            (builtin_namespaces, Name.Builtin),
            (builtin_arrays, Name.Builtin),
            (builtin_io, Name.Builtin),
            (builtin_strings, Name.Builtin),
            (builtin_vectors, Name.Builtin),
            (builtin_continuations, Name.Builtin),

            # whitespaces - usually not relevant
            (r'\s+', Text),

            # everything else is text
            (r'\S+', Text),
        ],

        # Inside '( inputs -- outputs )'; nested effects push this state.
        'stackeffect': [
            (r'\s*\(', Name.Function, 'stackeffect'),
            (r'\)', Name.Function, '#pop'),
            (r'\-\-', Name.Function),
            (r'\s+', Text),
            (r'\S+', Name.Variable),
        ],

        # Slot names of a TUPLE: declaration, up to the closing ';'.
        'slots': [
            (r'\s+', Text),
            (r';\s', Keyword, '#pop'),
            (r'\S+', Name.Variable),
        ],

        # Vocabulary list of a USING: declaration, up to ';'.
        'import': [
            (r';', Keyword, '#pop'),
            (r'\S+', Name.Namespace),
            (r'\s+', Text),
        ],
    }
1741
1742
class FancyLexer(RegexLexer):
    """
    Pygments Lexer For `Fancy <http://www.fancy-lang.org/>`_.

    Fancy is a self-hosted, pure object-oriented, dynamic,
    class-based, concurrent general-purpose programming language
    running on Rubinius, the Ruby VM.

    *New in Pygments 1.5.*
    """
    name = 'Fancy'
    filenames = ['*.fy', '*.fancypack']
    mimetypes = ['text/x-fancysrc']
    aliases = ['fancy', 'fy']

    # NOTE: within each state the first matching rule wins, so ordering
    # is significant (e.g. function names with a trailing ':' must be
    # tried before the generic operator rule).
    tokens = {
        # copied from PerlLexer:
        # Consumes the replacement part of an s{...}{...}-style
        # substitution, one alternative per possible delimiter pair,
        # then pops back to the calling state.
        'balanced-regex': [
            (r'/(\\\\|\\/|[^/])*/[egimosx]*', String.Regex, '#pop'),
            (r'!(\\\\|\\!|[^!])*![egimosx]*', String.Regex, '#pop'),
            (r'\\(\\\\|[^\\])*\\[egimosx]*', String.Regex, '#pop'),
            (r'{(\\\\|\\}|[^}])*}[egimosx]*', String.Regex, '#pop'),
            (r'<(\\\\|\\>|[^>])*>[egimosx]*', String.Regex, '#pop'),
            (r'\[(\\\\|\\\]|[^\]])*\][egimosx]*', String.Regex, '#pop'),
            (r'\((\\\\|\\\)|[^\)])*\)[egimosx]*', String.Regex, '#pop'),
            (r'@(\\\\|\\\@|[^\@])*@[egimosx]*', String.Regex, '#pop'),
            (r'%(\\\\|\\\%|[^\%])*%[egimosx]*', String.Regex, '#pop'),
            (r'\$(\\\\|\\\$|[^\$])*\$[egimosx]*', String.Regex, '#pop'),
        ],
        'root': [
            (r'\s+', Text),

            # balanced delimiters (copied from PerlLexer):
            # the pattern part of a substitution; the replacement part
            # is then handled by the 'balanced-regex' state above
            (r's{(\\\\|\\}|[^}])*}\s*', String.Regex, 'balanced-regex'),
            (r's<(\\\\|\\>|[^>])*>\s*', String.Regex, 'balanced-regex'),
            (r's\[(\\\\|\\\]|[^\]])*\]\s*', String.Regex, 'balanced-regex'),
            (r's\((\\\\|\\\)|[^\)])*\)\s*', String.Regex, 'balanced-regex'),
            (r'm?/(\\\\|\\/|[^/\n])*/[gcimosx]*', String.Regex),
            (r'm(?=[/!\\{<\[\(@%\$])', String.Regex, 'balanced-regex'),

            # Comments
            (r'#(.*?)\n', Comment.Single),
            # Symbols
            (r'\'([^\'\s\[\]\(\)\{\}]+|\[\])', String.Symbol),
            # Multi-line DoubleQuotedString
            (r'"""(\\\\|\\"|[^"])*"""', String),
            # DoubleQuotedString
            (r'"(\\\\|\\"|[^"])*"', String),
            # keywords
            (r'(def|class|try|catch|finally|retry|return|return_local|match|'
             r'case|->|=>)\b', Keyword),
            # constants
            (r'(self|super|nil|false|true)\b', Name.Constant),
            (r'[(){};,/?\|:\\]', Punctuation),
            # names
            (r'(Object|Array|Hash|Directory|File|Class|String|Number|'
             r'Enumerable|FancyEnumerable|Block|TrueClass|NilClass|'
             r'FalseClass|Tuple|Symbol|Stack|Set|FancySpec|Method|Package|'
             r'Range)\b', Name.Builtin),
            # functions
            # message-send names end in ':' (Smalltalk-style keyword syntax)
            (r'[a-zA-Z]([a-zA-Z0-9_]|[-+?!=*/^><%])*:', Name.Function),
            # operators, must be below functions
            (r'[-+*/~,<>=&!?%^\[\]\.$]+', Operator),
            ('[A-Z][a-zA-Z0-9_]*', Name.Constant),
            ('@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Instance),
            ('@@[a-zA-Z_][a-zA-Z0-9_]*', Name.Variable.Class),
            ('@@?', Operator),
            ('[a-zA-Z_][a-zA-Z0-9_]*', Name),
            # numbers - / checks are necessary to avoid mismarking regexes,
            # see comment in RubyLexer
            (r'(0[oO]?[0-7]+(?:_[0-7]+)*)(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0[xX][0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*)(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0[bB][01]+(?:_[01]+)*)(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            (r'([\d]+(?:_\d+)*)(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            (r'\d+([eE][+-]?[0-9]+)|\d+\.\d+([eE][+-]?[0-9]+)?', Number.Float),
            (r'\d+', Number.Integer)
        ]
    }
1825
1826
class DgLexer(RegexLexer):
    """
    Lexer for `dg <http://pyos.github.com/dg>`_,
    a functional and object-oriented programming language
    running on the CPython 3 VM.

    *New in Pygments 1.6.*
    """
    name = 'dg'
    aliases = ['dg']
    filenames = ['*.dg']
    mimetypes = ['text/x-dg']

    # NOTE: rule order matters -- e.g. float rules must come before the
    # integer rule, and builtin-name rules before the catch-all Name rule.
    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Text),
            (r'#.*?$', Comment.Single),
            # Lexemes:
            # Numbers
            (r'0[bB][01]+', Number.Bin),
            (r'0[oO][0-7]+', Number.Oct),
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'[+-]?\d+\.\d+([eE][+-]?\d+)?[jJ]?', Number.Float),
            (r'[+-]?\d+[eE][+-]?\d+[jJ]?', Number.Float),
            (r'[+-]?\d+[jJ]?', Number.Integer),
            # Character/String Literals
            # combined() merges the escape rules, the terminator rule for
            # the given quote style, and the generic 'string' body into
            # one anonymous state; triple-quote rules must precede the
            # single-quote ones so the longer delimiter wins.
            (r"[br]*'''", String, combined('stringescape', 'tsqs', 'string')),
            (r'[br]*"""', String, combined('stringescape', 'tdqs', 'string')),
            (r"[br]*'", String, combined('stringescape', 'sqs', 'string')),
            (r'[br]*"', String, combined('stringescape', 'dqs', 'string')),
            # Operators
            (r"`\w+'*`", Operator), # Infix links
            # Reserved infix links
            (r'\b(or|and|if|else|where|is|in)\b', Operator.Word),
            (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),
            # Identifiers
            # Python 3 types
            (r"(?<!\.)(bool|bytearray|bytes|classmethod|complex|dict'?|"
             r"float|frozenset|int|list'?|memoryview|object|property|range|"
             r"set'?|slice|staticmethod|str|super|tuple'?|type)"
             r"(?!['\w])", Name.Builtin),
            # Python 3 builtins + some more
            (r'(?<!\.)(__import__|abs|all|any|bin|bind|chr|cmp|compile|complex|'
             r'delattr|dir|divmod|drop|dropwhile|enumerate|eval|filter|flip|'
             r'foldl1?|format|fst|getattr|globals|hasattr|hash|head|hex|id|'
             r'init|input|isinstance|issubclass|iter|iterate|last|len|locals|'
             r'map|max|min|next|oct|open|ord|pow|print|repr|reversed|round|'
             r'setattr|scanl1?|snd|sorted|sum|tail|take|takewhile|vars|zip)'
             r"(?!['\w])", Name.Builtin),
            (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
             Name.Builtin.Pseudo),
            # exception-like capitalized names (the (?<!\.) lookbehind
            # keeps attribute accesses like obj.FooError untouched)
            (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
             Name.Exception),
            (r"(?<!\.)(KeyboardInterrupt|SystemExit|StopIteration|"
             r"GeneratorExit)(?!['\w])", Name.Exception),
            # Compiler-defined identifiers
            (r"(?<![\.\w])(import|inherit|for|while|switch|not|raise|unsafe|"
             r"yield|with)(?!['\w])", Keyword.Reserved),
            # Other links
            (r"[A-Z_']+\b", Name),
            (r"[A-Z][\w']*\b", Keyword.Type),
            (r"\w+'*", Name),
            # Blocks
            (r'[()]', Punctuation),
        ],
        # Backslash escapes valid inside any string literal.
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        # Generic string body shared by all four quote styles; handles
        # printf-style %-interpolation sequences.
        'string': [
            (r'%(\([a-zA-Z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol),
            (r'[^\\\'"%\n]+', String),
            # quotes, percents and backslashes must be parsed one at a time
            (r'[\'"\\]', String),
            # unhandled string formatting sign
            (r'%', String),
            (r'\n', String)
        ],
        # Terminator states: each matches only its closing quote and pops
        # the combined string state.
        'dqs': [
            (r'"', String, '#pop')
        ],
        'sqs': [
            (r"'", String, '#pop')
        ],
        'tdqs': [
            (r'"""', String, '#pop')
        ],
        'tsqs': [
            (r"'''", String, '#pop')
        ],
    }

eric ide

mercurial