|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.erlang |
|
4 ~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexers for Erlang. |
|
7 |
|
8 :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \ |
|
15 include, default |
|
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
17 Number, Punctuation, Generic |
|
18 |
|
19 __all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer', |
|
20 'ElixirLexer'] |
|
21 |
|
22 |
|
23 line_re = re.compile('.*?\n') |
|
24 |
|
25 |
|
26 class ErlangLexer(RegexLexer): |
|
27 """ |
|
28 For the Erlang functional programming language. |
|
29 |
|
30 Blame Jeremy Thurgood (http://jerith.za.net/). |
|
31 |
|
32 .. versionadded:: 0.9 |
|
33 """ |
|
34 |
|
35 name = 'Erlang' |
|
36 aliases = ['erlang'] |
|
37 filenames = ['*.erl', '*.hrl', '*.es', '*.escript'] |
|
38 mimetypes = ['text/x-erlang'] |
|
39 |
|
40 keywords = ( |
|
41 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', |
|
42 'let', 'of', 'query', 'receive', 'try', 'when', |
|
43 ) |
|
44 |
|
45 builtins = ( # See erlang(3) man page |
|
46 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', |
|
47 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', |
|
48 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', |
|
49 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', |
|
50 'float', 'float_to_list', 'fun_info', 'fun_to_list', |
|
51 'function_exported', 'garbage_collect', 'get', 'get_keys', |
|
52 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', |
|
53 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', |
|
54 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', |
|
55 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', |
|
56 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', |
|
57 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', |
|
58 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', |
|
59 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', |
|
60 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', |
|
61 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', |
|
62 'pid_to_list', 'port_close', 'port_command', 'port_connect', |
|
63 'port_control', 'port_call', 'port_info', 'port_to_list', |
|
64 'process_display', 'process_flag', 'process_info', 'purge_module', |
|
65 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', |
|
66 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', |
|
67 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', |
|
68 'spawn_opt', 'split_binary', 'start_timer', 'statistics', |
|
69 'suspend_process', 'system_flag', 'system_info', 'system_monitor', |
|
70 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', |
|
71 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', |
|
72 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' |
|
73 ) |
|
74 |
|
75 operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)' |
|
76 word_operators = ( |
|
77 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', |
|
78 'div', 'not', 'or', 'orelse', 'rem', 'xor' |
|
79 ) |
|
80 |
|
81 atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')" |
|
82 |
|
83 variable_re = r'(?:[A-Z_]\w*)' |
|
84 |
|
85 escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))' |
|
86 |
|
87 macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' |
|
88 |
|
89 base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' |
|
90 |
|
91 tokens = { |
|
92 'root': [ |
|
93 (r'\s+', Text), |
|
94 (r'%.*\n', Comment), |
|
95 (words(keywords, suffix=r'\b'), Keyword), |
|
96 (words(builtins, suffix=r'\b'), Name.Builtin), |
|
97 (words(word_operators, suffix=r'\b'), Operator.Word), |
|
98 (r'^-', Punctuation, 'directive'), |
|
99 (operators, Operator), |
|
100 (r'"', String, 'string'), |
|
101 (r'<<', Name.Label), |
|
102 (r'>>', Name.Label), |
|
103 ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)), |
|
104 ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()', |
|
105 bygroups(Name.Function, Text, Punctuation)), |
|
106 (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer), |
|
107 (r'[+-]?\d+', Number.Integer), |
|
108 (r'[+-]?\d+.\d+', Number.Float), |
|
109 (r'[]\[:_@\".{}()|;,]', Punctuation), |
|
110 (variable_re, Name.Variable), |
|
111 (atom_re, Name), |
|
112 (r'\?'+macro_re, Name.Constant), |
|
113 (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char), |
|
114 (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label), |
|
115 ], |
|
116 'string': [ |
|
117 (escape_re, String.Escape), |
|
118 (r'"', String, '#pop'), |
|
119 (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), |
|
120 (r'[^"\\~]+', String), |
|
121 (r'~', String), |
|
122 ], |
|
123 'directive': [ |
|
124 (r'(define)(\s*)(\()('+macro_re+r')', |
|
125 bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), |
|
126 (r'(record)(\s*)(\()('+macro_re+r')', |
|
127 bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), |
|
128 (atom_re, Name.Entity, '#pop'), |
|
129 ], |
|
130 } |
|
131 |
|
132 |
|
133 class ErlangShellLexer(Lexer): |
|
134 """ |
|
135 Shell sessions in erl (for Erlang code). |
|
136 |
|
137 .. versionadded:: 1.1 |
|
138 """ |
|
139 name = 'Erlang erl session' |
|
140 aliases = ['erl'] |
|
141 filenames = ['*.erl-sh'] |
|
142 mimetypes = ['text/x-erl-shellsession'] |
|
143 |
|
144 _prompt_re = re.compile(r'\d+>(?=\s|\Z)') |
|
145 |
|
146 def get_tokens_unprocessed(self, text): |
|
147 erlexer = ErlangLexer(**self.options) |
|
148 |
|
149 curcode = '' |
|
150 insertions = [] |
|
151 for match in line_re.finditer(text): |
|
152 line = match.group() |
|
153 m = self._prompt_re.match(line) |
|
154 if m is not None: |
|
155 end = m.end() |
|
156 insertions.append((len(curcode), |
|
157 [(0, Generic.Prompt, line[:end])])) |
|
158 curcode += line[end:] |
|
159 else: |
|
160 if curcode: |
|
161 for item in do_insertions(insertions, |
|
162 erlexer.get_tokens_unprocessed(curcode)): |
|
163 yield item |
|
164 curcode = '' |
|
165 insertions = [] |
|
166 if line.startswith('*'): |
|
167 yield match.start(), Generic.Traceback, line |
|
168 else: |
|
169 yield match.start(), Generic.Output, line |
|
170 if curcode: |
|
171 for item in do_insertions(insertions, |
|
172 erlexer.get_tokens_unprocessed(curcode)): |
|
173 yield item |
|
174 |
|
175 |
|
176 def gen_elixir_string_rules(name, symbol, token): |
|
177 states = {} |
|
178 states['string_' + name] = [ |
|
179 (r'[^#%s\\]+' % (symbol,), token), |
|
180 include('escapes'), |
|
181 (r'\\.', token), |
|
182 (r'(%s)' % (symbol,), bygroups(token), "#pop"), |
|
183 include('interpol') |
|
184 ] |
|
185 return states |
|
186 |
|
187 |
|
188 def gen_elixir_sigstr_rules(term, token, interpol=True): |
|
189 if interpol: |
|
190 return [ |
|
191 (r'[^#%s\\]+' % (term,), token), |
|
192 include('escapes'), |
|
193 (r'\\.', token), |
|
194 (r'%s[a-zA-Z]*' % (term,), token, '#pop'), |
|
195 include('interpol') |
|
196 ] |
|
197 else: |
|
198 return [ |
|
199 (r'[^%s\\]+' % (term,), token), |
|
200 (r'\\.', token), |
|
201 (r'%s[a-zA-Z]*' % (term,), token, '#pop'), |
|
202 ] |
|
203 |
|
204 |
|
205 class ElixirLexer(RegexLexer): |
|
206 """ |
|
207 For the `Elixir language <http://elixir-lang.org>`_. |
|
208 |
|
209 .. versionadded:: 1.5 |
|
210 """ |
|
211 |
|
212 name = 'Elixir' |
|
213 aliases = ['elixir', 'ex', 'exs'] |
|
214 filenames = ['*.ex', '*.exs'] |
|
215 mimetypes = ['text/x-elixir'] |
|
216 |
|
217 KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch') |
|
218 KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') |
|
219 BUILTIN = ( |
|
220 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', |
|
221 'quote', 'unquote', 'unquote_splicing', 'throw', 'super' |
|
222 ) |
|
223 BUILTIN_DECLARATION = ( |
|
224 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', |
|
225 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback' |
|
226 ) |
|
227 |
|
228 BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') |
|
229 CONSTANT = ('nil', 'true', 'false') |
|
230 |
|
231 PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__') |
|
232 |
|
233 OPERATORS3 = ( |
|
234 '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==', |
|
235 '~>>', '<~>', '|~>', '<|>', |
|
236 ) |
|
237 OPERATORS2 = ( |
|
238 '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~', |
|
239 '->', '<-', '|', '.', '=', '~>', '<~', |
|
240 ) |
|
241 OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') |
|
242 |
|
243 PUNCTUATION = ( |
|
244 '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']' |
|
245 ) |
|
246 |
|
247 def get_tokens_unprocessed(self, text): |
|
248 for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): |
|
249 if token is Name: |
|
250 if value in self.KEYWORD: |
|
251 yield index, Keyword, value |
|
252 elif value in self.KEYWORD_OPERATOR: |
|
253 yield index, Operator.Word, value |
|
254 elif value in self.BUILTIN: |
|
255 yield index, Keyword, value |
|
256 elif value in self.BUILTIN_DECLARATION: |
|
257 yield index, Keyword.Declaration, value |
|
258 elif value in self.BUILTIN_NAMESPACE: |
|
259 yield index, Keyword.Namespace, value |
|
260 elif value in self.CONSTANT: |
|
261 yield index, Name.Constant, value |
|
262 elif value in self.PSEUDO_VAR: |
|
263 yield index, Name.Builtin.Pseudo, value |
|
264 else: |
|
265 yield index, token, value |
|
266 else: |
|
267 yield index, token, value |
|
268 |
|
269 def gen_elixir_sigil_rules(): |
|
270 # all valid sigil terminators (excluding heredocs) |
|
271 terminators = [ |
|
272 (r'\{', r'\}', 'cb'), |
|
273 (r'\[', r'\]', 'sb'), |
|
274 (r'\(', r'\)', 'pa'), |
|
275 (r'<', r'>', 'ab'), |
|
276 (r'/', r'/', 'slas'), |
|
277 (r'\|', r'\|', 'pipe'), |
|
278 ('"', '"', 'quot'), |
|
279 ("'", "'", 'apos'), |
|
280 ] |
|
281 |
|
282 # heredocs have slightly different rules |
|
283 triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] |
|
284 |
|
285 token = String.Other |
|
286 states = {'sigils': []} |
|
287 |
|
288 for term, name in triquotes: |
|
289 states['sigils'] += [ |
|
290 (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), |
|
291 (name + '-end', name + '-intp')), |
|
292 (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), |
|
293 (name + '-end', name + '-no-intp')), |
|
294 ] |
|
295 |
|
296 states[name + '-end'] = [ |
|
297 (r'[a-zA-Z]+', token, '#pop'), |
|
298 default('#pop'), |
|
299 ] |
|
300 states[name + '-intp'] = [ |
|
301 (r'^\s*' + term, String.Heredoc, '#pop'), |
|
302 include('heredoc_interpol'), |
|
303 ] |
|
304 states[name + '-no-intp'] = [ |
|
305 (r'^\s*' + term, String.Heredoc, '#pop'), |
|
306 include('heredoc_no_interpol'), |
|
307 ] |
|
308 |
|
309 for lterm, rterm, name in terminators: |
|
310 states['sigils'] += [ |
|
311 (r'~[a-z]' + lterm, token, name + '-intp'), |
|
312 (r'~[A-Z]' + lterm, token, name + '-no-intp'), |
|
313 ] |
|
314 states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) |
|
315 states[name + '-no-intp'] = \ |
|
316 gen_elixir_sigstr_rules(rterm, token, interpol=False) |
|
317 |
|
318 return states |
|
319 |
|
320 op3_re = "|".join(re.escape(s) for s in OPERATORS3) |
|
321 op2_re = "|".join(re.escape(s) for s in OPERATORS2) |
|
322 op1_re = "|".join(re.escape(s) for s in OPERATORS1) |
|
323 ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re) |
|
324 punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION) |
|
325 alnum = '\w' |
|
326 name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum |
|
327 modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum} |
|
328 complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re) |
|
329 special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})' |
|
330 |
|
331 long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})' |
|
332 hex_char_re = r'(\\x[\da-fA-F]{1,2})' |
|
333 escape_char_re = r'(\\[abdefnrstv])' |
|
334 |
|
335 tokens = { |
|
336 'root': [ |
|
337 (r'\s+', Text), |
|
338 (r'#.*$', Comment.Single), |
|
339 |
|
340 # Various kinds of characters |
|
341 (r'(\?)' + long_hex_char_re, |
|
342 bygroups(String.Char, |
|
343 String.Escape, Number.Hex, String.Escape)), |
|
344 (r'(\?)' + hex_char_re, |
|
345 bygroups(String.Char, String.Escape)), |
|
346 (r'(\?)' + escape_char_re, |
|
347 bygroups(String.Char, String.Escape)), |
|
348 (r'\?\\?.', String.Char), |
|
349 |
|
350 # '::' has to go before atoms |
|
351 (r':::', String.Symbol), |
|
352 (r'::', Operator), |
|
353 |
|
354 # atoms |
|
355 (r':' + special_atom_re, String.Symbol), |
|
356 (r':' + complex_name_re, String.Symbol), |
|
357 (r':"', String.Symbol, 'string_double_atom'), |
|
358 (r":'", String.Symbol, 'string_single_atom'), |
|
359 |
|
360 # [keywords: ...] |
|
361 (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re), |
|
362 bygroups(String.Symbol, Punctuation)), |
|
363 |
|
364 # @attributes |
|
365 (r'@' + name_re, Name.Attribute), |
|
366 |
|
367 # identifiers |
|
368 (name_re, Name), |
|
369 (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)), |
|
370 |
|
371 # operators and punctuation |
|
372 (op3_re, Operator), |
|
373 (op2_re, Operator), |
|
374 (punctuation_re, Punctuation), |
|
375 (r'&\d', Name.Entity), # anon func arguments |
|
376 (op1_re, Operator), |
|
377 |
|
378 # numbers |
|
379 (r'0b[01]+', Number.Bin), |
|
380 (r'0o[0-7]+', Number.Oct), |
|
381 (r'0x[\da-fA-F]+', Number.Hex), |
|
382 (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float), |
|
383 (r'\d(_?\d)*', Number.Integer), |
|
384 |
|
385 # strings and heredocs |
|
386 (r'"""\s*', String.Heredoc, 'heredoc_double'), |
|
387 (r"'''\s*$", String.Heredoc, 'heredoc_single'), |
|
388 (r'"', String.Double, 'string_double'), |
|
389 (r"'", String.Single, 'string_single'), |
|
390 |
|
391 include('sigils'), |
|
392 |
|
393 (r'%\{', Punctuation, 'map_key'), |
|
394 (r'\{', Punctuation, 'tuple'), |
|
395 ], |
|
396 'heredoc_double': [ |
|
397 (r'^\s*"""', String.Heredoc, '#pop'), |
|
398 include('heredoc_interpol'), |
|
399 ], |
|
400 'heredoc_single': [ |
|
401 (r"^\s*'''", String.Heredoc, '#pop'), |
|
402 include('heredoc_interpol'), |
|
403 ], |
|
404 'heredoc_interpol': [ |
|
405 (r'[^#\\\n]+', String.Heredoc), |
|
406 include('escapes'), |
|
407 (r'\\.', String.Heredoc), |
|
408 (r'\n+', String.Heredoc), |
|
409 include('interpol'), |
|
410 ], |
|
411 'heredoc_no_interpol': [ |
|
412 (r'[^\\\n]+', String.Heredoc), |
|
413 (r'\\.', String.Heredoc), |
|
414 (r'\n+', String.Heredoc), |
|
415 ], |
|
416 'escapes': [ |
|
417 (long_hex_char_re, |
|
418 bygroups(String.Escape, Number.Hex, String.Escape)), |
|
419 (hex_char_re, String.Escape), |
|
420 (escape_char_re, String.Escape), |
|
421 ], |
|
422 'interpol': [ |
|
423 (r'#\{', String.Interpol, 'interpol_string'), |
|
424 ], |
|
425 'interpol_string': [ |
|
426 (r'\}', String.Interpol, "#pop"), |
|
427 include('root') |
|
428 ], |
|
429 'map_key': [ |
|
430 include('root'), |
|
431 (r':', Punctuation, 'map_val'), |
|
432 (r'=>', Punctuation, 'map_val'), |
|
433 (r'\}', Punctuation, '#pop'), |
|
434 ], |
|
435 'map_val': [ |
|
436 include('root'), |
|
437 (r',', Punctuation, '#pop'), |
|
438 (r'(?=\})', Punctuation, '#pop'), |
|
439 ], |
|
440 'tuple': [ |
|
441 include('root'), |
|
442 (r'\}', Punctuation, '#pop'), |
|
443 ], |
|
444 } |
|
445 tokens.update(gen_elixir_string_rules('double', '"', String.Double)) |
|
446 tokens.update(gen_elixir_string_rules('single', "'", String.Single)) |
|
447 tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol)) |
|
448 tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol)) |
|
449 tokens.update(gen_elixir_sigil_rules()) |
|
450 |
|
451 |
|
452 class ElixirConsoleLexer(Lexer): |
|
453 """ |
|
454 For Elixir interactive console (iex) output like: |
|
455 |
|
456 .. sourcecode:: iex |
|
457 |
|
458 iex> [head | tail] = [1,2,3] |
|
459 [1,2,3] |
|
460 iex> head |
|
461 1 |
|
462 iex> tail |
|
463 [2,3] |
|
464 iex> [head | tail] |
|
465 [1,2,3] |
|
466 iex> length [head | tail] |
|
467 3 |
|
468 |
|
469 .. versionadded:: 1.5 |
|
470 """ |
|
471 |
|
472 name = 'Elixir iex session' |
|
473 aliases = ['iex'] |
|
474 mimetypes = ['text/x-elixir-shellsession'] |
|
475 |
|
476 _prompt_re = re.compile('(iex|\.{3})(\(\d+\))?> ') |
|
477 |
|
478 def get_tokens_unprocessed(self, text): |
|
479 exlexer = ElixirLexer(**self.options) |
|
480 |
|
481 curcode = '' |
|
482 in_error = False |
|
483 insertions = [] |
|
484 for match in line_re.finditer(text): |
|
485 line = match.group() |
|
486 if line.startswith(u'** '): |
|
487 in_error = True |
|
488 insertions.append((len(curcode), |
|
489 [(0, Generic.Error, line[:-1])])) |
|
490 curcode += line[-1:] |
|
491 else: |
|
492 m = self._prompt_re.match(line) |
|
493 if m is not None: |
|
494 in_error = False |
|
495 end = m.end() |
|
496 insertions.append((len(curcode), |
|
497 [(0, Generic.Prompt, line[:end])])) |
|
498 curcode += line[end:] |
|
499 else: |
|
500 if curcode: |
|
501 for item in do_insertions( |
|
502 insertions, exlexer.get_tokens_unprocessed(curcode)): |
|
503 yield item |
|
504 curcode = '' |
|
505 insertions = [] |
|
506 token = Generic.Error if in_error else Generic.Output |
|
507 yield match.start(), token, line |
|
508 if curcode: |
|
509 for item in do_insertions( |
|
510 insertions, exlexer.get_tokens_unprocessed(curcode)): |
|
511 yield item |