eric6/ThirdParty/Pygments/pygments/lexers/erlang.py

# -*- coding: utf-8 -*-
"""
    pygments.lexers.erlang
    ~~~~~~~~~~~~~~~~~~~~~~

    Lexers for Erlang.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \
    include, default
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic

__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer',
           'ElixirLexer']


line_re = re.compile('.*?\n')


class ErlangLexer(RegexLexer):
    """
    For the Erlang functional programming language.

    Blame Jeremy Thurgood (http://jerith.za.net/).

    .. versionadded:: 0.9
    """

    name = 'Erlang'
    aliases = ['erlang']
    filenames = ['*.erl', '*.hrl', '*.es', '*.escript']
    mimetypes = ['text/x-erlang']

    keywords = (
        'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if',
        'let', 'of', 'query', 'receive', 'try', 'when',
    )

    builtins = (  # See erlang(3) man page
        'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list',
        'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions',
        'byte_size', 'cancel_timer', 'check_process_code', 'delete_module',
        'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit',
        'float', 'float_to_list', 'fun_info', 'fun_to_list',
        'function_exported', 'garbage_collect', 'get', 'get_keys',
        'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary',
        'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean',
        'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list',
        'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record',
        'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom',
        'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom',
        'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple',
        'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5',
        'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor',
        'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2',
        'pid_to_list', 'port_close', 'port_command', 'port_connect',
        'port_control', 'port_call', 'port_info', 'port_to_list',
        'process_display', 'process_flag', 'process_info', 'purge_module',
        'put', 'read_timer', 'ref_to_list', 'register', 'resume_process',
        'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie',
        'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor',
        'spawn_opt', 'split_binary', 'start_timer', 'statistics',
        'suspend_process', 'system_flag', 'system_info', 'system_monitor',
        'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered',
        'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list',
        'universaltime_to_localtime', 'unlink', 'unregister', 'whereis'
    )

    operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)'
    word_operators = (
        'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor',
        'div', 'not', 'or', 'orelse', 'rem', 'xor'
    )

    atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')"

    variable_re = r'(?:[A-Z_]\w*)'

    esc_char_re = r'[bdefnrstv\'"\\]'
    esc_octal_re = r'[0-7][0-7]?[0-7]?'
    esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})'
    esc_ctrl_re = r'\^[a-zA-Z]'
    escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))'

    macro_re = r'(?:'+variable_re+r'|'+atom_re+r')'

    base_re = r'(?:[2-9]|[12][0-9]|3[0-6])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'%.*\n', Comment),
            (words(keywords, suffix=r'\b'), Keyword),
            (words(builtins, suffix=r'\b'), Name.Builtin),
            (words(word_operators, suffix=r'\b'), Operator.Word),
            (r'^-', Punctuation, 'directive'),
            (operators, Operator),
            (r'"', String, 'string'),
            (r'<<', Name.Label),
            (r'>>', Name.Label),
            ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)),
            ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()',
             bygroups(Name.Function, Text, Punctuation)),
            (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer),
            (r'[+-]?\d+', Number.Integer),
            (r'[+-]?\d+.\d+', Number.Float),
            (r'[]\[:_@\".{}()|;,]', Punctuation),
            (variable_re, Name.Variable),
            (atom_re, Name),
            (r'\?'+macro_re, Name.Constant),
            (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char),
            (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label),

            # Erlang script shebang
            (r'\A#!.+\n', Comment.Hashbang),

            # EEP 43: Maps
            # http://www.erlang.org/eeps/eep-0043.html
            (r'#\{', Punctuation, 'map_key'),
        ],
        'string': [
            (escape_re, String.Escape),
            (r'"', String, '#pop'),
            (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol),
            (r'[^"\\~]+', String),
            (r'~', String),
        ],
        'directive': [
            (r'(define)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'),
            (r'(record)(\s*)(\()('+macro_re+r')',
             bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'),
            (atom_re, Name.Entity, '#pop'),
        ],
        'map_key': [
            include('root'),
            (r'=>', Punctuation, 'map_val'),
            (r':=', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
    }


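# Illustrative usage sketch, not part of the upstream Pygments module: it
# shows how the ErlangLexer above might be driven through pygments.highlight().
# The helper name _demo_erlang_lexer and the sample Erlang snippet are invented
# for the example; only ErlangLexer, highlight() and TerminalFormatter come
# from Pygments itself.
def _demo_erlang_lexer():
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    code = (
        "-module(demo).\n"
        "-export([fac/1]).\n"
        "fac(0) -> 1;\n"
        "fac(N) when N > 0 -> N * fac(N - 1).\n"
    )
    # highlight() runs the lexer's 'root' state machine over the snippet and
    # hands the resulting token stream to the formatter.
    return highlight(code, ErlangLexer(), TerminalFormatter())

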
class ErlangShellLexer(Lexer):
    """
    Shell sessions in erl (for Erlang code).

    .. versionadded:: 1.1
    """
    name = 'Erlang erl session'
    aliases = ['erl']
    filenames = ['*.erl-sh']
    mimetypes = ['text/x-erl-shellsession']

    _prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)')

    def get_tokens_unprocessed(self, text):
        erlexer = ErlangLexer(**self.options)

        curcode = ''
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            m = self._prompt_re.match(line)
            if m is not None:
                end = m.end()
                insertions.append((len(curcode),
                                   [(0, Generic.Prompt, line[:end])]))
                curcode += line[end:]
            else:
                if curcode:
                    yield from do_insertions(insertions,
                                             erlexer.get_tokens_unprocessed(curcode))
                    curcode = ''
                    insertions = []
                if line.startswith('*'):
                    yield match.start(), Generic.Traceback, line
                else:
                    yield match.start(), Generic.Output, line
        if curcode:
            yield from do_insertions(insertions,
                                     erlexer.get_tokens_unprocessed(curcode))


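# Illustrative sketch, not part of the upstream module: ErlangShellLexer splits
# each transcript line into a Generic.Prompt chunk and the code after it, then
# re-lexes the accumulated code with ErlangLexer via do_insertions(). The
# transcript and the helper name below are invented for the example.
def _demo_erlang_shell_lexer():
    session = (
        "1> lists:seq(1, 3).\n"
        "[1,2,3]\n"
        "2> length([a, b, c]).\n"
        "3\n"
    )
    # get_tokens() yields (token_type, value) pairs: prompts come out as
    # Generic.Prompt, result lines as Generic.Output, the rest as Erlang code.
    return list(ErlangShellLexer().get_tokens(session))

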
def gen_elixir_string_rules(name, symbol, token):
    states = {}
    states['string_' + name] = [
        (r'[^#%s\\]+' % (symbol,), token),
        include('escapes'),
        (r'\\.', token),
        (r'(%s)' % (symbol,), bygroups(token), "#pop"),
        include('interpol')
    ]
    return states


def gen_elixir_sigstr_rules(term, term_class, token, interpol=True):
    if interpol:
        return [
            (r'[^#%s\\]+' % (term_class,), token),
            include('escapes'),
            (r'\\.', token),
            (r'%s[a-zA-Z]*' % (term,), token, '#pop'),
            include('interpol')
        ]
    else:
        return [
            (r'[^%s\\]+' % (term_class,), token),
            (r'\\.', token),
            (r'%s[a-zA-Z]*' % (term,), token, '#pop'),
        ]


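# Illustrative sketch, not part of the upstream module: what the string-rule
# factory above produces. gen_elixir_string_rules('double', '"', String.Double)
# returns a dict with a single 'string_double' state whose rules consume plain
# characters, escapes and #{...} interpolation until the closing quote pops the
# state; ElixirLexer merges these generated states into its token table below.
def _demo_generated_string_state():
    states = gen_elixir_string_rules('double', '"', String.Double)
    # Expected shape: {'string_double': [<five lexer rules>]}
    return states['string_double']

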
class ElixirLexer(RegexLexer):
    """
    For the `Elixir language <http://elixir-lang.org>`_.

    .. versionadded:: 1.5
    """

    name = 'Elixir'
    aliases = ['elixir', 'ex', 'exs']
    filenames = ['*.ex', '*.eex', '*.exs']
    mimetypes = ['text/x-elixir']

    KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch')
    KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in')
    BUILTIN = (
        'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise',
        'quote', 'unquote', 'unquote_splicing', 'throw', 'super',
    )
    BUILTIN_DECLARATION = (
        'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop',
        'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback',
    )

    BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias')
    CONSTANT = ('nil', 'true', 'false')

    PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__')

    OPERATORS3 = (
        '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==',
        '~>>', '<~>', '|~>', '<|>',
    )
    OPERATORS2 = (
        '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~',
        '->', '<-', '|', '.', '=', '~>', '<~',
    )
    OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&')

    PUNCTUATION = (
        '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']',
    )

    def get_tokens_unprocessed(self, text):
        for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
            if token is Name:
                if value in self.KEYWORD:
                    yield index, Keyword, value
                elif value in self.KEYWORD_OPERATOR:
                    yield index, Operator.Word, value
                elif value in self.BUILTIN:
                    yield index, Keyword, value
                elif value in self.BUILTIN_DECLARATION:
                    yield index, Keyword.Declaration, value
                elif value in self.BUILTIN_NAMESPACE:
                    yield index, Keyword.Namespace, value
                elif value in self.CONSTANT:
                    yield index, Name.Constant, value
                elif value in self.PSEUDO_VAR:
                    yield index, Name.Builtin.Pseudo, value
                else:
                    yield index, token, value
            else:
                yield index, token, value

    def gen_elixir_sigil_rules():
        # all valid sigil terminators (excluding heredocs)
        terminators = [
            (r'\{', r'\}', '}', 'cb'),
            (r'\[', r'\]', r'\]', 'sb'),
            (r'\(', r'\)', ')', 'pa'),
            ('<', '>', '>', 'ab'),
            ('/', '/', '/', 'slas'),
            (r'\|', r'\|', '|', 'pipe'),
            ('"', '"', '"', 'quot'),
            ("'", "'", "'", 'apos'),
        ]

        # heredocs have slightly different rules
        triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')]

        token = String.Other
        states = {'sigils': []}

        for term, name in triquotes:
            states['sigils'] += [
                (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (name + '-end', name + '-intp')),
                (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc),
                 (name + '-end', name + '-no-intp')),
            ]

            states[name + '-end'] = [
                (r'[a-zA-Z]+', token, '#pop'),
                default('#pop'),
            ]
            states[name + '-intp'] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_interpol'),
            ]
            states[name + '-no-intp'] = [
                (r'^\s*' + term, String.Heredoc, '#pop'),
                include('heredoc_no_interpol'),
            ]

        for lterm, rterm, rterm_class, name in terminators:
            states['sigils'] += [
                (r'~[a-z]' + lterm, token, name + '-intp'),
                (r'~[A-Z]' + lterm, token, name + '-no-intp'),
            ]
            states[name + '-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token)
            states[name + '-no-intp'] = \
                gen_elixir_sigstr_rules(rterm, rterm_class, token, interpol=False)

        return states

    op3_re = "|".join(re.escape(s) for s in OPERATORS3)
    op2_re = "|".join(re.escape(s) for s in OPERATORS2)
    op1_re = "|".join(re.escape(s) for s in OPERATORS1)
    ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re)
    punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION)
    alnum = r'\w'
    name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum
    modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum}
    complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re)
    special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})'

    long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})'
    hex_char_re = r'(\\x[\da-fA-F]{1,2})'
    escape_char_re = r'(\\[abdefnrstv])'

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'#.*$', Comment.Single),

            # Various kinds of characters
            (r'(\?)' + long_hex_char_re,
             bygroups(String.Char,
                      String.Escape, Number.Hex, String.Escape)),
            (r'(\?)' + hex_char_re,
             bygroups(String.Char, String.Escape)),
            (r'(\?)' + escape_char_re,
             bygroups(String.Char, String.Escape)),
            (r'\?\\?.', String.Char),

            # '::' has to go before atoms
            (r':::', String.Symbol),
            (r'::', Operator),

            # atoms
            (r':' + special_atom_re, String.Symbol),
            (r':' + complex_name_re, String.Symbol),
            (r':"', String.Symbol, 'string_double_atom'),
            (r":'", String.Symbol, 'string_single_atom'),

            # [keywords: ...]
            (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re),
             bygroups(String.Symbol, Punctuation)),

            # @attributes
            (r'@' + name_re, Name.Attribute),

            # identifiers
            (name_re, Name),
            (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)),

            # operators and punctuation
            (op3_re, Operator),
            (op2_re, Operator),
            (punctuation_re, Punctuation),
            (r'&\d', Name.Entity),   # anon func arguments
            (op1_re, Operator),

            # numbers
            (r'0b[01]+', Number.Bin),
            (r'0o[0-7]+', Number.Oct),
            (r'0x[\da-fA-F]+', Number.Hex),
            (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float),
            (r'\d(_?\d)*', Number.Integer),

            # strings and heredocs
            (r'"""\s*', String.Heredoc, 'heredoc_double'),
            (r"'''\s*$", String.Heredoc, 'heredoc_single'),
            (r'"', String.Double, 'string_double'),
            (r"'", String.Single, 'string_single'),

            include('sigils'),

            (r'%\{', Punctuation, 'map_key'),
            (r'\{', Punctuation, 'tuple'),
        ],
        'heredoc_double': [
            (r'^\s*"""', String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_single': [
            (r"^\s*'''", String.Heredoc, '#pop'),
            include('heredoc_interpol'),
        ],
        'heredoc_interpol': [
            (r'[^#\\\n]+', String.Heredoc),
            include('escapes'),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
            include('interpol'),
        ],
        'heredoc_no_interpol': [
            (r'[^\\\n]+', String.Heredoc),
            (r'\\.', String.Heredoc),
            (r'\n+', String.Heredoc),
        ],
        'escapes': [
            (long_hex_char_re,
             bygroups(String.Escape, Number.Hex, String.Escape)),
            (hex_char_re, String.Escape),
            (escape_char_re, String.Escape),
        ],
        'interpol': [
            (r'#\{', String.Interpol, 'interpol_string'),
        ],
        'interpol_string': [
            (r'\}', String.Interpol, "#pop"),
            include('root')
        ],
        'map_key': [
            include('root'),
            (r':', Punctuation, 'map_val'),
            (r'=>', Punctuation, 'map_val'),
            (r'\}', Punctuation, '#pop'),
        ],
        'map_val': [
            include('root'),
            (r',', Punctuation, '#pop'),
            (r'(?=\})', Punctuation, '#pop'),
        ],
        'tuple': [
            include('root'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
    tokens.update(gen_elixir_string_rules('double', '"', String.Double))
    tokens.update(gen_elixir_string_rules('single', "'", String.Single))
    tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol))
    tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol))
    tokens.update(gen_elixir_sigil_rules())


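# Illustrative sketch, not part of the upstream module: the
# get_tokens_unprocessed() override above post-processes plain Name tokens, so
# words such as 'defmodule' and 'def' come back as Keyword.Declaration and
# 'do'/'end' as Keyword even though the regex rules only matched them as
# identifiers. The helper name and the Elixir snippet are invented.
def _demo_elixir_lexer():
    code = (
        "defmodule Demo do\n"
        "  def add(a, b), do: a + b\n"
        "end\n"
    )
    # Collect (token_type, value) pairs for inspection.
    return list(ElixirLexer().get_tokens(code))

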
class ElixirConsoleLexer(Lexer):
    """
    For Elixir interactive console (iex) output like:

    .. sourcecode:: iex

        iex> [head | tail] = [1,2,3]
        [1,2,3]
        iex> head
        1
        iex> tail
        [2,3]
        iex> [head | tail]
        [1,2,3]
        iex> length [head | tail]
        3

    .. versionadded:: 1.5
    """

    name = 'Elixir iex session'
    aliases = ['iex']
    mimetypes = ['text/x-elixir-shellsession']

    _prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ')

    def get_tokens_unprocessed(self, text):
        exlexer = ElixirLexer(**self.options)

        curcode = ''
        in_error = False
        insertions = []
        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('** '):
                in_error = True
                insertions.append((len(curcode),
                                   [(0, Generic.Error, line[:-1])]))
                curcode += line[-1:]
            else:
                m = self._prompt_re.match(line)
                if m is not None:
                    in_error = False
                    end = m.end()
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, line[:end])]))
                    curcode += line[end:]
                else:
                    if curcode:
                        yield from do_insertions(
                            insertions, exlexer.get_tokens_unprocessed(curcode))
                        curcode = ''
                        insertions = []
                    token = Generic.Error if in_error else Generic.Output
                    yield match.start(), token, line
        if curcode:
            yield from do_insertions(
                insertions, exlexer.get_tokens_unprocessed(curcode))


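# Illustrative sketch, not part of the upstream module: ElixirConsoleLexer
# recognises 'iex(n)>' and '...>' prompts, marks '** (...)' lines as errors and
# re-lexes the entered expressions with ElixirLexer. The transcript, the helper
# name and the __main__ smoke test below are only here as a usage example.
def _demo_elixir_console_lexer():
    session = (
        "iex(1)> Enum.map([1, 2, 3], fn x -> x * 2 end)\n"
        "[2, 4, 6]\n"
    )
    return list(ElixirConsoleLexer().get_tokens(session))


if __name__ == '__main__':
    # Minimal smoke test of the sketches above; assumes Pygments is importable.
    print(_demo_erlang_lexer())
    for tok, value in _demo_erlang_shell_lexer():
        print(tok, repr(value))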
