ThirdParty/Pygments/pygments/lexers/crystal.py

changeset 5713
6762afd9f963
equal deleted inserted replaced
5712:f0d08bdeacf4 5713:6762afd9f963
1 # -*- coding: utf-8 -*-
2 """
3 pygments.lexers.crystal
4 ~~~~~~~~~~~~~~~~~~~~~~~
5
6 Lexer for Crystal.
7
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
9 :license: BSD, see LICENSE for details.
10 """
11
12 import re
13
14 from pygments.lexer import ExtendedRegexLexer, include, \
15 bygroups, default, LexerContext, words
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
17 Number, Punctuation, Error
18
19 __all__ = ['CrystalLexer']
20
# Matches one physical line at a time, newline included; the heredoc
# callback of the lexer below walks the input with it while looking for a
# heredoc terminator.
line_re = re.compile('.*?\n')


# Operator spellings that the lexer also accepts as symbol bodies (``:+``)
# and as method names after ``.`` / ``::``.  One operator family per line;
# within a family the longer spellings are listed first.
CRYSTAL_OPERATORS = [
    '!=', '!~', '!',
    '%',
    '&&', '&',
    '**', '*',
    '+',
    '-',
    '/',
    '<=>', '<<', '<=', '<',
    '===', '==', '=~', '=',
    '>=', '>>', '>',
    '[]=', '[]?', '[]',
    '^',
    '||', '|',
    '~',
]
28
29
class CrystalLexer(ExtendedRegexLexer):
    """
    For `Crystal <http://crystal-lang.org>`_ source code.

    The lexer is table driven: ``tokens`` maps state names to ordered rule
    lists, and the string/regex literal states are generated by
    ``gen_crystalstrings_rules`` and merged in at the end of the class body.
    Heredocs and ``%``-delimited literals are handled by callbacks that
    receive the regex match and the current lexer context.

    .. versionadded:: 2.2
    """

    name = 'Crystal'
    aliases = ['cr', 'crystal']
    filenames = ['*.cr']
    mimetypes = ['text/x-crystal']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        """Tokenize a heredoc opener, the rest of its line, and its body.

        ``match`` groups: 1 = ``<<`` or ``<<-``, 2 = optional opening quote,
        3 = heredoc name, 4 = closing quote, 5 = rest of the opener line.

        Yields ``(index, tokentype, value)`` tuples and advances ``ctx.pos``.
        Pending heredocs are recorded on ``ctx`` in ``heredocstack``; only the
        outermost invocation consumes the bodies, in the order the openers
        were seen.
        """
        # okay, this is the hardest part of parsing Crystal...
        yield match.start(1), Operator, match.group(1)          # <<-?
        yield match.start(2), String.Heredoc, match.group(2)    # quote ", ', `
        yield match.start(3), String.Delimiter, match.group(3)  # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)    # quote again

        # The stack lives on the context object so that nested invocations
        # (triggered by the recursive lexing below) share it.
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        # (tolerant, name): '<<-' allows the terminator to be indented.
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        # Lex the remainder of the opener line with the normal rules by
        # temporarily narrowing the context window to group 5.
        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # this may find other heredocs
        for i, t, v in self.get_tokens_unprocessed(context=ctx):
            yield i, t, v
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                # A distinct loop name keeps the ``match`` parameter intact.
                for line_match in line_re.finditer(ctx.text, ctx.pos):
                    if tolerant:
                        check = line_match.group().strip()
                    else:
                        check = line_match.group().rstrip()
                    if check == hdname:
                        # Terminator found: emit the collected body lines,
                        # then the terminator line itself.
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield line_match.start(), String.Delimiter, line_match.group()
                        ctx.pos = line_match.end()
                        break
                    else:
                        lines.append(line_match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
            # Restore the full window that was narrowed above and reset the
            # shared stack for the next outermost heredoc.
            ctx.end = len(ctx.text)
            del heredocstack[:]

    def gen_crystalstrings_rules():
        """Build and return the state table for string-like literals.

        Called once in the class body (hence no ``self``); the returned dict
        is merged into ``tokens``.  It contains the ``strings`` state plus the
        generated ``simple-*`` and per-bracket ``*-intp-string``,
        ``*-string`` and ``*-regex`` states.
        """
        def intp_regex_callback(self, match, ctx):
            # groups: 1 = opener (%r + delimiter), 3 = body, 4 = closer + flags
            yield match.start(1), String.Regex, match.group(1)  # begin
            # Lex the body in its own context so interpolation is recognised;
            # indices are relative to the body and shifted back below.
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # groups: 1 = opener, 3 = body, 4 = closing delimiter
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol (and backtick command strings);
        # the symbol variant handles escapes only, the others also
        # handle #{...} interpolation
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings: one family of states per bracket pair so
        # that nested brackets can be balanced with #push / #pop
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[' + lbrace + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states['strings'].append((r'%[wi]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # keywords
            (words('''
                abstract asm as begin break case do else elsif end ensure extend ifdef if
                include instance_sizeof next of pointerof private protected rescue return
                require sizeof super then typeof unless until when while with yield
            '''.split(), suffix=r'\b'), Keyword),
            (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
            # start of function, class and module names
            (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Text, Name.Namespace), 'funcname'),
            # 'def' directly followed by an operator name.  The '-' is
            # escaped so it is a literal minus; unescaped, '+-/' is a range
            # that would also admit ',' and '.'.
            (r'def(?=[*%&^`~+\-/\[<>=])', Keyword, 'funcname'),
            (r'(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Text, Name.Namespace), 'classname'),
            (r'(self|out|uninitialized)\b|(is_a|responds_to)\?', Keyword.Pseudo),
            # macros
            (words('''
                debugger record pp assert_responds_to spawn parallel
                getter setter property delegate def_hash def_equals def_equals_and_hash
                forward_missing_to
            '''.split(), suffix=r'\b'), Name.Builtin.Pseudo),
            (r'getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b', Name.Builtin.Pseudo),
            # builtins
            # http://crystal-lang.org/api/toplevel.html
            (words('''
                Object Value Struct Reference Proc Class Nil Symbol Enum Void
                Bool Number Int Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64
                Float Float32 Float64 Char String
                Pointer Slice Range Exception Regex
                Mutex StaticArray Array Hash Set Tuple Deque Box Process File
                Dir Time Channel Concurrent Scheduler
                abort at_exit caller delay exit fork future get_stack_top gets
                lazy loop main p print printf puts
                raise rand read_line sleep sprintf system with_color
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
             'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            # 3 separate expressions for floats because any of the 3 optional
            # parts makes it a float
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+))(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][A-Z_]+\b', Name.Constant),
            # macro expansion
            (r'\{%', String.Interpol, 'in-macro-control'),
            (r'\{\{', String.Interpol, 'in-macro-expr'),
            # attributes
            (r'(@\[)(\s*)([A-Z]\w*)',
             bygroups(Operator, Text, Name.Decorator), 'in-attr'),
            # this is needed because Crystal attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
             bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            # Names can end with [!?] unless it's "!="
            (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
            (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        # entered right after def/fun/macro; consumes one (optionally
        # receiver-qualified) method name, or pops without consuming
        'funcname': [
            (r'(?:([a-zA-Z_]\w*)(\.))?'
             r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            default('#pop')
        ],
        'classname': [
            (r'[A-Z_]\w*', Name.Class),
            (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
             bygroups(Punctuation, Text, Name.Class, Text, Punctuation)),
            default('#pop')
        ],
        # inside #{ ... }: braces are balanced via #push / #pop
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#\{', String.Interpol, 'in-intp'),
        ],
        'string-escaped': [
            # 'r' is included so that "\r" is tokenised as an escape, in
            # line with the char-literal rule in 'root'; 'a' (alert) is a
            # Crystal escape as well.  's' is kept for backward compatibility.
            (r'\\([\\abefnrstv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})', String.Escape)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            include('string-escaped'),
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[imsx]*', String.Regex, '#pop'),
        ],
        # everything after __END__ (flags include DOTALL, so '.+' spans lines)
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ],
        'in-macro-control': [
            (r'\{%', String.Interpol, '#push'),
            (r'%\}', String.Interpol, '#pop'),
            (r'for\b|in\b', Keyword),
            include('root'),
        ],
        'in-macro-expr': [
            (r'\{\{', String.Interpol, '#push'),
            (r'\}\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'in-attr': [
            (r'\[', Operator, '#push'),
            (r'\]', Operator, '#pop'),
            include('root'),
        ],
    }
    # merge in the generated string / regex literal states
    tokens.update(gen_crystalstrings_rules())

eric ide

mercurial