|
1 # -*- coding: utf-8 -*- |
|
2 """ |
|
3 pygments.lexers.crystal |
|
4 ~~~~~~~~~~~~~~~~~~~~~~~ |
|
5 |
|
6 Lexer for Crystal. |
|
7 |
|
8 :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. |
|
9 :license: BSD, see LICENSE for details. |
|
10 """ |
|
11 |
|
12 import re |
|
13 |
|
14 from pygments.lexer import ExtendedRegexLexer, include, \ |
|
15 bygroups, default, LexerContext, words |
|
16 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
17 Number, Punctuation, Error |
|
18 |
|
19 __all__ = ['CrystalLexer'] |
|
20 |
|
# Matches the input one line at a time; each match includes the trailing
# newline.  Used to walk heredoc bodies line by line.
line_re = re.compile('.*?\n')


# Operator spellings accepted after ``:`` (operator symbols) and after
# ``.`` / ``::`` (operator method names) by the lexer rules.
CRYSTAL_OPERATORS = [
    # negation and arithmetic
    '!=', '!~', '!', '%', '&&', '&', '**', '*', '+', '-', '/',
    # "less than" family
    '<=>', '<<', '<=', '<',
    # equality and matching
    '===', '==', '=~', '=',
    # "greater than" family
    '>=', '>>', '>',
    # indexing
    '[]=', '[]?', '[]',
    # remaining bitwise / logical operators
    '^', '||', '|', '~',
]
|
28 |
|
29 |
|
class CrystalLexer(ExtendedRegexLexer):
    """
    For `Crystal <http://crystal-lang.org>`_ source code.

    .. versionadded:: 2.2
    """

    name = 'Crystal'
    aliases = ['cr', 'crystal']
    filenames = ['*.cr']
    mimetypes = ['text/x-crystal']

    flags = re.DOTALL | re.MULTILINE

    def heredoc_callback(self, match, ctx):
        """Lex a heredoc opener, the rest of its line, and the heredoc body.

        ``match`` groups: 1 = ``<<`` or ``<<-``, 2 = optional opening quote,
        3 = heredoc name, 4 = closing quote, 5 = rest of the opener line.

        The rest of the opener line is lexed recursively (it may contain
        further heredoc openers).  Every opener is recorded on a stack kept
        on ``ctx``; only the outermost invocation consumes the bodies, in
        the order in which the openers appeared.

        Yields ``(index, tokentype, value)`` tuples and updates ``ctx.pos``
        and ``ctx.end``.
        """
        # okay, this is the hardest part of parsing Crystal...
        # match: 1 = <<-?, 2 = quote? 3 = name 4 = quote? 5 = rest of line

        start = match.start(1)
        yield start, Operator, match.group(1)        # <<-?
        yield match.start(2), String.Heredoc, match.group(2)    # quote ", ', `
        yield match.start(3), String.Delimiter, match.group(3)  # heredoc name
        yield match.start(4), String.Heredoc, match.group(4)    # quote again

        # The stack lives on the context so that nested invocations (other
        # openers on the same line) share it with the outermost one.
        heredocstack = ctx.__dict__.setdefault('heredocstack', [])
        outermost = not bool(heredocstack)
        heredocstack.append((match.group(1) == '<<-', match.group(3)))

        ctx.pos = match.start(5)
        ctx.end = match.end(5)
        # Lexing the rest of the line may find other heredocs, each of which
        # re-enters this callback.  Bound the nesting so that a line with a
        # very large number of openers cannot exhaust the interpreter's
        # recursion limit; past the bound the rest of the line is emitted
        # verbatim as heredoc text.
        if len(heredocstack) < 100:
            for i, t, v in self.get_tokens_unprocessed(context=ctx):
                yield i, t, v
        else:
            yield ctx.pos, String.Heredoc, match.group(5)
        ctx.pos = match.end()

        if outermost:
            # this is the outer heredoc again, now we can process them all
            for tolerant, hdname in heredocstack:
                lines = []
                for match in line_re.finditer(ctx.text, ctx.pos):
                    # ``<<-`` allows the terminator to be indented
                    if tolerant:
                        check = match.group().strip()
                    else:
                        check = match.group().rstrip()
                    if check == hdname:
                        for amatch in lines:
                            yield amatch.start(), String.Heredoc, amatch.group()
                        yield match.start(), String.Delimiter, match.group()
                        ctx.pos = match.end()
                        break
                    else:
                        lines.append(match)
                else:
                    # end of heredoc not found -- error!
                    for amatch in lines:
                        yield amatch.start(), Error, amatch.group()
                    # Skip past what was just emitted; otherwise the main
                    # loop would lex the same text a second time and the
                    # token stream would contain it twice.
                    if lines:
                        ctx.pos = lines[-1].end()
            # restore the full lexing range that was narrowed above
            ctx.end = len(ctx.text)
            del heredocstack[:]

    def gen_crystalstrings_rules():
        """Build the string-related lexer states.

        Called once while the class body is executed (hence no ``self``);
        returns a dict of state name -> rule list that is merged into
        ``tokens``.
        """
        def intp_regex_callback(self, match, ctx):
            # groups: 1 = opener, 3 = regex body, 4 = closer plus flags
            yield match.start(1), String.Regex, match.group(1)  # begin
            # lex the body on its own context so interpolations are found
            nctx = LexerContext(match.group(3), 0, ['interpolated-regex'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Regex, match.group(4)  # end[imsx]*
            ctx.pos = match.end()

        def intp_string_callback(self, match, ctx):
            # groups: 1 = opener, 3 = string body, 4 = closer
            yield match.start(1), String.Other, match.group(1)
            nctx = LexerContext(match.group(3), 0, ['interpolated-string'])
            for i, t, v in self.get_tokens_unprocessed(context=nctx):
                yield match.start(3)+i, t, v
            yield match.start(4), String.Other, match.group(4)  # end
            ctx.pos = match.end()

        states = {}
        states['strings'] = [
            (r'\:@{0,2}[a-zA-Z_]\w*[!?]?', String.Symbol),
            (words(CRYSTAL_OPERATORS, prefix=r'\:@{0,2}'), String.Symbol),
            (r":'(\\\\|\\'|[^'])*'", String.Symbol),
            # This allows arbitrary text after '\ for simplicity
            (r"'(\\\\|\\'|[^']|\\[^'\\]+)'", String.Char),
            (r':"', String.Symbol, 'simple-sym'),
            # Crystal doesn't have "symbol:"s but this simplifies function args
            (r'([a-zA-Z_]\w*)(:)(?!:)', bygroups(String.Symbol, Punctuation)),
            (r'"', String.Double, 'simple-string'),
            (r'(?<!\.)`', String.Backtick, 'simple-backtick'),
        ]

        # double-quoted string and symbol
        for name, ttype, end in ('string', String.Double, '"'), \
                                ('sym', String.Symbol, '"'), \
                                ('backtick', String.Backtick, '`'):
            states['simple-'+name] = [
                include('string-escaped' if name == 'sym' else 'string-intp-escaped'),
                (r'[^\\%s#]+' % end, ttype),
                (r'[\\#]', ttype),
                (end, ttype, '#pop'),
            ]

        # braced quoted strings
        for lbrace, rbrace, bracecc, name in \
                ('\\{', '\\}', '{}', 'cb'), \
                ('\\[', '\\]', '\\[\\]', 'sb'), \
                ('\\(', '\\)', '()', 'pa'), \
                ('<', '>', '<>', 'ab'):
            states[name+'-intp-string'] = [
                (r'\\[' + lbrace + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                include('string-intp-escaped'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            states['strings'].append((r'%' + lbrace, String.Other,
                                      name+'-intp-string'))
            states[name+'-string'] = [
                (r'\\[\\' + bracecc + ']', String.Other),
                (lbrace, String.Other, '#push'),
                (rbrace, String.Other, '#pop'),
                (r'[\\#' + bracecc + ']', String.Other),
                (r'[^\\#' + bracecc + ']+', String.Other),
            ]
            # http://crystal-lang.org/docs/syntax_and_semantics/literals/array.html
            states['strings'].append((r'%[wi]' + lbrace, String.Other,
                                      name+'-string'))
            states[name+'-regex'] = [
                (r'\\[\\' + bracecc + ']', String.Regex),
                (lbrace, String.Regex, '#push'),
                (rbrace + '[imsx]*', String.Regex, '#pop'),
                include('string-intp'),
                (r'[\\#' + bracecc + ']', String.Regex),
                (r'[^\\#' + bracecc + ']+', String.Regex),
            ]
            states['strings'].append((r'%r' + lbrace, String.Regex,
                                      name+'-regex'))

        # these must come after %<brace>!
        states['strings'] += [
            # %r regex
            (r'(%r([\W_]))((?:\\\2|(?!\2).)*)(\2[imsx]*)',
             intp_regex_callback),
            # regular fancy strings with qsw
            (r'(%[wi]([\W_]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
            # special forms of fancy strings after operators or
            # in method calls with braces
            (r'(?<=[-+/*%=<>&!^|~,(])(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # and because of fixed width lookbehinds the whole thing a
            # second time for line startings...
            (r'^(\s*)(%([\t ])(?:(?:\\\3|(?!\3).)*)\3)',
             bygroups(Text, String.Other, None)),
            # all regular fancy strings without qsw
            (r'(%([\[{(<]))((?:\\\2|(?!\2).)*)(\2)',
             intp_string_callback),
        ]

        return states

    tokens = {
        'root': [
            (r'#.*?$', Comment.Single),
            # keywords
            (words('''
                abstract asm as begin break case do else elsif end ensure extend ifdef if
                include instance_sizeof next of pointerof private protected rescue return
                require sizeof super then typeof unless until when while with yield
            '''.split(), suffix=r'\b'), Keyword),
            (words(['true', 'false', 'nil'], suffix=r'\b'), Keyword.Constant),
            # start of function, class and module names
            (r'(module|lib)(\s+)([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)',
             bygroups(Keyword, Text, Name.Namespace)),
            (r'(def|fun|macro)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Text, Name.Namespace), 'funcname'),
            (r'def(?=[*%&^`~+-/\[<>=])', Keyword, 'funcname'),
            (r'(class|struct|union|type|alias|enum)(\s+)((?:[a-zA-Z_]\w*::)*)',
             bygroups(Keyword, Text, Name.Namespace), 'classname'),
            (r'(self|out|uninitialized)\b|(is_a|responds_to)\?', Keyword.Pseudo),
            # macros
            (words('''
                debugger record pp assert_responds_to spawn parallel
                getter setter property delegate def_hash def_equals def_equals_and_hash
                forward_missing_to
            '''.split(), suffix=r'\b'), Name.Builtin.Pseudo),
            (r'getter[!?]|property[!?]|__(DIR|FILE|LINE)__\b', Name.Builtin.Pseudo),
            # builtins
            # http://crystal-lang.org/api/toplevel.html
            (words('''
                Object Value Struct Reference Proc Class Nil Symbol Enum Void
                Bool Number Int Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64
                Float Float32 Float64 Char String
                Pointer Slice Range Exception Regex
                Mutex StaticArray Array Hash Set Tuple Deque Box Process File
                Dir Time Channel Concurrent Scheduler
                abort at_exit caller delay exit fork future get_stack_top gets
                lazy loop main p print printf puts
                raise rand read_line sleep sprintf system with_color
            '''.split(), prefix=r'(?<!\.)', suffix=r'\b'), Name.Builtin),
            # normal heredocs
            (r'(?<!\w)(<<-?)(["`\']?)([a-zA-Z_]\w*)(\2)(.*?\n)',
             heredoc_callback),
            # empty string heredocs
            (r'(<<-?)("|\')()(\2)(.*?\n)', heredoc_callback),
            (r'__END__', Comment.Preproc, 'end-part'),
            # multiline regex (after keywords or assignments)
            (r'(?:^|(?<=[=<>~!:])|'
             r'(?<=(?:\s|;)when\s)|'
             r'(?<=(?:\s|;)or\s)|'
             r'(?<=(?:\s|;)and\s)|'
             r'(?<=\.index\s)|'
             r'(?<=\.scan\s)|'
             r'(?<=\.sub\s)|'
             r'(?<=\.sub!\s)|'
             r'(?<=\.gsub\s)|'
             r'(?<=\.gsub!\s)|'
             r'(?<=\.match\s)|'
             r'(?<=(?:\s|;)if\s)|'
             r'(?<=(?:\s|;)elsif\s)|'
             r'(?<=^when\s)|'
             r'(?<=^index\s)|'
             r'(?<=^scan\s)|'
             r'(?<=^sub\s)|'
             r'(?<=^gsub\s)|'
             r'(?<=^sub!\s)|'
             r'(?<=^gsub!\s)|'
             r'(?<=^match\s)|'
             r'(?<=^if\s)|'
             r'(?<=^elsif\s)'
             r')(\s*)(/)', bygroups(Text, String.Regex), 'multiline-regex'),
            # multiline regex (in method calls or subscripts)
            (r'(?<=\(|,|\[)/', String.Regex, 'multiline-regex'),
            # multiline regex (this time the funny no whitespace rule)
            (r'(\s+)(/)(?![\s=])', bygroups(Text, String.Regex),
             'multiline-regex'),
            # lex numbers and ignore following regular expressions which
            # are division operators in fact (grrrr. i hate that. any
            # better ideas?)
            # since pygments 0.7 we also eat a "?" operator after numbers
            # so that the char operator does not work. Chars are not allowed
            # there so that you can use the ternary operator.
            # stupid example:
            #   x>=0?n[x]:""
            (r'(0o[0-7]+(?:_[0-7]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Oct, Text, Operator)),
            (r'(0x[0-9A-Fa-f]+(?:_[0-9A-Fa-f]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Hex, Text, Operator)),
            (r'(0b[01]+(?:_[01]+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Bin, Text, Operator)),
            # 3 separate expressions for floats because any of the 3 optional
            # parts makes it a float
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)'
             r'(?:_?f[0-9]+)?)(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'((?:0(?![0-9])|[1-9][\d_]*)(?:\.\d[\d_]*)?(?:e[+-]?[0-9]+)?'
             r'(?:_?f[0-9]+))(\s*)([/?])?',
             bygroups(Number.Float, Text, Operator)),
            (r'(0\b|[1-9][\d]*(?:_\d+)*(?:_?[iu][0-9]+)?)\b(\s*)([/?])?',
             bygroups(Number.Integer, Text, Operator)),
            # Names
            (r'@@[a-zA-Z_]\w*', Name.Variable.Class),
            (r'@[a-zA-Z_]\w*', Name.Variable.Instance),
            (r'\$\w+', Name.Variable.Global),
            (r'\$[!@&`\'+~=/\\,;.<>_*$?:"^-]', Name.Variable.Global),
            (r'\$-[0adFiIlpvw]', Name.Variable.Global),
            (r'::', Operator),
            include('strings'),
            # chars
            (r'\?(\\[MC]-)*'  # modifiers
             r'(\\([\\befnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)'
             r'(?!\w)',
             String.Char),
            (r'[A-Z][A-Z_]+\b', Name.Constant),
            # macro expansion
            (r'\{%', String.Interpol, 'in-macro-control'),
            (r'\{\{', String.Interpol, 'in-macro-expr'),
            # attributes
            (r'(@\[)(\s*)([A-Z]\w*)',
             bygroups(Operator, Text, Name.Decorator), 'in-attr'),
            # this is needed because Crystal attributes can look
            # like keywords (class) or like this: ` ?!?
            (words(CRYSTAL_OPERATORS, prefix=r'(\.|::)'),
             bygroups(Operator, Name.Operator)),
            (r'(\.|::)([a-zA-Z_]\w*[!?]?|[*%&^`~+\-/\[<>=])',
             bygroups(Operator, Name)),
            # Names can end with [!?] unless it's "!="
            (r'[a-zA-Z_]\w*(?:[!?](?!=))?', Name),
            (r'(\[|\]\??|\*\*|<=>?|>=|<<?|>>?|=~|===|'
             r'!~|&&?|\|\||\.{1,3})', Operator),
            (r'[-+/*%=<>&!^|~]=?', Operator),
            (r'[(){};,/?:\\]', Punctuation),
            (r'\s+', Text)
        ],
        'funcname': [
            (r'(?:([a-zA-Z_]\w*)(\.))?'
             r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
             r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
             bygroups(Name.Class, Operator, Name.Function), '#pop'),
            default('#pop')
        ],
        'classname': [
            (r'[A-Z_]\w*', Name.Class),
            (r'(\()(\s*)([A-Z_]\w*)(\s*)(\))',
             bygroups(Punctuation, Text, Name.Class, Text, Punctuation)),
            default('#pop')
        ],
        'in-intp': [
            (r'\{', String.Interpol, '#push'),
            (r'\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'string-intp': [
            (r'#\{', String.Interpol, 'in-intp'),
        ],
        'string-escaped': [
            # Crystal's escapes: single characters (including \a and \r;
            # there is no \s escape), octal, hex, and the \uXXXX and
            # \u{...} unicode forms.
            (r'\\([\\abefnrtv#"\']|x[a-fA-F0-9]{1,2}|[0-7]{1,3}|'
             r'u[a-fA-F0-9]{4}|u\{[a-fA-F0-9 ]+\})', String.Escape)
        ],
        'string-intp-escaped': [
            include('string-intp'),
            include('string-escaped'),
        ],
        'interpolated-regex': [
            include('string-intp'),
            (r'[\\#]', String.Regex),
            (r'[^\\#]+', String.Regex),
        ],
        'interpolated-string': [
            include('string-intp'),
            (r'[\\#]', String.Other),
            (r'[^\\#]+', String.Other),
        ],
        'multiline-regex': [
            include('string-intp'),
            (r'\\\\', String.Regex),
            (r'\\/', String.Regex),
            (r'[\\#]', String.Regex),
            (r'[^\\/#]+', String.Regex),
            (r'/[imsx]*', String.Regex, '#pop'),
        ],
        'end-part': [
            (r'.+', Comment.Preproc, '#pop')
        ],
        'in-macro-control': [
            (r'\{%', String.Interpol, '#push'),
            (r'%\}', String.Interpol, '#pop'),
            (r'for\b|in\b', Keyword),
            include('root'),
        ],
        'in-macro-expr': [
            (r'\{\{', String.Interpol, '#push'),
            (r'\}\}', String.Interpol, '#pop'),
            include('root'),
        ],
        'in-attr': [
            (r'\[', Operator, '#push'),
            (r'\]', Operator, '#pop'),
            include('root'),
        ],
    }
    # merge in the generated string states ('strings', 'simple-*', '*-string', ...)
    tokens.update(gen_crystalstrings_rules())