# -*- coding: utf-8 -*-
"""
    pygments.lexers.parsers
    ~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for parser generators.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, DelegatingLexer, \
    include, bygroups, using
from pygments.token import Punctuation, Other, Text, Comment, Operator, \
    Keyword, Name, String, Number, Whitespace
from pygments.lexers.jvm import JavaLexer
from pygments.lexers.c_cpp import CLexer, CppLexer
from pygments.lexers.objective import ObjectiveCLexer
from pygments.lexers.d import DLexer
from pygments.lexers.dotnet import CSharpLexer
from pygments.lexers.ruby import RubyLexer
from pygments.lexers.python import PythonLexer
from pygments.lexers.perl import PerlLexer

__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
           'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
           'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
           'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
           # 'AntlrCLexer',
           'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
           'AntlrJavaLexer', 'AntlrActionScriptLexer',
           'TreetopLexer', 'EbnfLexer']

class RagelLexer(RegexLexer):
    """
    A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for
    fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead
    (or one of the language-specific subclasses).

    .. versionadded:: 1.1
    """

    name = 'Ragel'
    aliases = ['ragel']
    filenames = []

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace)
        ],
        'comments': [
            (r'\#.*$', Comment),
        ],
        'keywords': [
            (r'(access|action|alphtype)\b', Keyword),
            (r'(getkey|write|machine|include)\b', Keyword),
            (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
            (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
        ],
        'numbers': [
            (r'0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
        'literals': [
            (r'"(\\\\|\\"|[^"])*"', String),              # double quote string
            (r"'(\\\\|\\'|[^'])*'", String),              # single quote string
            (r'\[(\\\\|\\\]|[^\]])*\]', String),          # square bracket literals
            (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex),  # regular expressions
        ],
        'identifiers': [
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
        'operators': [
            (r',', Operator),                           # Join
            (r'\||&|--?', Operator),                    # Union, Intersection and Subtraction
            (r'\.|<:|:>>?', Operator),                  # Concatenation
            (r':', Operator),                           # Label
            (r'->', Operator),                          # Epsilon Transition
            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),    # EOF Actions
            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),    # Global Error Actions
            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),     # To-State Actions
            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
            (r'>|@|\$|%', Operator),                    # Transition Actions and Priorities
            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),  # Repetition
            (r'!|\^', Operator),                        # Negation
            (r'\(|\)', Operator),                       # Grouping
        ],
        'root': [
            include('literals'),
            include('whitespace'),
            include('comments'),
            include('keywords'),
            include('numbers'),
            include('identifiers'),
            include('operators'),
            (r'\{', Punctuation, 'host'),
            (r'=', Operator),
            (r';', Punctuation),
        ],
        'host': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^{}\'"/#]+',  # exclude unsafe characters
                r'[^\\]\\[{}]',  # allow escaped { or }

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                r'\#.*$\n?',            # ruby comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
    }

class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for `Ragel`_ embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    .. versionadded:: 1.1
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    tokens = {
        'root': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^%\'"/#]+',   # exclude unsafe characters
                r'%(?=[^%]|$)',  # a single % sign is okay, just not 2 of them

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                r'//.*$\n?',            # single line comment
                r'\#.*$\n?',            # ruby/ragel comment
                r'/(?!\*)(\\\\|\\/|[^/])*/',  # regular expression

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
                                                     using(RagelLexer),
                                                     Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                r'(' + r'|'.join((
                    r'[^}\'"\[/#]',    # exclude unsafe characters
                    r'\}(?=[^%]|$)',   # } is okay as long as it's not followed by %
                    r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                    r'[^\\]\\[{}]',    # ...and } is okay if it's escaped

                    # allow / if it's preceded with one of these symbols
                    # (ragel EOF actions)
                    r'(>|\$|%|<|@|<>)/',

                    # specifically allow regex followed immediately by *
                    # so it doesn't get mistaken for a comment
                    r'/(?!\*)(\\\\|\\/|[^/])*/\*',

                    # allow / as long as it's not followed by another / or by a *
                    r'/(?=[^/*]|$)',

                    # We want to match as many of these as we can in one block.
                    # Not sure if we need the + sign here,
                    # does it help performance?
                )) + r')+',

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r"\[(\\\\|\\\]|[^\]])*\]",  # square bracket literal
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                r'//.*$\n?',            # single line comment
                r'\#.*$\n?',            # ruby/ragel comment
            )) + r')+', using(RagelLexer)),

            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        return '@LANG: indep' in text

class RagelRubyLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a Ruby host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Ruby Host'
    aliases = ['ragel-ruby', 'ragel-rb']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
                                             **options)

    def analyse_text(text):
        return '@LANG: ruby' in text


class RagelCLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in C Host'
    aliases = ['ragel-c']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
                                          **options)

    def analyse_text(text):
        return '@LANG: c' in text


class RagelDLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a D host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in D Host'
    aliases = ['ragel-d']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelDLexer, self).__init__(DLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: d' in text


class RagelCppLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a CPP host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in CPP Host'
    aliases = ['ragel-cpp']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelCppLexer, self).__init__(CppLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        return '@LANG: c++' in text


class RagelObjectiveCLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in an Objective C host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Objective C Host'
    aliases = ['ragel-objc']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
                                                   RagelEmbeddedLexer,
                                                   **options)

    def analyse_text(text):
        return '@LANG: objc' in text


class RagelJavaLexer(DelegatingLexer):
    """
    A lexer for `Ragel`_ in a Java host file.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Java Host'
    aliases = ['ragel-java']
    filenames = ['*.rl']

    def __init__(self, **options):
        super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
                                             **options)

    def analyse_text(text):
        return '@LANG: java' in text
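
# The Ragel lexers above are normally driven through the standard Pygments
# entry points rather than instantiated for their own sake.  A minimal usage
# sketch follows (kept as a comment so it is not executed at import time);
# the sample grammar string and the choice of formatter are illustrative
# assumptions, not part of this module:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.parsers import RagelRubyLexer
#
#     code = "%%{ machine hello; main := 'hi'; }%%"
#     print(highlight(code, RagelRubyLexer(), TerminalFormatter()))
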

class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    .. versionadded:: 1.1

    .. _ANTLR: http://www.antlr.org/
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),

            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),

            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),

            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
             'action'),
            (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r"'(\\\\|\\'|[^'])*'", String),
            (r'"(\\\\|\\"|[^"])*"', String),
            (r'<<([^>]|>[^>])>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + r')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join((   # keep host code in largest possible chunks
                r'[^${}\'"/\\]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks.
                r'[^$\[\]\'"/]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\"|[^"])*"',  # double quote string
                r"'(\\\\|\\'|[^'])*'",  # single quote string
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\/|[^/])*/',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),

            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)

# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets

# TH: I'm not aware of any language features of C++ that will cause
# incorrect lexing of C files. Antlr doesn't appear to make a distinction,
# so just assume they're C++. No idea how to make Objective C work in the
# future.

# class AntlrCLexer(DelegatingLexer):
#     """
#     ANTLR with C Target
#
#     .. versionadded:: 1.1
#     """
#
#     name = 'ANTLR With C Target'
#     aliases = ['antlr-c']
#     filenames = ['*.G', '*.g']
#
#     def __init__(self, **options):
#         super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
#
#     def analyse_text(text):
#         return re.match(r'^\s*language\s*=\s*C\s*;', text)

class AntlrCppLexer(DelegatingLexer):
    """
    `ANTLR`_ with CPP Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With CPP Target'
    aliases = ['antlr-cpp']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrCppLexer, self).__init__(CppLexer, AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)

class AntlrObjectiveCLexer(DelegatingLexer):
    """
    `ANTLR`_ with Objective-C Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ObjectiveC Target'
    aliases = ['antlr-objc']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrObjectiveCLexer, self).__init__(ObjectiveCLexer,
                                                   AntlrLexer, **options)

    def analyse_text(text):
        # re.M added for consistency with the other ANTLR target lexers, so the
        # ^ anchor can match the "language = ObjC;" option on any line.
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ObjC\s*;', text, re.M)

class AntlrCSharpLexer(DelegatingLexer):
    """
    `ANTLR`_ with C# Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With C# Target'
    aliases = ['antlr-csharp', 'antlr-c#']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
                                               **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)


class AntlrPythonLexer(DelegatingLexer):
    """
    `ANTLR`_ with Python Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Python Target'
    aliases = ['antlr-python']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
                                               **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)

class AntlrJavaLexer(DelegatingLexer):
    """
    `ANTLR`_ with Java Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Java Target'
    aliases = ['antlr-java']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
                                             **options)

    def analyse_text(text):
        # Antlr language is Java by default
        return AntlrLexer.analyse_text(text) and 0.9

class AntlrRubyLexer(DelegatingLexer):
    """
    `ANTLR`_ with Ruby Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Ruby Target'
    aliases = ['antlr-ruby', 'antlr-rb']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
                                             **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)


class AntlrPerlLexer(DelegatingLexer):
    """
    `ANTLR`_ with Perl Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Perl Target'
    aliases = ['antlr-perl']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
                                             **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)


class AntlrActionScriptLexer(DelegatingLexer):
    """
    `ANTLR`_ with ActionScript Target

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ActionScript Target'
    aliases = ['antlr-as', 'antlr-actionscript']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        from pygments.lexers.actionscript import ActionScriptLexer
        super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
                                                     AntlrLexer, **options)

    def analyse_text(text):
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)

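# The analyse_text() hooks defined by the lexers above feed Pygments' lexer
# guessing: pygments.lexers.guess_lexer() scores a text against every
# registered lexer and returns the best match.  A minimal sketch (kept as a
# comment; the sample grammar is an illustrative assumption only):
#
#     from pygments.lexers import guess_lexer
#
#     text = 'grammar T;\noptions {\n  language = Ruby;\n}\n'
#     lexer = guess_lexer(text)  # whichever registered lexer scores highest
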
class TreetopBaseLexer(RegexLexer):
    """
    A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
    Not for direct use; use TreetopLexer instead.

    .. versionadded:: 1.6
    """

    tokens = {
        'root': [
            include('space'),
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        'module': [
            include('space'),
            include('end'),
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
        ],
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
        ],
        'rule': [
            include('space'),
            include('end'),
            (r'"(\\\\|\\"|[^"])*"', String.Double),
            (r"'(\\\\|\\'|[^'])*'", String.Single),
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        'inline_module': [
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            (r'#[^\n]*', Comment.Single),
        ],
        'end': [
            (r'end\b', Keyword, '#pop'),
        ],
    }


class TreetopLexer(DelegatingLexer):
    """
    A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.

    .. versionadded:: 1.6
    """

    name = 'Treetop'
    aliases = ['treetop']
    filenames = ['*.treetop', '*.tt']

    def __init__(self, **options):
        super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options)

class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    .. versionadded:: 2.0
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']

    tokens = {
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'=', Operator, 'production'),
        ],
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*)]', Comment.Multiline),
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*)]', Comment.Multiline),
        ],
        'identifier': [
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],
    }
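
# Like the other lexers in this module, EbnfLexer is usually obtained through
# the regular Pygments lookup machinery.  A minimal usage sketch (kept as a
# comment; the tiny grammar and the formatter choice are illustrative
# assumptions only):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     from pygments.lexers import get_lexer_by_name
#
#     grammar = 'digit = "0" | "1" | "2" ;  (* a trivial rule *)'
#     print(highlight(grammar, get_lexer_by_name('ebnf'), HtmlFormatter()))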