# -*- coding: utf-8 -*-
"""
    pygments.lexers.robotframework
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token
from pygments.util import text_type

__all__ = ['RobotFrameworkLexer']


HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string
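
# Illustration (derived from the helper above): normalize('Test Cases',
# remove='*') returns 'testcases', since the input is lower-cased and
# spaces plus the characters in ``remove`` are stripped.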


class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space and pipe separated plain text formats.

    .. versionadded:: 1.6
    """
    name = 'RobotFramework'
    aliases = ['robotframework']
    filenames = ['*.txt', '*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, text_type(value)
                        index += len(value)
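
# A minimal usage sketch, not part of the original module; it assumes only
# a standard Pygments installation:
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     data = '*** Test Cases ***\nExample\n    Log    Hello\n'
#     print(highlight(data, RobotFrameworkLexer(), HtmlFormatter()))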


class VariableTokenizer(object):

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        for value, token in self.tokenize(var.base, VARIABLE):
            yield value, token
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            for value, token in self.tokenize(var.index, VARIABLE):
                yield value, token
            yield ']', SYNTAX
        for value, token in self.tokenize(string[var.end:], orig_token):
            yield value, token
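
# For example, tokenizing 'Hello ${name}!' with token ARGUMENT yields
# ('Hello ', ARGUMENT), ('${', SYNTAX), ('name', VARIABLE), ('}', SYNTAX)
# and ('!', ARGUMENT).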


class RowTokenizer(object):

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
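            # e.g. raw positions 0, 1, 2, 3, 4 become (index, separator)
            # pairs (-1, 1), (0, 0), (0, 1), (1, 0), (1, 1): even raw
            # positions are separators, odd ones are data cells.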
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            for value, token in self._tokenize(value, index, commented,
                                               separator, heading):
                yield value, token
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            for value, token in self._table.tokenize(value, index):
                yield value, token


class RowSplitter(object):
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        for value in splitter(row):
            yield value
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        for value in self._space_splitter.split(row):
            yield value

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest
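
# For illustration, RowSplitter().split('| Keyword | arg |') yields
# '| ', 'Keyword', ' | ', 'arg', ' |', '' and '\n', while the
# space-separated row 'Keyword  arg' yields '', 'Keyword', '  ', 'arg', '\n'.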


class Tokenizer(object):
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@')
        return var.start == 0 and var.end == len(value)
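
# For example, _is_assign('${result} =') is True: the trailing '=' is
# stripped and the rest is a single variable spanning the whole string.
# Plain text such as 'Log' is not an assignment.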


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'testprecondition',
                         'testteardown', 'testpostcondition', 'testtemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


class GherkinTokenizer(object):
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]
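
# For example, GherkinTokenizer().tokenize('Given user logs in', KEYWORD)
# returns [('Given ', GHERKIN), ('user logs in', KEYWORD)].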


class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


class _Table(object):
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            for value_and_token in self._tokenize(value, index):
                yield value_and_token
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.
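#
# For illustration (not in the original), VariableSplitter('@{items}[0]',
# identifiers='$@%') ends up with identifier='@', base='items', index='0',
# start=0 and end=11, i.e. the full '@{items}[0]' span.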

class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_variable_index():
            self.index = ''.join(self._list_variable_index_chars[1:-1])
            self.end += len(self._list_variable_index_chars)

    def _has_list_variable_index(self):
        return self._list_variable_index_chars\
               and self._list_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers\
               and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_variable(self):
        return self._variable_chars[0] == '@'

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration