ThirdParty/Pygments/pygments/lexers/robotframework.py

changeset 4172:4f20dba37ab6
parent    4170:8bc578136279
child     4697:c2e9bf425554
# -*- coding: utf-8 -*-
"""
    pygments.lexers.robotframework
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token
from pygments.util import text_type

__all__ = ['RobotFrameworkLexer']


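# Robot Framework concepts are mapped onto generic Pygments token types below,
# so existing Pygments styles should render Robot data without extra support.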
HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


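# Lookups of table names and settings go through normalize(), which
# lower-cases the value and removes spaces plus any extra characters given;
# e.g. normalize('Test Template') == 'testtemplate', making the lookups
# case- and space-insensitive.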
def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string


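# A minimal usage sketch (assuming Pygments is installed; the input file name
# below is hypothetical):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     with open('tests.robot') as f:
#         print(highlight(f.read(), RobotFrameworkLexer(),
#                         TerminalFormatter()))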
class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space and pipe separated plain text formats.

    .. versionadded:: 1.6
    """
    name = 'RobotFramework'
    aliases = ['robotframework']
    filenames = ['*.txt', '*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        # Expand tabs to two spaces so that tab-separated cells are caught by
        # the two-or-more-spaces rule in RowSplitter below.
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, text_type(value)
                        index += len(value)


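# Splits a single cell around ${scalar}, @{list} and %{environment} variables
# and hands the pieces back with appropriate tokens. For example, '@{items}[0]'
# is emitted as '@{' (SYNTAX), 'items' (VARIABLE), '}' (SYNTAX), '[' (SYNTAX),
# '0' (VARIABLE) and ']' (SYNTAX); nested variables in the base are handled by
# the recursive tokenize() call.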
class VariableTokenizer(object):

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        for value, token in self.tokenize(var.base, VARIABLE):
            yield value, token
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            for value, token in self.tokenize(var.index, VARIABLE):
                yield value, token
            yield ']', SYNTAX
        for value, token in self.tokenize(string[var.end:], orig_token):
            yield value, token


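# Tokenizes one row at a time. A cell starting with '#' comments out the rest
# of the row, and a first cell starting with '*' selects a new table; e.g.
# '*** Test Cases ***' normalizes to 'testcases' and activates TestCaseTable.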
class RowTokenizer(object):

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            for value, token in self._tokenize(value, index, commented,
                                               separator, heading):
                yield value, token
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            for value, token in self._table.tokenize(value, index):
                yield value, token


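# Splits a row into alternating separators and cells, always starting with a
# separator (an empty pseudo-separator in the space-separated format) and
# ending with a newline. For example:
#
#   '| Keyword |  arg'  ->  '| ', 'Keyword', ' |  ', 'arg', '\n'
#   'Keyword    arg'    ->  '', 'Keyword', '    ', 'arg', '\n'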
class RowSplitter(object):
    _space_splitter = re.compile(r'( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        for value in splitter(row):
            yield value
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        for value in self._space_splitter.split(row):
            yield value

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest


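# Base class for the cell tokenizers below: _tokens lists the token type per
# cell position, and positions past the end of the list reuse the last entry.
# _is_assign() recognizes assignment targets such as '${result}' or
# '${result} ='.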
class Tokenizer(object):
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@')
        return var.start == 0 and var.end == len(value)


class Comment(Tokenizer):
    _tokens = (COMMENT,)


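# The first cell of a setting row selects how the rest of the row is
# tokenized: e.g. in 'Test Setup    Open Browser', the normalized name
# 'testsetup' is a keyword setting, so 'Open Browser' is highlighted as a
# keyword call instead of a plain argument.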
class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'testprecondition',
                         'testteardown', 'testpostcondition', 'testtemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


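# Splits a leading Gherkin prefix off a keyword name, e.g. 'Given user logs
# in' becomes ('Given ', GHERKIN) followed by ('user logs in', KEYWORD).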
class GherkinTokenizer(object):
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]


class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


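# In a for-loop row the loop variables and the 'IN' / 'IN RANGE' marker itself
# are plain syntax; every cell after the marker is an argument.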
class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


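# Base class for tables. A row whose first non-empty cell is '...' continues
# the previous row, so the previous row's tokenizer is restored for it;
# end_row() re-runs __init__ to reset per-row state while remembering the
# current tokenizer for exactly that case.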
class _Table(object):
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            for value_and_token in self._tokenize(value, index):
                yield value_and_token
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


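# When a test template is active (either a default 'Test Template' from the
# setting table or a '[Template]' in the test itself), keyword rows are
# tokenized with TemplatedKeywordCall, i.e. every cell is a plain argument.
# Template values '', '\', 'NONE' and '${EMPTY}' all mean "no template".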
class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.

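# Finds the first variable in a string. For 'Hello ${name}!' the result has
# identifier '$', base 'name', start 6 and end 13 (the index just past '}');
# start stays -1 if the string contains no variable.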
class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_variable_index():
            self.index = ''.join(self._list_variable_index_chars[1:-1])
            self.end += len(self._list_variable_index_chars)

    def _has_list_variable_index(self):
        return (self._list_variable_index_chars
                and self._list_variable_index_chars[-1] == ']')

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return (string[index] in self._identifiers
                and not self._is_escaped(string, index))

    def _is_escaped(self, string, index):
        # An odd number of preceding backslashes means the character at
        # 'index' is escaped.
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_variable(self):
        return self._variable_chars[0] == '@'

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
