ThirdParty/Pygments/pygments/lexers/_robotframeworklexer.py

changeset 4172:4f20dba37ab6
parent    4170:8bc578136279
child     4173:10336d4d1488
# -*- coding: utf-8 -*-
"""
    pygments.lexers._robotframeworklexer
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from __future__ import unicode_literals

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token

# Shorthand aliases for the Pygments token types used by the tokenizers below.
HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error

def normalize(string, remove=''):
    # Lower-case the string and drop spaces plus any characters in `remove`.
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string

class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space and pipe separated plain text formats.

    *New in Pygments 1.6.*
    """
    name = 'RobotFramework'
    aliases = ['RobotFramework', 'robotframework']
    filenames = ['*.txt', '*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, str(value)
                        index += len(value)

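# A minimal usage sketch (the input file name is hypothetical; assumes only
# that Pygments itself is available):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     with open('tests.robot') as f:
#         print(highlight(f.read(), RobotFrameworkLexer(), TerminalFormatter()))
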
class VariableTokenizer(object):

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        for value, token in self.tokenize(var.base, VARIABLE):
            yield value, token
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            for value, token in self.tokenize(var.index, VARIABLE):
                yield value, token
            yield ']', SYNTAX
        for value, token in self.tokenize(string[var.end:], orig_token):
            yield value, token

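# For example, tokenizing 'Hello ${name}!' with the ARGUMENT token yields:
#
#     ('Hello ', ARGUMENT), ('${', SYNTAX), ('name', VARIABLE),
#     ('}', SYNTAX), ('!', ARGUMENT)
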
class RowTokenizer(object):

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            for value, token in self._tokenize(value, index, commented,
                                               separator, heading):
                yield value, token
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            for value, token in self._table.tokenize(value, index):
                yield value, token

class RowSplitter(object):
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        for value in splitter(row.rstrip()):
            yield value
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with a (pseudo)separator, as with pipes.
        for value in self._space_splitter.split(row):
            yield value

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest

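# For example, splitting the pipe-separated row '| Keyword | arg |' yields
# separators and cells alternately, followed by the row terminator:
#
#     '| ', 'Keyword', ' | ', 'arg', ' |', '', '\n'
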
class Tokenizer(object):
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@')
        return var.start == 0 and var.end == len(value)


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'testprecondition',
                         'testteardown', 'testpostcondition', 'testtemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


class GherkinTokenizer(object):
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]

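# For example, GherkinTokenizer().tokenize('Given user logs in', KEYWORD)
# returns [('Given ', GHERKIN), ('user logs in', KEYWORD)]; the prefix match
# is case-insensitive.
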
class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


class _Table(object):
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            for value_and_token in self._tokenize(value, index):
                yield value_and_token
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.

class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_variable_index():
            self.index = ''.join(self._list_variable_index_chars[1:-1])
            self.end += len(self._list_variable_index_chars)

    def _has_list_variable_index(self):
        return self._list_variable_index_chars\
            and self._list_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers\
            and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_variable(self):
        return self._variable_chars[0] == '@'

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
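
# A minimal, hypothetical demo of VariableSplitter (not part of the original
# module): running this file directly shows how a variable is located.
if __name__ == '__main__':
    splitter = VariableSplitter('Hello ${name}!', identifiers='$@%')
    # Expected output: identifier='$' base='name' start=6 end=13
    print('identifier=%r base=%r start=%d end=%d'
          % (splitter.identifier, splitter.base, splitter.start, splitter.end))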
