ThirdParty/Pygments/pygments/lexers/_robotframeworklexer.py

changeset 2426:da76c71624de
# -*- coding: utf-8 -*-
"""
    pygments.lexers._robotframeworklexer
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token


HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string

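# Illustrative example (not part of the upstream file): normalize() lowercases
# its input and removes spaces plus any characters given in `remove`, so
# normalize('*** Test Cases ***', remove='*') returns 'testcases'. This is
# how table headers below are matched case- and space-insensitively.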

class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space- and pipe-separated plain text formats.

    *New in Pygments 1.6.*
    """
    name = 'RobotFramework'
    aliases = ['RobotFramework', 'robotframework']
    filenames = ['*.txt', '*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, str(value)
                        index += len(value)

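# Minimal usage sketch (not part of the upstream file), relying only on the
# standard Pygments API; `test_data` is a placeholder for a string of plain
# text Robot Framework test data:
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#     html = highlight(test_data, RobotFrameworkLexer(), HtmlFormatter())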

class VariableTokenizer(object):

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        for value, token in self.tokenize(var.base, VARIABLE):
            yield value, token
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            for value, token in self.tokenize(var.index, VARIABLE):
                yield value, token
            yield ']', SYNTAX
        for value, token in self.tokenize(string[var.end:], orig_token):
            yield value, token

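# Illustrative example (not part of the upstream file): tokenizing the cell
# 'Hello ${name}!' with token ARGUMENT yields ('Hello ', ARGUMENT),
# ('${', SYNTAX), ('name', VARIABLE), ('}', SYNTAX) and ('!', ARGUMENT),
# i.e. variables are highlighted separately inside any non-comment token.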

class RowTokenizer(object):

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            for value, token in self._tokenize(value, index, commented,
                                               separator, heading):
                yield value, token
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            for value, token in self._table.tokenize(value, index):
                yield value, token


class RowSplitter(object):
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        for value in splitter(row.rstrip()):
            yield value
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with a (pseudo)separator, as with pipes
        for value in self._space_splitter.split(row):
            yield value

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest

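# Illustrative example (not part of the upstream file): splitting the
# space-separated row 'My Keyword  arg' yields
# ['', 'My Keyword', '  ', 'arg', '\n'] -- cells and separators alternate,
# and every row starts with a (pseudo)separator and ends with a newline.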

class Tokenizer(object):
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@')
        return var.start == 0 and var.end == len(value)


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'testprecondition',
                         'testteardown', 'testpostcondition', 'testtemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)

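# Illustrative example (not part of the upstream file): in a settings table
# the row 'Library    OperatingSystem' tokenizes 'Library' as SETTING and,
# via the ImportSetting tokenizer installed above, 'OperatingSystem' as
# IMPORT; an unknown setting name in the first cell is marked as ERROR.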

class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)

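# Illustrative example (not part of the upstream file): on the row
# '${result} =    My Keyword    arg' the cell '${result} =' passes
# _is_assign() and is returned as SYNTAX, 'My Keyword' becomes the KEYWORD
# and 'arg' an ARGUMENT.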

class GherkinTokenizer(object):
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]

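# Illustrative example (not part of the upstream file):
# GherkinTokenizer().tokenize('Given user is logged in', KEYWORD) returns
# [('Given ', GHERKIN), ('user is logged in', KEYWORD)], so BDD prefixes
# are emphasized separately from the keyword name.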

class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


class _Table(object):
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            for value_and_token in self._tokenize(value, index):
                yield value_and_token
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


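# Illustrative note (not part of the upstream file): a row whose first
# non-empty cell is '...' continues the previous row, in which case
# _Table.tokenize() above restores the tokenizer that was active when the
# previous row ended instead of starting a fresh one.
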
# Following code copied directly from Robot Framework 2.7.5.

class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_variable_index():
            self.index = ''.join(self._list_variable_index_chars[1:-1])
            self.end += len(self._list_variable_index_chars)

    def _has_list_variable_index(self):
        return self._list_variable_index_chars\
               and self._list_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers\
               and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_variable(self):
        return self._variable_chars[0] == '@'

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
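

# Illustrative example (not part of the upstream file):
# VariableSplitter('@{items}[0]', identifiers='$@%') detects the list
# variable at start 0 and end 11, with identifier '@', base 'items' and
# index '0'; for plain text without variables, start stays -1.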
