1"""
2 pygments.lexers.robotframework
3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexer for Robot Framework.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""

# Copyright 2012 Nokia Siemens Networks Oyj
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token

__all__ = ['RobotFrameworkLexer']


HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string
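
# Example (illustrative): normalize('Test Cases') == 'testcases' and
# normalize('*** Settings ***', remove='*') == 'settings'.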


class RobotFrameworkLexer(Lexer):
    """
    For Robot Framework test data.

    Supports both space and pipe separated plain text formats.
    """
    name = 'RobotFramework'
    url = 'https://robotframework.org'
    aliases = ['robotframework']
    filenames = ['*.robot', '*.resource']
    mimetypes = ['text/x-robotframework']
    version_added = '1.6'

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, str(value)
                        index += len(value)
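
# A minimal sketch of the input this lexer accepts (illustrative, not part
# of the lexer): cells in the space separated format are divided by two or
# more spaces, while the pipe separated format wraps cells in '| ... |':
#
#   *** Test Cases ***
#   Example
#       Log    Hello
#
#   | *** Test Cases *** |
#   | Example |
#   | | Log | Hello |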


class VariableTokenizer:

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%&')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        yield from self.tokenize(var.base, VARIABLE)
        yield '}', SYNTAX
        if var.index is not None:
            yield '[', SYNTAX
            yield from self.tokenize(var.index, VARIABLE)
            yield ']', SYNTAX
        yield from self.tokenize(string[var.end:], orig_token)
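
# Example (illustrative): tokenizing 'Hello ${name}!' as ARGUMENT yields
# ('Hello ', ARGUMENT), ('${', SYNTAX), ('name', VARIABLE), ('}', SYNTAX)
# and ('!', ARGUMENT).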


class RowTokenizer:

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'tasks': testcases, 'task': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index - 1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            yield from self._tokenize(value, index, commented,
                                      separator, heading)
        self._table.end_row()
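
    # Example (illustrative): for the row 'Log    message' the splitter
    # yields ['', 'Log', '    ', 'message', '\n'], which divmod() above
    # maps to separator, cell 0, separator, cell 1, separator.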

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            yield from self._table.tokenize(value, index)


class RowSplitter:
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (self._split_from_pipes if row.startswith('| ')
                    else self._split_from_spaces)
        yield from splitter(row)
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        yield from self._space_splitter.split(row)

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest
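
# Example (illustrative): split('| Log | message') yields
# ['| ', 'Log', ' | ', 'message', '\n'].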


class Tokenizer:
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@&')
        return var.start == 0 and var.end == len(value)
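
# Example (illustrative): _is_assign('${result} =') and _is_assign('@{items}')
# are True, _is_assign('Log') is False.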


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'tasksetup',
                         'testprecondition', 'testteardown', 'taskteardown',
                         'testpostcondition', 'testtemplate', 'tasktemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout', 'tasktimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)
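
# Example (illustrative): on a settings row 'Library    Collections' the
# Setting tokenizer yields SETTING for 'Library' and delegates 'Collections'
# to ImportSetting, which yields IMPORT.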


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            token_type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], token_type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout',
                       'tags')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)
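
# Example (illustrative): in '${x} =    Get Value    arg' the assignment
# '${x} =' gets SYNTAX (its variable part is re-tokenized later),
# 'Get Value' gets KEYWORD, and 'arg' gets ARGUMENT.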


class GherkinTokenizer:
    _gherkin_prefix = re.compile('^(Given|When|Then|And|But) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]
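
# Example (illustrative): 'Given user logs in' is split into
# ('Given ', GHERKIN) and ('user logs in', KEYWORD).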


class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = ARGUMENT if self._in_arguments else SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token
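
# Example (illustrative): in ':FOR    ${i}    IN    1    2' the cells up to
# and including 'IN' get SYNTAX and the values after it ARGUMENT; variable
# cells are expanded later by VariableTokenizer.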


class _Table:
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            yield from self._tokenize(value, index)
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)
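
# Example (illustrative): a row whose first non-empty cell is '...'
# continues the previous row, so its remaining cells keep the previous
# row's tokenizer instead of starting a new keyword call.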


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.

class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_or_dict_variable_index():
            self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1])
            self.end += len(self._list_and_dict_variable_index_chars)

    def _has_list_or_dict_variable_index(self):
        return self._list_and_dict_variable_index_chars \
            and self._list_and_dict_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_and_dict_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # enumerate() gained a start arg only in Py 2.6.
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index + 2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers \
            and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped
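
    # Example (illustrative): in the text '\${x}' the '$' at index 1 is
    # escaped (one preceding backslash); in '\\${x}' the '$' at index 2
    # is not (the two backslashes escape each other).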

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_or_dict_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_or_dict_variable(self):
        return self._variable_chars[0] in ('@', '&')

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_and_dict_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_and_dict_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
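

# A minimal end-to-end demo sketch (illustrative; the guard keeps it from
# running on import, and SAMPLE is an invented snippet).
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    SAMPLE = ('*** Test Cases ***\n'
              'Example\n'
              '    Log    Hello, ${name}!\n')
    print(highlight(SAMPLE, RobotFrameworkLexer(), TerminalFormatter()))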