1"""
2 pygments.lexers.dsls
3 ~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for various domain-specific languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import ExtendedRegexLexer, RegexLexer, bygroups, words, \
14 include, default, this, using, combined
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Whitespace
17
18__all__ = ['ProtoBufLexer', 'ZeekLexer', 'PuppetLexer', 'RslLexer',
19 'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer',
20 'CrmshLexer', 'ThriftLexer', 'FlatlineLexer', 'SnowballLexer']
21
22
class ProtoBufLexer(RegexLexer):
    """
    Lexer for Protocol Buffer definition files.

    Covers both proto2-style keywords (``optional``/``required``/``repeated``)
    and proto3 additions (``oneof``, ``syntax``).  ``package``, ``message``/
    ``extend`` and ``enum``/``group``/``service`` push small one-shot states
    so that the declared identifier gets a distinct token type.
    """

    name = 'Protocol Buffer'
    url = 'https://developers.google.com/protocol-buffers/'
    aliases = ['protobuf', 'proto']
    filenames = ['*.proto']
    version_added = '1.4'

    tokens = {
        'root': [
            (r'[ \t]+', Whitespace),
            (r'[,;{}\[\]()<>]', Punctuation),
            # Line comment; tolerates a backslash-newline splitting the '//'.
            (r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
            # Block comment; likewise tolerates a split '/*' and '*/'.
            (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
            (words((
                'import', 'option', 'optional', 'required', 'repeated',
                'reserved', 'default', 'packed', 'ctype', 'extensions', 'to',
                'max', 'rpc', 'returns', 'oneof', 'syntax'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words((
                'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64',
                'fixed32', 'fixed64', 'sfixed32', 'sfixed64',
                'float', 'double', 'bool', 'string', 'bytes'), suffix=r'\b'),
             Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            (r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'package'),
            (r'(message|extend)(\s+)',
             bygroups(Keyword.Declaration, Whitespace), 'message'),
            (r'(enum|group|service)(\s+)',
             bygroups(Keyword.Declaration, Whitespace), 'type'),
            (r'\".*?\"', String),
            (r'\'.*?\'', String),
            (r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float),
            (r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float),
            (r'(\-?(inf|nan))\b', Number.Float),
            (r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex),
            (r'0[0-7]+[LlUu]*', Number.Oct),
            (r'\d+[LlUu]*', Number.Integer),
            # NOTE(review): '[+-=]' is a character *range* ('+' .. '='), so it
            # also matches '.', '/', ':' and friends, not just the three
            # operators it appears to name.  That may be load-bearing for
            # text-format option bodies (e.g. 'key: value') — confirm before
            # narrowing it to '[-+=]'.
            (r'[+-=]', Operator),
            # 'field = value' — highlight the field name as an attribute.
            (r'([a-zA-Z_][\w.]*)([ \t]*)(=)',
             bygroups(Name.Attribute, Whitespace, Operator)),
            (r'[a-zA-Z_][\w.]*', Name),
        ],
        # One-shot state after 'package': grab the package identifier.
        'package': [
            (r'[a-zA-Z_]\w*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        # One-shot state after 'message'/'extend': the declared type name.
        'message': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        # One-shot state after 'enum'/'group'/'service'.
        'type': [
            (r'[a-zA-Z_]\w*', Name, '#pop'),
            default('#pop'),
        ],
    }
82
83
class ThriftLexer(RegexLexer):
    """
    For Thrift interface definitions.
    """
    name = 'Thrift'
    url = 'https://thrift.apache.org/'
    aliases = ['thrift']
    filenames = ['*.thrift']
    mimetypes = ['application/x-thrift']
    version_added = '2.1'

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            # String literals: combined() merges the shared escape rules with
            # the quote-specific body rules into one anonymous state.
            (r'"', String.Double, combined('stringescape', 'dqs')),
            (r'\'', String.Single, combined('stringescape', 'sqs')),
            (r'(namespace)(\s+)',
             bygroups(Keyword.Namespace, Whitespace), 'namespace'),
            (r'(enum|union|struct|service|exception)(\s+)',
             bygroups(Keyword.Declaration, Whitespace), 'class'),
            # Function declaration: return type(s), then the function name,
            # matched only when followed by an opening parenthesis.
            (r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)' # return arguments
             r'((?:[^\W\d]|\$)[\w$]*)' # method name
             r'(\s*)(\()', # signature start
             bygroups(using(this), Name.Function, Whitespace, Operator)),
            include('keywords'),
            include('numbers'),
            (r'[&=]', Operator),
            (r'[:;,{}()<>\[\]]', Punctuation),
            # Possibly-dotted identifier.
            (r'[a-zA-Z_](\.\w|\w)*', Name),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'#.*$', Comment),
            (r'//.*?\n', Comment),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
        ],
        'stringescape': [
            (r'\\([\\nrt"\'])', String.Escape),
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'[^\\"\n]+', String.Double),
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r'[^\\\'\n]+', String.Single),
        ],
        # One-shot state after 'namespace'.  The leading '*' alternative
        # covers Thrift's wildcard namespace ('namespace * foo').
        'namespace': [
            (r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'),
            default('#pop'),
        ],
        # One-shot state after enum/union/struct/service/exception.
        'class': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop'),
            default('#pop'),
        ],
        'keywords': [
            (r'(async|oneway|extends|throws|required|optional)\b', Keyword),
            (r'(true|false)\b', Keyword.Constant),
            (r'(const|typedef)\b', Keyword.Declaration),
            (words((
                'cpp_namespace', 'cpp_include', 'cpp_type', 'java_package',
                'cocoa_prefix', 'csharp_namespace', 'delphi_namespace',
                'php_namespace', 'py_module', 'perl_package',
                'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix',
                'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace',
                'xsd_attrs', 'include'), suffix=r'\b'),
             Keyword.Namespace),
            (words((
                'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double',
                'string', 'binary', 'map', 'list', 'set', 'slist',
                'senum'), suffix=r'\b'),
             Keyword.Type),
            # Words reserved by Thrift because they clash with keywords of
            # the various target languages (cannot be used as identifiers).
            (words((
                'BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__',
                '__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__',
                'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin',
                'break', 'case', 'catch', 'class', 'clone', 'continue',
                'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic',
                'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare',
                'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile',
                'ensure', 'except', 'exec', 'finally', 'float', 'for',
                'foreach', 'function', 'global', 'goto', 'if', 'implements',
                'import', 'in', 'inline', 'instanceof', 'interface', 'is',
                'lambda', 'module', 'native', 'new', 'next', 'nil', 'not',
                'or', 'pass', 'public', 'print', 'private', 'protected',
                'raise', 'redo', 'rescue', 'retry', 'register', 'return',
                'self', 'sizeof', 'static', 'super', 'switch', 'synchronized',
                'then', 'this', 'throw', 'transient', 'try', 'undef',
                'unless', 'unsigned', 'until', 'use', 'var', 'virtual',
                'volatile', 'when', 'while', 'with', 'xor', 'yield'),
                prefix=r'\b', suffix=r'\b'),
             Keyword.Reserved),
        ],
        'numbers': [
            (r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)', Number.Float),
            (r'[+-]?0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
    }
187
188
class ZeekLexer(RegexLexer):
    """
    For Zeek scripts.

    The 'root' state is a sequence of includes ordered from most to least
    specific; rule order within each sub-state is significant.
    """
    name = 'Zeek'
    url = 'https://www.zeek.org/'
    aliases = ['zeek', 'bro']
    filenames = ['*.zeek', '*.bro']
    version_added = '2.5'

    # Building blocks for the literal rules below.
    _hex = r'[0-9a-fA-F]'
    _float = r'((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?'
    _h = r'[A-Za-z0-9][-A-Za-z0-9]*'   # one hostname label

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            include('directives'),
            include('attributes'),
            include('types'),
            include('keywords'),
            include('literals'),
            include('operators'),
            include('punctuation'),
            # Function call: (possibly namespaced) identifier followed by '('.
            (r'((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)(?=\s*\()',
             Name.Function),
            include('identifiers'),
        ],

        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
            # Line continuation.
            (r'(\\)(\n)', bygroups(Text, Whitespace)),
        ],

        'comments': [
            (r'#.*$', Comment),
        ],

        'directives': [
            (r'@(load-plugin|load-sigs|load|unload)\b.*$', Comment.Preproc),
            (r'@(DEBUG|DIR|FILENAME|deprecated|if|ifdef|ifndef|else|endif)\b', Comment.Preproc),
            (r'(@prefixes)(\s*)((\+?=).*)$', bygroups(Comment.Preproc,
                                                      Whitespace, Comment.Preproc)),
        ],

        # '&'-prefixed attributes (e.g. '&redef', '&log').
        'attributes': [
            (words(('redef', 'priority', 'log', 'optional', 'default', 'add_func',
                    'delete_func', 'expire_func', 'read_expire', 'write_expire',
                    'create_expire', 'synchronized', 'persistent', 'rotate_interval',
                    'rotate_size', 'encrypt', 'raw_output', 'mergeable', 'error_handler',
                    'type_column', 'deprecated'),
                   prefix=r'&', suffix=r'\b'),
             Keyword.Pseudo),
        ],

        'types': [
            (words(('any',
                    'enum', 'record', 'set', 'table', 'vector',
                    'function', 'hook', 'event',
                    'addr', 'bool', 'count', 'double', 'file', 'int', 'interval',
                    'pattern', 'port', 'string', 'subnet', 'time'),
                   suffix=r'\b'),
             Keyword.Type),

            # 'opaque of <type>'.
            (r'(opaque)(\s+)(of)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)\b',
             bygroups(Keyword.Type, Whitespace, Operator.Word, Whitespace, Keyword.Type)),

            # 'type Name: record/enum' declares a new compound type.
            (r'(type)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)(\s*)(:)(\s*)\b(record|enum)\b',
             bygroups(Keyword, Whitespace, Name.Class, Whitespace, Operator, Whitespace, Keyword.Type)),

            # 'type Name: <other type>' — a plain type alias.
            (r'(type)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)(\s*)(:)',
             bygroups(Keyword, Whitespace, Name, Whitespace, Operator)),

            # 'redef record/enum Name' extends an existing compound type.
            (r'(redef)(\s+)(record|enum)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)\b',
             bygroups(Keyword, Whitespace, Keyword.Type, Whitespace, Name.Class)),
        ],

        'keywords': [
            (words(('redef', 'export', 'if', 'else', 'for', 'while',
                    'return', 'break', 'next', 'continue', 'fallthrough',
                    'switch', 'default', 'case',
                    'add', 'delete',
                    'when', 'timeout', 'schedule'),
                   suffix=r'\b'),
             Keyword),
            (r'(print)\b', Keyword),
            (r'(global|local|const|option)\b', Keyword.Declaration),
            (r'(module)(\s+)(([A-Za-z_]\w*)(?:::([A-Za-z_]\w*))*)\b',
             bygroups(Keyword.Namespace, Whitespace, Name.Namespace)),
        ],

        'literals': [
            (r'"', String, 'string'),

            # Not the greatest match for patterns, but generally helps
            # disambiguate between start of a pattern and just a division
            # operator.
            (r'/(?=.*/)', String.Regex, 'regex'),

            (r'(T|F)\b', Keyword.Constant),

            # Port
            (r'\d{1,5}/(udp|tcp|icmp|unknown)\b', Number),

            # IPv4 Address.  The dot must be escaped; an unescaped '.'
            # previously let ANY separator character match (e.g. '1 2 3 4').
            (r'(\d{1,3}\.){3}(\d{1,3})\b', Number),

            # IPv6 Address (optionally with an embedded dotted-quad tail);
            # same dot-escaping fix as the IPv4 rule.
            (r'\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((\d{1,3}\.){3}(\d{1,3}))?\]', Number),

            # Numeric
            (r'0[xX]' + _hex + r'+\b', Number.Hex),
            # Interval literals: a float plus a time unit.
            (_float + r'\s*(day|hr|min|sec|msec|usec)s?\b', Number.Float),
            (_float + r'\b', Number.Float),
            (r'(\d+)\b', Number.Integer),

            # Hostnames
            (_h + r'(\.' + _h + r')+', String),
        ],

        'operators': [
            (r'[!%*/+<=>~|&^-]', Operator),
            (r'([-+=&|]{2}|[+=!><-]=)', Operator),
            (r'(in|as|is|of)\b', Operator.Word),
            # Record field access/test: '$field' and '?$field'.
            (r'\??\$', Operator),
        ],

        'punctuation': [
            (r'[{}()\[\],;.]', Punctuation),
            # The "ternary if", which uses '?' and ':', could instead be
            # treated as an Operator, but colons are more frequently used to
            # separate field/identifier names from their types, so the (often)
            # less-prominent Punctuation is used even with '?' for consistency.
            (r'[?:]', Punctuation),
        ],

        'identifiers': [
            (r'([a-zA-Z_]\w*)(::)', bygroups(Name, Punctuation)),
            (r'[a-zA-Z_]\w*', Name)
        ],

        'string': [
            (r'\\.', String.Escape),
            # printf-style format specifiers inside strings.
            (r'%-?[0-9]*(\.[0-9]+)?[DTd-gsx]', String.Escape),
            (r'"', String, '#pop'),
            (r'.', String),
        ],

        'regex': [
            (r'\\.', String.Escape),
            (r'/', String.Regex, '#pop'),
            (r'.', String.Regex),
        ],
    }
345
346
# Backwards-compatibility alias: the Zeek project was formerly named Bro,
# so existing code may still refer to BroLexer.
BroLexer = ZeekLexer
348
349
class PuppetLexer(RegexLexer):
    """
    For Puppet configuration DSL.
    """
    name = 'Puppet'
    url = 'https://puppet.com/'
    aliases = ['puppet']
    filenames = ['*.pp']
    version_added = '1.6'

    tokens = {
        'root': [
            include('comments'),
            include('keywords'),
            include('names'),
            include('numbers'),
            include('operators'),
            include('strings'),

            # Character class starting with ']' — legal as the first member.
            (r'[]{}:(),;[]', Punctuation),
            (r'\s+', Whitespace),
        ],

        'comments': [
            (r'(\s*)(#.*)$', bygroups(Whitespace, Comment)),
            (r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
        ],

        'operators': [
            (r'(=>|\?|<|>|=|\+|-|/|\*|~|!|\|)', Operator),
            (r'(in|and|or|not)\b', Operator.Word),
        ],

        'names': [
            (r'[a-zA-Z_]\w*', Name.Attribute),
            # Variable with index access: $var[key].
            (r'(\$\S+)(\[)(\S+)(\])', bygroups(Name.Variable, Punctuation,
                                               String, Punctuation)),
            (r'\$\S+', Name.Variable),
        ],

        'numbers': [
            # Copypasta from the Python lexer
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],

        'keywords': [
            # Left out 'group' and 'require'
            # Since they're often used as attributes
            (words((
                'absent', 'alert', 'alias', 'audit', 'augeas', 'before', 'case',
                'check', 'class', 'computer', 'configured', 'contained',
                'create_resources', 'crit', 'cron', 'debug', 'default',
                'define', 'defined', 'directory', 'else', 'elsif', 'emerg',
                'err', 'exec', 'extlookup', 'fail', 'false', 'file',
                'filebucket', 'fqdn_rand', 'generate', 'host', 'if', 'import',
                'include', 'info', 'inherits', 'inline_template', 'installed',
                'interface', 'k5login', 'latest', 'link', 'loglevel',
                'macauthorization', 'mailalias', 'maillist', 'mcx', 'md5',
                'mount', 'mounted', 'nagios_command', 'nagios_contact',
                'nagios_contactgroup', 'nagios_host', 'nagios_hostdependency',
                'nagios_hostescalation', 'nagios_hostextinfo', 'nagios_hostgroup',
                'nagios_service', 'nagios_servicedependency', 'nagios_serviceescalation',
                'nagios_serviceextinfo', 'nagios_servicegroup', 'nagios_timeperiod',
                'node', 'noop', 'notice', 'notify', 'package', 'present', 'purged',
                'realize', 'regsubst', 'resources', 'role', 'router', 'running',
                'schedule', 'scheduled_task', 'search', 'selboolean', 'selmodule',
                'service', 'sha1', 'shellquote', 'split', 'sprintf',
                'ssh_authorized_key', 'sshkey', 'stage', 'stopped', 'subscribe',
                'tag', 'tagged', 'template', 'tidy', 'true', 'undef', 'unmounted',
                'user', 'versioncmp', 'vlan', 'warning', 'yumrepo', 'zfs', 'zone',
                'zpool'), prefix='(?i)', suffix=r'\b'),
             Keyword),
        ],

        'strings': [
            # NOTE(review): the double-quoted rule does not recognize escape
            # sequences (unlike the single-quoted rule below), so an embedded
            # '\"' ends the string early — confirm whether this is intended.
            (r'"([^"])*"', String),
            (r"'(\\'|[^'])*'", String),
        ],

    }
435
436
class RslLexer(RegexLexer):
    """
    RSL is the formal specification
    language used in RAISE (Rigorous Approach to Industrial Software Engineering)
    method.
    """
    name = 'RSL'
    url = 'http://en.wikipedia.org/wiki/RAISE'
    aliases = ['rsl']
    filenames = ['*.rsl']
    mimetypes = ['text/rsl']
    version_added = '2.0'

    # DOTALL: '.' spans newlines, so the non-greedy comment rules below can
    # cross lines where needed.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            (words((
                'Bool', 'Char', 'Int', 'Nat', 'Real', 'Text', 'Unit', 'abs',
                'all', 'always', 'any', 'as', 'axiom', 'card', 'case', 'channel',
                'chaos', 'class', 'devt_relation', 'dom', 'elems', 'else', 'elif',
                'end', 'exists', 'extend', 'false', 'for', 'hd', 'hide', 'if',
                'in', 'is', 'inds', 'initialise', 'int', 'inter', 'isin', 'len',
                'let', 'local', 'ltl_assertion', 'object', 'of', 'out', 'post',
                'pre', 'read', 'real', 'rng', 'scheme', 'skip', 'stop', 'swap',
                'then', 'theory', 'test_case', 'tl', 'transition_system', 'true',
                'type', 'union', 'until', 'use', 'value', 'variable', 'while',
                'with', 'write', '~isin', '-inflist', '-infset', '-list',
                '-set'), prefix=r'\b', suffix=r'\b'),
             Keyword),
            (r'(variable|value)\b', Keyword.Declaration),
            # The various RSL comment forms.
            (r'--.*?\n', Comment),
            (r'<:.*?:>', Comment),
            (r'\{!.*?!\}', Comment),
            (r'/\*.*?\*/', Comment),
            # 'name : <type>' — a value/function signature.
            (r'^([ \t]*)([\w]+)([ \t]*)(:[^:])', bygroups(Whitespace,
                                                          Name.Function, Whitespace, Name.Function)),
            # 'name(args) is/as ...' — an explicit function definition.
            (r'(^[ \t]*)([\w]+)([ \t]*)(\([\w\s,]*\))([ \t]*)(is|as)',
             bygroups(Whitespace, Name.Function, Whitespace, Text,
                      Whitespace, Keyword)),
            # Capitalized identifiers are treated as type names.
            (r'\b[A-Z]\w*\b', Keyword.Type),
            (r'(true|false)\b', Keyword.Constant),
            (r'".*"', String),
            (r'\'.\'', String.Char),
            (r'(><|->|-m->|/\\|<=|<<=|<\.|\|\||\|\^\||-~->|-~m->|\\/|>=|>>|'
             r'\.>|\+\+|-\\|<->|=>|:-|~=|\*\*|<<|>>=|\+>|!!|\|=\||#)',
             Operator),
            (r'[0-9]+\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-f]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }

    # Defined without 'self' on purpose — this follows the Pygments
    # analyse_text convention (it is called as a plain function).
    def analyse_text(text):
        """
        Check for the most common text in the beginning of a RSL file.
        """
        # Returns 1.0 on a match, otherwise (implicitly) None, which
        # Pygments treats as "no confidence".
        if re.search(r'scheme\s*.*?=\s*class\s*type', text, re.I) is not None:
            return 1.0
498
499
class MscgenLexer(RegexLexer):
    """
    For Mscgen files.
    """
    name = 'Mscgen'
    url = 'http://www.mcternan.me.uk/mscgen/'
    aliases = ['mscgen', 'msc']
    filenames = ['*.msc']
    version_added = '1.6'

    # An entity/attribute value: a bare word or a double-quoted string
    # (with '\"' escapes allowed inside).
    _var = r'(\w+|"(?:\\"|[^"])*")'

    tokens = {
        'root': [
            (r'msc\b', Keyword.Type),
            # Options
            (r'(hscale|HSCALE|width|WIDTH|wordwraparcs|WORDWRAPARCS'
             r'|arcgradient|ARCGRADIENT)\b', Name.Property),
            # Operators
            (r'(abox|ABOX|rbox|RBOX|box|BOX|note|NOTE)\b', Operator.Word),
            (r'(\.|-|\|){3}', Keyword),
            # The various arc kinds, longest alternatives first.
            (r'(?:-|=|\.|:){2}'
             r'|<<=>>|<->|<=>|<<>>|<:>'
             r'|->|=>>|>>|=>|:>|-x|-X'
             r'|<-|<<=|<<|<=|<:|x-|X-|=', Operator),
            # Names
            (r'\*', Name.Builtin),
            (_var, Name.Variable),
            # Other
            (r'\[', Punctuation, 'attrs'),
            (r'\{|\}|,|;', Punctuation),
            include('comments')
        ],
        # Inside a '[...]' attribute list: 'name = value' pairs.
        'attrs': [
            (r'\]', Punctuation, '#pop'),
            (_var + r'(\s*)(=)(\s*)' + _var,
             bygroups(Name.Attribute, Whitespace, Operator, Whitespace,
                      String)),
            (r',', Punctuation),
            include('comments')
        ],
        'comments': [
            (r'(?://|#).*?\n', Comment.Single),
            (r'/\*(?:.|\n)*?\*/', Comment.Multiline),
            (r'[ \t\r\n]+', Whitespace)
        ]
    }
547
548
class VGLLexer(RegexLexer):
    """
    For SampleManager VGL source code.

    Keywords are matched case-insensitively (re.IGNORECASE) and only when
    not immediately followed by '=', '<', '>', '.', ',', '(' or ')', so
    identifiers merely starting with a keyword are not mis-highlighted.
    """
    name = 'VGL'
    url = 'http://www.thermoscientific.com/samplemanager'
    aliases = ['vgl']
    filenames = ['*.rpf']
    version_added = '1.6'

    flags = re.MULTILINE | re.DOTALL | re.IGNORECASE

    tokens = {
        'root': [
            # Comments are brace-delimited.
            (r'\{[^}]*\}', Comment.Multiline),
            (r'declare', Keyword.Constant),
            # Keyword alternation.  'global' and 'create' were previously
            # listed twice; the duplicates matched nothing extra (regex
            # alternation is first-match) and have been removed.
            (r'(if|then|else|endif|while|do|endwhile|and|or|prompt|object'
             r'|create|on|line|with|global|routine|value|endroutine|constant'
             r'|set|join|library|compile_option|file|exists|copy'
             r'|delete|enable|windows|name|notprotected)(?! *[=<>.,()])',
             Keyword),
            (r'(true|false|null|empty|error|locked)', Keyword.Constant),
            (r'[~^*#!%&\[\]()<>|+=:;,./?-]', Operator),
            (r'"[^"]*"', String),
            # Attribute access: '.name'.
            (r'(\.)([a-z_$][\w$]*)', bygroups(Operator, Name.Attribute)),
            (r'[0-9][0-9]*(\.[0-9]+(e[+\-]?[0-9]+)?)?', Number),
            (r'[a-z_$][\w$]*', Name),
            (r'[\r\n]+', Whitespace),
            (r'\s+', Whitespace)
        ]
    }
580
581
class AlloyLexer(RegexLexer):
    """
    For Alloy source code.
    """

    name = 'Alloy'
    url = 'http://alloy.mit.edu'
    aliases = ['alloy']
    filenames = ['*.als']
    mimetypes = ['text/x-alloy']
    version_added = '2.0'

    flags = re.MULTILINE | re.DOTALL

    # Identifier; the trailing '"*' admits quote marks at the end
    # (presumably for primed names — TODO confirm against Alloy grammar).
    iden_rex = r'[a-zA-Z_][\w]*"*'
    string_rex = r'"\b(\\\\|\\[^\\]|[^"\\])*"'
    # Horizontal whitespace only (no newlines); reused in several states.
    text_tuple = (r'[^\S\n]+', Whitespace)

    tokens = {
        # After 'sig'/'enum': the declared name(s), until 'extends' or '{'.
        'sig': [
            (r'(extends)\b', Keyword, '#pop'),
            (iden_rex, Name),
            text_tuple,
            (r',', Punctuation),
            (r'\{', Operator, '#pop'),
        ],
        # After 'module'/'open': a single module name.
        'module': [
            text_tuple,
            (iden_rex, Name, '#pop'),
        ],
        # After 'fun'/'pred'/'assert': an optional name or an anonymous body.
        'fun': [
            text_tuple,
            (r'\{', Operator, '#pop'),
            (iden_rex, Name, '#pop'),
        ],
        # Like 'fun', but a fact may also be named by a string literal.
        'fact': [
            include('fun'),
            (string_rex, String, '#pop'),
        ],
        'root': [
            (r'--.*?$', Comment.Single),
            (r'//.*?$', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline),
            text_tuple,
            (r'(module|open)(\s+)', bygroups(Keyword.Namespace, Whitespace),
             'module'),
            (r'(sig|enum)(\s+)', bygroups(Keyword.Declaration, Whitespace), 'sig'),
            (r'(iden|univ|none)\b', Keyword.Constant),
            (r'(int|Int)\b', Keyword.Type),
            (r'(var|this|abstract|extends|set|seq|one|lone|let)\b', Keyword),
            (r'(all|some|no|sum|disj|when|else)\b', Keyword),
            (r'(run|check|for|but|exactly|expect|as|steps)\b', Keyword),
            (r'(always|after|eventually|until|release)\b', Keyword), # future time operators
            (r'(historically|before|once|since|triggered)\b', Keyword), # past time operators
            (r'(and|or|implies|iff|in)\b', Operator.Word),
            (r'(fun|pred|assert)(\s+)', bygroups(Keyword, Whitespace), 'fun'),
            (r'(fact)(\s+)', bygroups(Keyword, Whitespace), 'fact'),
            # Multi-character operators first, then single characters.
            (r'!|#|&&|\+\+|<<|>>|>=|<=>|<=|\.\.|\.|->', Operator),
            (r'[-+/*%=<>&!^|~{}\[\]().\';]', Operator),
            (iden_rex, Name),
            (r'[:,]', Punctuation),
            (r'[0-9]+', Number.Integer),
            (string_rex, String),
            (r'\n', Whitespace),
        ]
    }
648
649
class PanLexer(RegexLexer):
    """
    Lexer for pan source files.

    Based on tcsh lexer.
    """

    name = 'Pan'
    url = 'https://github.com/quattor/pan/'
    aliases = ['pan']
    filenames = ['*.pan']
    version_added = '2.0'

    tokens = {
        'root': [
            include('basic'),
            # Bracketed constructs recurse back into 'root' via their
            # dedicated states below.
            (r'\(', Keyword, 'paren'),
            (r'\{', Keyword, 'curly'),
            include('data'),
        ],
        'basic': [
            (words((
                'if', 'for', 'with', 'else', 'type', 'bind', 'while', 'valid', 'final',
                'prefix', 'unique', 'object', 'foreach', 'include', 'template',
                'function', 'variable', 'structure', 'extensible', 'declaration'),
                prefix=r'\b', suffix=r'\b'),
             Keyword),
            (words((
                'file_contents', 'format', 'index', 'length', 'match', 'matches',
                'replace', 'splice', 'split', 'substr', 'to_lowercase', 'to_uppercase',
                'debug', 'error', 'traceback', 'deprecated', 'base64_decode',
                'base64_encode', 'digest', 'escape', 'unescape', 'append', 'create',
                'first', 'nlist', 'key', 'list', 'merge', 'next', 'prepend', 'is_boolean',
                'is_defined', 'is_double', 'is_list', 'is_long', 'is_nlist', 'is_null',
                'is_number', 'is_property', 'is_resource', 'is_string', 'to_boolean',
                'to_double', 'to_long', 'to_string', 'clone', 'delete', 'exists',
                'path_exists', 'if_exists', 'return', 'value'),
                prefix=r'\b', suffix=r'\b'),
             Name.Builtin),
            (r'#.*', Comment),
            (r'\\[\w\W]', String.Escape),
            # 'name = ...' assignment.
            (r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Whitespace, Operator)),
            (r'[\[\]{}()=]+', Operator),
            # Heredoc-style string: '<<TAG ... TAG' (the \2 backreference
            # matches the closing tag).
            (r'<<\s*(\'?)\\?(\w+)[\w\W]+?\2', String),
            (r';', Punctuation),
        ],
        'data': [
            (r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double),
            (r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single),
            (r'\s+', Whitespace),
            (r'[^=\s\[\]{}()$"\'`\\;#]+', Text),
            (r'\d+(?= |\Z)', Number),
        ],
        'curly': [
            (r'\}', Keyword, '#pop'),
            (r':-', Keyword),
            (r'\w+', Name.Variable),
            (r'[^}:"\'`$]+', Punctuation),
            (r':', Punctuation),
            include('root'),
        ],
        'paren': [
            (r'\)', Keyword, '#pop'),
            include('root'),
        ],
    }
716
717
class CrmshLexer(RegexLexer):
    """
    Lexer for crmsh configuration files for Pacemaker clusters.
    """
    name = 'Crmsh'
    url = 'http://crmsh.github.io/'
    aliases = ['crmsh', 'pcmk']
    filenames = ['*.crmsh', '*.pcmk']
    mimetypes = []
    version_added = '2.1'

    # The '(?![\w#$-])' suffix used throughout acts as a word boundary for
    # crmsh's extended identifier alphabet (which includes '#', '$', '-').
    elem = words((
        'node', 'primitive', 'group', 'clone', 'ms', 'location',
        'colocation', 'order', 'fencing_topology', 'rsc_ticket',
        'rsc_template', 'property', 'rsc_defaults',
        'op_defaults', 'acl_target', 'acl_group', 'user', 'role',
        'tag'), suffix=r'(?![\w#$-])')
    sub = words((
        'params', 'meta', 'operations', 'op', 'rule',
        'attributes', 'utilization'), suffix=r'(?![\w#$-])')
    acl = words(('read', 'write', 'deny'), suffix=r'(?![\w#$-])')
    bin_rel = words(('and', 'or'), suffix=r'(?![\w#$-])')
    un_ops = words(('defined', 'not_defined'), suffix=r'(?![\w#$-])')
    date_exp = words(('in_range', 'date', 'spec', 'in'), suffix=r'(?![\w#$-])')
    acl_mod = (r'(?:tag|ref|reference|attribute|type|xpath)')
    bin_ops = (r'(?:lt|gt|lte|gte|eq|ne)')
    val_qual = (r'(?:string|version|number)')
    rsc_role_action = (r'(?:Master|Started|Slave|Stopped|'
                       r'start|promote|demote|stop)')

    tokens = {
        'root': [
            (r'^(#.*)(\n)?', bygroups(Comment, Whitespace)),
            # attr=value (nvpair)
            (r'([\w#$-]+)(=)("(?:""|[^"])*"|\S+)',
             bygroups(Name.Attribute, Punctuation, String)),
            # need this construct, otherwise numeric node ids
            # are matched as scores
            # elem id:
            (r'(node)(\s+)([\w#$-]+)(:)',
             bygroups(Keyword, Whitespace, Name, Punctuation)),
            # scores
            (r'([+-]?([0-9]+|inf)):', Number),
            # keywords (elements and other)
            (elem, Keyword),
            (sub, Keyword),
            (acl, Keyword),
            # binary operators
            (rf'(?:{val_qual}:)?({bin_ops})(?![\w#$-])', Operator.Word),
            # other operators
            (bin_rel, Operator.Word),
            (un_ops, Operator.Word),
            (date_exp, Operator.Word),
            # builtin attributes (e.g. #uname)
            (r'#[a-z]+(?![\w#$-])', Name.Builtin),
            # acl_mod:blah
            (rf'({acl_mod})(:)("(?:""|[^"])*"|\S+)',
             bygroups(Keyword, Punctuation, Name)),
            # rsc_id[:(role|action)]
            # NB: this matches all other identifiers
            (rf'([\w#$-]+)(?:(:)({rsc_role_action}))?(?![\w#$-])',
             bygroups(Name, Punctuation, Operator.Word)),
            # punctuation
            (r'(\\(?=\n)|[\[\](){}/:@])', Punctuation),
            (r'\s+|\n', Whitespace),
        ],
    }
785
786
class FlatlineLexer(RegexLexer):
    """
    Lexer for Flatline expressions.

    Flatline is a small lisp-like language: almost every token is a literal,
    a parenthesis, or a (builtin or user) function/variable name.
    """
    name = 'Flatline'
    url = 'https://github.com/bigmlcom/flatline'
    aliases = ['flatline']
    filenames = []
    mimetypes = ['text/x-flatline']
    version_added = '2.2'

    special_forms = ('let',)

    builtins = (
        "!=", "*", "+", "-", "<", "<=", "=", ">", ">=", "abs", "acos", "all",
        "all-but", "all-with-defaults", "all-with-numeric-default", "and",
        "asin", "atan", "avg", "avg-window", "bin-center", "bin-count", "call",
        "category-count", "ceil", "cond", "cond-window", "cons", "cos", "cosh",
        "count", "diff-window", "div", "ensure-value", "ensure-weighted-value",
        "epoch", "epoch-day", "epoch-fields", "epoch-hour", "epoch-millisecond",
        "epoch-minute", "epoch-month", "epoch-second", "epoch-weekday",
        "epoch-year", "exp", "f", "field", "field-prop", "fields", "filter",
        "first", "floor", "head", "if", "in", "integer", "language", "length",
        "levenshtein", "linear-regression", "list", "ln", "log", "log10", "map",
        "matches", "matches?", "max", "maximum", "md5", "mean", "median", "min",
        "minimum", "missing", "missing-count", "missing?", "missing_count",
        "mod", "mode", "normalize", "not", "nth", "occurrences", "or",
        "percentile", "percentile-label", "population", "population-fraction",
        "pow", "preferred", "preferred?", "quantile-label", "rand", "rand-int",
        "random-value", "re-quote", "real", "replace", "replace-first", "rest",
        "round", "row-number", "segment-label", "sha1", "sha256", "sin", "sinh",
        "sqrt", "square", "standard-deviation", "standard_deviation", "str",
        "subs", "sum", "sum-squares", "sum-window", "sum_squares", "summary",
        "summary-no", "summary-str", "tail", "tan", "tanh", "to-degrees",
        "to-radians", "variance", "vectorize", "weighted-random-value", "window",
        "winnow", "within-percentiles?", "z-score",
    )

    # Anything made of word chars and lispy punctuation, except '#'-prefixed.
    valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+'

    tokens = {
        'root': [
            # whitespaces - usually not relevant
            (r'[,]+', Text),
            (r'\s+', Whitespace),

            # numbers.  The hex rule must come before the integer rule:
            # previously '-?\d+' consumed the leading '0' of '0xff', so hex
            # literals were split into an Integer and a Variable token and
            # the hex rule was effectively unreachable.
            (r'0x-?[a-f\d]+', Number.Hex),
            (r'-?\d+\.\d+', Number.Float),
            (r'-?\d+', Number.Integer),

            # strings, symbols and characters
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String),
            (r"\\(.|[a-z]+)", String.Char),

            # expression template placeholder
            (r'_', String.Symbol),

            # highlight the special forms
            (words(special_forms, suffix=' '), Keyword),

            # highlight the builtins
            (words(builtins, suffix=' '), Name.Builtin),

            # the remaining functions
            (r'(?<=\()' + valid_name, Name.Function),

            # find the remaining variables
            (valid_name, Name.Variable),

            # parentheses
            (r'(\(|\))', Punctuation),
        ],
    }
861
862
class SnowballLexer(ExtendedRegexLexer):
    """
    Lexer for Snowball source code.

    Snowball's ``stringescapes`` directive lets a program redefine the pair
    of characters used to bracket escapes inside string literals, so this
    lexer is stateful: the current bracket pair is stored on the lexer
    instance (``_start``/``_end``) and consulted by the string callbacks.
    """

    name = 'Snowball'
    url = 'https://snowballstem.org/'
    aliases = ['snowball']
    filenames = ['*.sbl']
    version_added = '2.2'

    # Snowball's whitespace characters, used to build several regexes below.
    _ws = r'\n\r\t '

    def __init__(self, **options):
        # Start every lexer instance with the default escape brackets.
        self._reset_stringescapes()
        ExtendedRegexLexer.__init__(self, **options)

    def _reset_stringescapes(self):
        """Restore the default escape brackets (both are a single quote)."""
        self._start = "'"
        self._end = "'"

    def _string(do_string_first):
        """Build a callback that tokenizes (the rest of) a string literal.

        ``do_string_first`` selects whether scanning starts in plain-string
        mode (True, the 'string' state) or inside an escape bracket (False,
        the 'escape' state).
        """
        def callback(lexer, match, ctx):
            s = match.start()
            text = match.group()
            # 'string' scans up to the next escape-opening character;
            # 'escape' scans up to the next escape-closing character.
            string = re.compile(rf'([^{re.escape(lexer._start)}]*)(.)').match
            escape = re.compile(rf'([^{re.escape(lexer._end)}]*)(.)').match
            pos = 0
            do_string = do_string_first
            while pos < len(text):
                if do_string:
                    match = string(text, pos)
                    yield s + match.start(1), String.Single, match.group(1)
                    # A "'" here closes the literal (note: when _start has
                    # been redefined, "'" no longer equals _start).
                    if match.group(2) == "'":
                        yield s + match.start(2), String.Single, match.group(2)
                        ctx.stack.pop()
                        break
                    yield s + match.start(2), String.Escape, match.group(2)
                    pos = match.end()
                match = escape(text, pos)
                yield s + match.start(), String.Escape, match.group()
                # Escape bracket not closed within this match: continue in
                # the 'escape' state on the next chunk of input.
                if match.group(2) != lexer._end:
                    ctx.stack[-1] = 'escape'
                    break
                pos = match.end()
                do_string = True
            ctx.pos = s + match.end()
        return callback

    def _stringescapes(lexer, match, ctx):
        """Handle 'stringescapes XY': record the new escape bracket pair."""
        lexer._start = match.group(3)
        lexer._end = match.group(5)
        return bygroups(Keyword.Reserved, Whitespace, String.Escape, Whitespace,
                        String.Escape)(lexer, match, ctx)

    tokens = {
        'root': [
            # 'len'/'lenof' get different tokens at top level vs. inside a
            # declaration list (see the 'declaration' state below).
            (r'len\b', Name.Builtin),
            (r'lenof\b', Operator.Word),
            include('root1'),
        ],
        'root1': [
            (rf'[{_ws}]+', Whitespace),
            (r'\d+', Number.Integer),
            (r"'", String.Single, 'string'),
            (r'[()]', Punctuation),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*', Comment.Single),
            (r'[!*+\-/<=>]=|[-=]>|<[+-]|[$*+\-/<=>?\[\]]', Operator),
            (words(('as', 'get', 'hex', 'among', 'define', 'decimal',
                    'backwardmode'), suffix=r'\b'),
             Keyword.Reserved),
            (words(('strings', 'booleans', 'integers', 'routines', 'externals',
                    'groupings'), suffix=r'\b'),
             Keyword.Reserved, 'declaration'),
            (words(('do', 'or', 'and', 'for', 'hop', 'non', 'not', 'set', 'try',
                    'fail', 'goto', 'loop', 'next', 'test', 'true',
                    'false', 'unset', 'atmark', 'attach', 'delete', 'gopast',
                    'insert', 'repeat', 'sizeof', 'tomark', 'atleast',
                    'atlimit', 'reverse', 'setmark', 'tolimit', 'setlimit',
                    'backwards', 'substring'), suffix=r'\b'),
             Operator.Word),
            (words(('size', 'limit', 'cursor', 'maxint', 'minint'),
                   suffix=r'\b'),
             Name.Builtin),
            (rf'(stringdef\b)([{_ws}]*)([^{_ws}]+)',
             bygroups(Keyword.Reserved, Whitespace, String.Escape)),
            (rf'(stringescapes\b)([{_ws}]*)(.)([{_ws}]*)(.)',
             _stringescapes),
            (r'[A-Za-z]\w*', Name),
        ],
        # Inside a declaration list: 'len'/'lenof' are ordinary names here.
        'declaration': [
            (r'\)', Punctuation, '#pop'),
            (words(('len', 'lenof'), suffix=r'\b'), Name,
             ('root1', 'declaration')),
            include('root1'),
        ],
        'string': [
            (r"[^']*'", _string(True)),
        ],
        'escape': [
            (r"[^']*'", _string(False)),
        ],
    }

    def get_tokens_unprocessed(self, text=None, context=None):
        # Reset the escape brackets for each new input, since a previous
        # run may have redefined them via 'stringescapes'.
        self._reset_stringescapes()
        return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context)