1"""
2 pygments.lexers.parsers
3 ~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for parser generators.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import RegexLexer, DelegatingLexer, \
14 include, bygroups, using
15from pygments.token import Punctuation, Other, Text, Comment, Operator, \
16 Keyword, Name, String, Number, Whitespace
17from pygments.lexers.jvm import JavaLexer
18from pygments.lexers.c_cpp import CLexer, CppLexer
19from pygments.lexers.objective import ObjectiveCLexer
20from pygments.lexers.d import DLexer
21from pygments.lexers.dotnet import CSharpLexer
22from pygments.lexers.ruby import RubyLexer
23from pygments.lexers.python import PythonLexer
24from pygments.lexers.perl import PerlLexer
25
# Names exported by ``from pygments.lexers.parsers import *``.
# One entry per line; the order matches the original export list.
__all__ = [
    'RagelLexer',
    'RagelEmbeddedLexer',
    'RagelCLexer',
    'RagelDLexer',
    'RagelCppLexer',
    'RagelObjectiveCLexer',
    'RagelRubyLexer',
    'RagelJavaLexer',
    'AntlrLexer',
    'AntlrPythonLexer',
    'AntlrPerlLexer',
    'AntlrRubyLexer',
    'AntlrCppLexer',
    'AntlrCSharpLexer',
    'AntlrObjectiveCLexer',
    'AntlrJavaLexer',
    'AntlrActionScriptLexer',
    'TreetopLexer',
    'EbnfLexer',
]
33
34
class RagelLexer(RegexLexer):
    """A pure `Ragel <http://www.colm.net/open-source/ragel/>`_ lexer.  Use this
    for fragments of Ragel.  For ``.rl`` files, use
    :class:`RagelEmbeddedLexer` instead (or one of the
    language-specific subclasses).

    The ``root`` state is assembled from small helper states via
    ``include()``; a ``{`` switches to the ``host`` state, which swallows
    embedded host-language code as ``Other`` until the matching ``}``.
    """

    name = 'Ragel'
    url = 'http://www.colm.net/open-source/ragel/'
    aliases = ['ragel']
    filenames = []
    version_added = '1.1'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace)
        ],
        'comments': [
            # '#' starts a comment that runs to the end of the line
            (r'\#.*$', Comment),
        ],
        'keywords': [
            # statement keywords
            (r'(access|action|alphtype)\b', Keyword),
            (r'(getkey|write|machine|include)\b', Keyword),
            # built-in machine names
            (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
            (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
        ],
        'numbers': [
            (r'0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
        'literals': [
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String),          # square bracket literals
            (r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/', String.Regex),  # regular expressions
        ],
        'identifiers': [
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
        'operators': [
            (r',', Operator),                           # Join
            (r'\||&|--?', Operator),                    # Union, Intersection and Subtraction
            (r'\.|<:|:>>?', Operator),                  # Concatenation
            (r':', Operator),                           # Label
            (r'->', Operator),                          # Epsilon Transition
            # The embedding operators below are listed before the bare
            # transition-action operators so the two-part forms win.
            (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator),    # EOF Actions
            (r'(>|\$|%|<|@|<>)(!|err\b)', Operator),    # Global Error Actions
            (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator),  # Local Error Actions
            (r'(>|\$|%|<|@|<>)(~|to\b)', Operator),     # To-State Actions
            (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator),  # From-State Actions
            (r'>|@|\$|%', Operator),                    # Transition Actions and Priorities
            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),  # Repetition
            (r'!|\^', Operator),                        # Negation
            (r'\(|\)', Operator),                       # Grouping
        ],
        'root': [
            # Order matters: literals are tried first, and keywords are
            # tried before the generic identifier rule.
            include('literals'),
            include('whitespace'),
            include('comments'),
            include('keywords'),
            include('numbers'),
            include('identifiers'),
            include('operators'),
            # an opening brace starts an embedded host-code block
            (r'\{', Punctuation, 'host'),
            (r'=', Operator),
            (r';', Punctuation),
        ],
        'host': [
            (r'(' + r'|'.join((  # keep host code in largest possible chunks
                r'[^{}\'"/#]+',  # exclude unsafe characters
                r'[^\\]\\[{}]',  # allow escaped { or }

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment
                r'\#.*$\n?',            # ruby comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            # nested braces re-enter this state; the matching close leaves it
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
    }
127
128
class RagelEmbeddedLexer(RegexLexer):
    """
    A lexer for Ragel embedded in a host language file.

    This will only highlight Ragel statements. If you want host language
    highlighting then call the language-specific Ragel lexer.

    Host code is emitted as ``Other``; Ragel sections (``%% ...`` single
    line statements and ``%%{ ... }%%`` blocks) are handed to
    :class:`RagelLexer`.
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']
    url = 'http://www.colm.net/open-source/ragel/'
    version_added = '1.1'

    tokens = {
        'root': [
            (r'(' + r'|'.join((   # keep host code in largest possible chunks
                r'[^%\'"/#]+',    # exclude unsafe characters
                r'%(?=[^%]|$)',   # a single % sign is okay, just not 2 of them

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'/\*(.|\n)*?\*/',  # multi-line javadoc-style comment
                r'//.*$\n?',  # single line comment
                r'\#.*$\n?',  # ruby/ragel comment
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',  # regular expression

                # / is safe now that we've handled regex and javadoc comments
                r'/',
            )) + r')+', Other),

            # Single Line FSM.
            # Please don't put a quoted newline in a single line FSM.
            # That's just mean. It will break this.
            # The (?![{%]) lookahead keeps this rule from claiming the
            # '%%{' / '%%%%{' openers handled by the next rule.
            (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
                                                     using(RagelLexer),
                                                     Punctuation, Text)),

            # Multi Line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (r'(' + r'|'.join((  # keep ragel code in largest possible chunks.
                r'(' + r'|'.join((
                    r'[^}\'"\[/#]',   # exclude unsafe characters
                    r'\}(?=[^%]|$)',   # } is okay as long as it's not followed by %
                    r'\}%(?=[^%]|$)',  # ...well, one %'s okay, just not two...
                    r'[^\\]\\[{}]',    # ...and } is okay if it's escaped

                    # allow / if it's preceded with one of these symbols
                    # (ragel EOF actions)
                    r'(>|\$|%|<|@|<>)/',

                    # specifically allow regex followed immediately by *
                    # so it doesn't get mistaken for a comment
                    r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',

                    # allow / as long as it's not followed by another / or by a *
                    r'/(?=[^/*]|$)',

                    # We want to match as many of these as we can in one block.
                    # Not sure if we need the + sign here,
                    # does it help performance?
                )) + r')+',

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r"\[(\\\\|\\[^\\]|[^\]\\])*\]",  # square bracket literal
                r'/\*(.|\n)*?\*/',          # multi-line javadoc-style comment
                r'//.*$\n?',                # single line comment
                r'\#.*$\n?',                # ruby/ragel comment
            )) + r')+', using(RagelLexer)),

            # '}%%' closes the block and returns to host code
            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        # True when the text contains the literal marker '@LANG: indep'.
        return '@LANG: indep' in text
210
211
class RagelRubyLexer(DelegatingLexer):
    """
    Lexer for ``.rl`` files whose host language is Ruby.

    Pairs :class:`RubyLexer` with :class:`RagelEmbeddedLexer`.
    """

    version_added = '1.1'
    url = 'http://www.colm.net/open-source/ragel/'
    name = 'Ragel in Ruby Host'
    aliases = ['ragel-ruby', 'ragel-rb']
    filenames = ['*.rl']

    def __init__(self, **options):
        host_lexer = RubyLexer
        ragel_lexer = RagelEmbeddedLexer
        super().__init__(host_lexer, ragel_lexer, **options)

    def analyse_text(text):
        # Claim the text when it carries the Ruby language marker.
        marker = '@LANG: ruby'
        return marker in text
228
229
class RagelCLexer(DelegatingLexer):
    """
    A lexer for Ragel in a C host file.

    Pairs :class:`CLexer` with :class:`RagelEmbeddedLexer`.
    """

    name = 'Ragel in C Host'
    aliases = ['ragel-c']
    filenames = ['*.rl']
    url = 'http://www.colm.net/open-source/ragel/'
    version_added = '1.1'

    def __init__(self, **options):
        super().__init__(CLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        """Return True when *text* carries the ``@LANG: c`` marker.

        A plain substring test would also fire on ``@LANG: c++`` (the marker
        :class:`RagelCppLexer` looks for) and on any other tag that merely
        starts with ``c``.  The negative lookahead requires the language tag
        to end right after the ``c``.
        """
        return re.search(r'@LANG: c(?![\w+#])', text) is not None
246
247
class RagelDLexer(DelegatingLexer):
    """
    Lexer for ``.rl`` files whose host language is D.

    Pairs :class:`DLexer` with :class:`RagelEmbeddedLexer`.
    """

    version_added = '1.1'
    url = 'http://www.colm.net/open-source/ragel/'
    name = 'Ragel in D Host'
    aliases = ['ragel-d']
    filenames = ['*.rl']

    def __init__(self, **options):
        host_lexer = DLexer
        ragel_lexer = RagelEmbeddedLexer
        super().__init__(host_lexer, ragel_lexer, **options)

    def analyse_text(text):
        # Claim the text when it carries the D language marker.
        marker = '@LANG: d'
        return marker in text
264
265
class RagelCppLexer(DelegatingLexer):
    """
    Lexer for ``.rl`` files whose host language is C++.

    Pairs :class:`CppLexer` with :class:`RagelEmbeddedLexer`.
    """

    version_added = '1.1'
    url = 'http://www.colm.net/open-source/ragel/'
    name = 'Ragel in CPP Host'
    aliases = ['ragel-cpp']
    filenames = ['*.rl']

    def __init__(self, **options):
        host_lexer = CppLexer
        ragel_lexer = RagelEmbeddedLexer
        super().__init__(host_lexer, ragel_lexer, **options)

    def analyse_text(text):
        # Claim the text when it carries the C++ language marker.
        marker = '@LANG: c++'
        return marker in text
282
283
class RagelObjectiveCLexer(DelegatingLexer):
    """
    Lexer for ``.rl`` files whose host language is Objective C.

    Pairs :class:`ObjectiveCLexer` with :class:`RagelEmbeddedLexer`.
    """

    version_added = '1.1'
    url = 'http://www.colm.net/open-source/ragel/'
    name = 'Ragel in Objective C Host'
    aliases = ['ragel-objc']
    filenames = ['*.rl']

    def __init__(self, **options):
        host_lexer = ObjectiveCLexer
        ragel_lexer = RagelEmbeddedLexer
        super().__init__(host_lexer, ragel_lexer, **options)

    def analyse_text(text):
        # Claim the text when it carries the Objective C language marker.
        marker = '@LANG: objc'
        return marker in text
300
301
class RagelJavaLexer(DelegatingLexer):
    """
    Lexer for ``.rl`` files whose host language is Java.

    Pairs :class:`JavaLexer` with :class:`RagelEmbeddedLexer`.
    """

    version_added = '1.1'
    url = 'http://www.colm.net/open-source/ragel/'
    name = 'Ragel in Java Host'
    aliases = ['ragel-java']
    filenames = ['*.rl']

    def __init__(self, **options):
        host_lexer = JavaLexer
        ragel_lexer = RagelEmbeddedLexer
        super().__init__(host_lexer, ragel_lexer, **options)

    def analyse_text(text):
        # Claim the text when it carries the Java language marker.
        marker = '@LANG: java'
        return marker in text
318
319
class AntlrLexer(RegexLexer):
    """
    Generic ANTLR Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    Grammar structure is tokenized here; target-language code inside
    ``{...}`` actions and ``[...]`` argument actions is emitted as ``Other``.
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []
    url = 'https://www.antlr.org'
    version_added = '1.1'

    # Regex fragments spliced into the rules below.
    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),

            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule
            # Both states are pushed: the prelims are handled first and,
            # once they pop on ':', the alternatives state takes over.
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),

            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),

            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
             'action'),
            (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            # <<...>> literal: the body may be any length, including empty.
            # Without the '*' only a single-unit body such as <<x>> matched.
            (r'<<([^>]|>[^>])*>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # ';' ends the rule
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + r')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join((    # keep host code in largest possible chunks
                r'[^${}\'"/\\]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            # nested braces re-enter this state; the matching close leaves it
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join((    # keep host code in largest possible chunks.
                r'[^$\[\]\'"/]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),


            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        # Looks for a line of the form "grammar <name>;".
        # NOTE(review): this returns a re.Match (or None), not a number;
        # confirm the framework coerces that the way guessing expects.
        return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
509
510
511# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
512
class AntlrCppLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the C++ target.

    Pairs :class:`CppLexer` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With CPP Target'
    aliases = ['antlr-cpp']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        target_lexer = CppLexer
        grammar_lexer = AntlrLexer
        super().__init__(target_lexer, grammar_lexer, **options)

    def analyse_text(text):
        # Needs a grammar header first, then the matching language option.
        grammar_hit = AntlrLexer.analyse_text(text)
        if not grammar_hit:
            return grammar_hit
        return re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
530
531
class AntlrObjectiveCLexer(DelegatingLexer):
    """
    ANTLR with Objective-C Target

    Pairs :class:`ObjectiveCLexer` with :class:`AntlrLexer`.
    """

    name = 'ANTLR With ObjectiveC Target'
    aliases = ['antlr-objc']
    filenames = ['*.G', '*.g']
    url = 'https://www.antlr.org'
    version_added = '1.1'

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, AntlrLexer, **options)

    def analyse_text(text):
        """Look for a grammar header plus a ``language = ObjC;`` option line.

        ``re.M`` is required so that ``^`` anchors at the start of any line;
        without it the option would only be found on the very first line of
        the text.  The sibling ANTLR target lexers all pass this flag.
        """
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ObjC\s*;', text, re.M)
549
550
class AntlrCSharpLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the C# target.

    Pairs :class:`CSharpLexer` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With C# Target'
    aliases = ['antlr-csharp', 'antlr-c#']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        target_lexer = CSharpLexer
        grammar_lexer = AntlrLexer
        super().__init__(target_lexer, grammar_lexer, **options)

    def analyse_text(text):
        # Needs a grammar header first, then the matching language option.
        grammar_hit = AntlrLexer.analyse_text(text)
        if not grammar_hit:
            return grammar_hit
        return re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
568
569
class AntlrPythonLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the Python target.

    Pairs :class:`PythonLexer` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With Python Target'
    aliases = ['antlr-python']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        target_lexer = PythonLexer
        grammar_lexer = AntlrLexer
        super().__init__(target_lexer, grammar_lexer, **options)

    def analyse_text(text):
        # Needs a grammar header first, then the matching language option.
        grammar_hit = AntlrLexer.analyse_text(text)
        if not grammar_hit:
            return grammar_hit
        return re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
587
588
class AntlrJavaLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the Java target.

    Pairs :class:`JavaLexer` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With Java Target'
    aliases = ['antlr-java']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        target_lexer = JavaLexer
        grammar_lexer = AntlrLexer
        super().__init__(target_lexer, grammar_lexer, **options)

    def analyse_text(text):
        # Antlr language is Java by default, so no language option is
        # required: any text accepted by the generic check scores 0.9.
        grammar_hit = AntlrLexer.analyse_text(text)
        return 0.9 if grammar_hit else grammar_hit
606
607
class AntlrRubyLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the Ruby target.

    Pairs :class:`RubyLexer` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With Ruby Target'
    aliases = ['antlr-ruby', 'antlr-rb']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        target_lexer = RubyLexer
        grammar_lexer = AntlrLexer
        super().__init__(target_lexer, grammar_lexer, **options)

    def analyse_text(text):
        # Needs a grammar header first, then the matching language option.
        grammar_hit = AntlrLexer.analyse_text(text)
        if not grammar_hit:
            return grammar_hit
        return re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
625
626
class AntlrPerlLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the Perl target.

    Pairs :class:`PerlLexer` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With Perl Target'
    aliases = ['antlr-perl']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        target_lexer = PerlLexer
        grammar_lexer = AntlrLexer
        super().__init__(target_lexer, grammar_lexer, **options)

    def analyse_text(text):
        # Needs a grammar header first, then the matching language option.
        grammar_hit = AntlrLexer.analyse_text(text)
        if not grammar_hit:
            return grammar_hit
        return re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
644
645
class AntlrActionScriptLexer(DelegatingLexer):
    """
    Lexer for ANTLR grammars whose actions are written for the
    ActionScript target.

    Pairs ``ActionScriptLexer`` with :class:`AntlrLexer`.
    """

    version_added = '1.1'
    url = 'https://www.antlr.org'
    name = 'ANTLR With ActionScript Target'
    aliases = ['antlr-actionscript', 'antlr-as']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        # The ActionScript lexer is imported here, at construction time,
        # rather than at module level.
        from pygments.lexers.actionscript import ActionScriptLexer
        grammar_lexer = AntlrLexer
        super().__init__(ActionScriptLexer, grammar_lexer, **options)

    def analyse_text(text):
        # Needs a grammar header first, then the matching language option.
        grammar_hit = AntlrLexer.analyse_text(text)
        if not grammar_hit:
            return grammar_hit
        return re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
664
665
class TreetopBaseLexer(RegexLexer):
    """
    A base lexer for `Treetop <https://cjheath.github.io/treetop>`_ grammars.
    Not for direct use; use :class:`TreetopLexer` instead.

    Grammar structure is tokenized here, while ``require`` lines and the
    contents of inline ``{...}`` blocks are emitted as ``Other``.

    .. versionadded:: 1.6
    """

    tokens = {
        'root': [
            include('space'),
            # the whole require line is passed through as Other
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        'module': [
            include('space'),
            include('end'),
            # modules may nest
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
        ],
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            # a single (possibly ::-qualified) name, then back to 'grammar'
            (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
        ],
        'rule': [
            include('space'),
            include('end'),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            # "name:" labels are tried before plain names
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            # bracketed character class, allowing escapes and [:name:] items
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            # "min..max" with either bound optional
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        'inline_module': [
            # braces inside the block are tracked by the 'ruby' state so
            # that only the outermost '}' closes the inline module
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            # '#' comment up to (not including) the newline
            (r'#[^\n]*', Comment.Single),
        ],
        'end': [
            # 'end' closes whichever state included this one
            (r'end\b', Keyword, '#pop'),
        ],
    }
733
734
class TreetopLexer(DelegatingLexer):
    """
    Lexer for Treetop grammar files.

    Pairs :class:`RubyLexer` with :class:`TreetopBaseLexer`.
    """

    version_added = '1.6'
    url = 'https://cjheath.github.io/treetop'
    name = 'Treetop'
    aliases = ['treetop']
    filenames = ['*.treetop', '*.tt']

    def __init__(self, **options):
        host_lexer = RubyLexer
        grammar_lexer = TreetopBaseLexer
        super().__init__(host_lexer, grammar_lexer, **options)
748
749
class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    The ``root`` state reads up to the ``=`` of a rule; the ``production``
    state then reads the right-hand side until ``;`` or ``.``.
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']
    url = 'https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form'
    version_added = '2.0'

    tokens = {
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            # '=' begins the right-hand side of a rule
            (r'=', Operator, 'production'),
        ],
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            # text enclosed in question marks
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            # either terminator ends the production
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Text),
        ],
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'comment': [
            (r'[^*)]', Comment.Multiline),
            # a nested '(*' pushes another 'comment' level
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            # a lone '*' or ')' that is not part of a delimiter
            (r'[*)]', Comment.Multiline),
        ],
        'identifier': [
            # names start with a letter and may contain spaces and hyphens
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],
    }