Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/parsers.py: 80%
164 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.parsers
3 ~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for parser generators.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import RegexLexer, DelegatingLexer, \
14 include, bygroups, using
15from pygments.token import Punctuation, Other, Text, Comment, Operator, \
16 Keyword, Name, String, Number, Whitespace
17from pygments.lexers.jvm import JavaLexer
18from pygments.lexers.c_cpp import CLexer, CppLexer
19from pygments.lexers.objective import ObjectiveCLexer
20from pygments.lexers.d import DLexer
21from pygments.lexers.dotnet import CSharpLexer
22from pygments.lexers.ruby import RubyLexer
23from pygments.lexers.python import PythonLexer
24from pygments.lexers.perl import PerlLexer
# Lexer classes that make up this module's public API.
__all__ = [
    'RagelLexer',
    'RagelEmbeddedLexer',
    'RagelCLexer',
    'RagelDLexer',
    'RagelCppLexer',
    'RagelObjectiveCLexer',
    'RagelRubyLexer',
    'RagelJavaLexer',
    'AntlrLexer',
    'AntlrPythonLexer',
    'AntlrPerlLexer',
    'AntlrRubyLexer',
    'AntlrCppLexer',
    'AntlrCSharpLexer',
    'AntlrObjectiveCLexer',
    'AntlrJavaLexer',
    'AntlrActionScriptLexer',
    'TreetopLexer',
    'EbnfLexer',
]
class RagelLexer(RegexLexer):
    """
    Lexer for stand-alone `Ragel <http://www.colm.net/open-source/ragel/>`_
    code.

    Meant for fragments of Ragel.  For complete ``.rl`` files prefer
    :class:`RagelEmbeddedLexer` or one of the language-specific
    subclasses.

    .. versionadded:: 1.1
    """

    name = 'Ragel'
    url = 'http://www.colm.net/open-source/ragel/'
    aliases = ['ragel']
    filenames = []

    # Pattern fragments shared by several rules below.
    _DQ_STRING = r'"(\\\\|\\[^\\]|[^"\\])*"'
    _SQ_STRING = r"'(\\\\|\\[^\\]|[^'\\])*'"
    # A regular expression literal has no reason to start with "*", and
    # excluding it avoids confusion with "/* ... */" comments.
    _SLASH_REGEX = r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/'
    # Symbols that may precede an EOF / error / to-state / from-state action.
    _EMBED = r'(>|\$|%|<|@|<>)'

    # Alternatives that together form one chunk of host code; matching
    # them repeatedly keeps host code in the largest possible pieces.
    _HOST_CHUNK = r'|'.join((
        r'[^{}\'"/#]+',      # anything but the unsafe characters
        r'[^\\]\\[{}]',      # an escaped { or }
        # strings and comments may safely contain unsafe characters
        _DQ_STRING,
        _SQ_STRING,
        r'//.*$\n?',         # single line comment
        r'/\*(.|\n)*?\*/',   # multi-line javadoc-style comment
        r'\#.*$\n?',         # ruby comment
        _SLASH_REGEX,
        # with regexes and comments handled, a lone / is harmless
        r'/',
    ))

    tokens = {
        'root': [
            include('literals'),
            include('whitespace'),
            include('comments'),
            include('keywords'),
            include('numbers'),
            include('identifiers'),
            include('operators'),
            (r'\{', Punctuation, 'host'),
            (r'=', Operator),
            (r';', Punctuation),
        ],
        'host': [
            (r'(' + _HOST_CHUNK + r')+', Other),

            # Braces nest: each { pushes this state again, each } pops it.
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'whitespace': [
            (r'\s+', Whitespace)
        ],
        'comments': [
            (r'\#.*$', Comment),
        ],
        'keywords': [
            (r'(access|action|alphtype)\b', Keyword),
            (r'(getkey|write|machine|include)\b', Keyword),
            (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
            (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
        ],
        'numbers': [
            (r'0x[0-9A-Fa-f]+', Number.Hex),
            (r'[+-]?[0-9]+', Number.Integer),
        ],
        'literals': [
            (_DQ_STRING, String.Double),
            (_SQ_STRING, String.Single),
            # square bracket literals
            (r'\[(\\\\|\\[^\\]|[^\\\]])*\]', String),
            # regular expressions
            (_SLASH_REGEX, String.Regex),
        ],
        'identifiers': [
            (r'[a-zA-Z_]\w*', Name.Variable),
        ],
        'operators': [
            (r',', Operator),                               # join
            (r'\||&|--?', Operator),    # union, intersection, subtraction
            (r'\.|<:|:>>?', Operator),                      # concatenation
            (r':', Operator),                               # label
            (r'->', Operator),                              # epsilon transition
            (_EMBED + r'(/|eof\b)', Operator),              # EOF actions
            (_EMBED + r'(!|err\b)', Operator),              # global error actions
            (_EMBED + r'(\^|lerr\b)', Operator),            # local error actions
            (_EMBED + r'(~|to\b)', Operator),               # to-state actions
            (_EMBED + r'(\*|from\b)', Operator),            # from-state actions
            (r'>|@|\$|%', Operator),    # transition actions and priorities
            (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator),      # repetition
            (r'!|\^', Operator),                            # negation
            (r'\(|\)', Operator),                           # grouping
        ],
    }
class RagelEmbeddedLexer(RegexLexer):
    """
    Lexer for Ragel sections embedded in a host-language file.

    Only the Ragel statements are highlighted; host code is emitted as
    ``Other`` tokens.  Use one of the language-specific Ragel lexers to get
    host-language highlighting as well.

    .. versionadded:: 1.1
    """

    name = 'Embedded Ragel'
    aliases = ['ragel-em']
    filenames = ['*.rl']

    # Pattern fragments shared by the two states below.
    _DQ_STRING = r'"(\\\\|\\[^\\]|[^"\\])*"'
    _SQ_STRING = r"'(\\\\|\\[^\\]|[^'\\])*'"
    _BLOCK_COMMENT = r'/\*(.|\n)*?\*/'   # multi-line javadoc-style comment
    _LINE_COMMENT = r'//.*$\n?'          # single line comment
    _HASH_COMMENT = r'\#.*$\n?'          # ruby/ragel comment

    # Host code outside any Ragel section, kept in the largest possible
    # chunks.
    _HOST_CHUNK = r'|'.join((
        r'[^%\'"/#]+',      # anything but the unsafe characters
        r'%(?=[^%]|$)',     # a single % sign is okay, just not 2 of them
        # strings and comments may safely contain unsafe characters
        _DQ_STRING,
        _SQ_STRING,
        _BLOCK_COMMENT,
        _LINE_COMMENT,
        _HASH_COMMENT,
        r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',  # regular expression
        # / is safe now that regexes and javadoc comments are handled
        r'/',
    ))

    # Ragel code that cannot end a multi-line section, matched a character
    # (or short sequence) at a time and then repeated.
    _FSM_PLAIN = r'|'.join((
        r'[^}\'"\[/#]',     # anything but the unsafe characters
        r'\}(?=[^%]|$)',    # } is okay as long as it's not followed by %
        r'\}%(?=[^%]|$)',   # ...well, one %'s okay, just not two...
        r'[^\\]\\[{}]',     # ...and } is okay if it's escaped
        # allow / if it's preceded with one of these symbols
        # (ragel EOF actions)
        r'(>|\$|%|<|@|<>)/',
        # specifically allow regex followed immediately by *
        # so it doesn't get mistaken for a comment
        r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/\*',
        # allow / as long as it's not followed by another / or by a *
        r'/(?=[^/*]|$)',
    ))

    # Everything handed to RagelLexer in one go inside a multi-line section.
    _FSM_CHUNK = r'|'.join((
        r'(' + _FSM_PLAIN + r')+',
        # strings and comments may safely contain unsafe characters
        _DQ_STRING,
        _SQ_STRING,
        r"\[(\\\\|\\[^\\]|[^\]\\])*\]",  # square bracket literal
        _BLOCK_COMMENT,
        _LINE_COMMENT,
        _HASH_COMMENT,
    ))

    tokens = {
        'root': [
            (r'(' + _HOST_CHUNK + r')+', Other),

            # Single-line FSM.  A quoted newline inside a single-line FSM
            # is not supported and will break this rule.
            (r'(%%)(?![{%])(.*)($|;)(\n?)',
             bygroups(Punctuation, using(RagelLexer), Punctuation, Text)),

            # Multi-line FSM.
            (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
        ],
        'multi-line-fsm': [
            (r'(' + _FSM_CHUNK + r')+', using(RagelLexer)),

            (r'\}%%', Punctuation, '#pop'),
        ]
    }

    def analyse_text(text):
        marker = '@LANG: indep'
        return marker in text
class RagelRubyLexer(DelegatingLexer):
    """
    Lexer for Ragel embedded in a Ruby host file.

    Combines :class:`RubyLexer` with :class:`RagelEmbeddedLexer`.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Ruby Host'
    aliases = ['ragel-ruby', 'ragel-rb']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(RubyLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        # The file is recognised by its "@LANG:" marker.
        marker = '@LANG: ruby'
        return marker in text
class RagelCLexer(DelegatingLexer):
    """
    A lexer for Ragel in a C host file.

    Combines :class:`CLexer` with :class:`RagelEmbeddedLexer`.

    .. versionadded:: 1.1
    """

    name = 'Ragel in C Host'
    aliases = ['ragel-c']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(CLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        """Return True when *text* carries a ``@LANG: c`` marker.

        A plain substring test would also accept ``@LANG: c++`` (the marker
        :class:`RagelCppLexer` looks for), so the ``c`` must not be followed
        by a word character, ``+`` or ``#``.
        """
        return re.search(r'@LANG: c(?![\w+#])', text) is not None
class RagelDLexer(DelegatingLexer):
    """
    Lexer for Ragel embedded in a D host file.

    Combines :class:`DLexer` with :class:`RagelEmbeddedLexer`.

    .. versionadded:: 1.1
    """

    name = 'Ragel in D Host'
    aliases = ['ragel-d']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(DLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        # The file is recognised by its "@LANG:" marker.
        marker = '@LANG: d'
        return marker in text
class RagelCppLexer(DelegatingLexer):
    """
    Lexer for Ragel embedded in a C++ host file.

    Combines :class:`CppLexer` with :class:`RagelEmbeddedLexer`.

    .. versionadded:: 1.1
    """

    name = 'Ragel in CPP Host'
    aliases = ['ragel-cpp']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(CppLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        # The file is recognised by its "@LANG:" marker.
        marker = '@LANG: c++'
        return marker in text
class RagelObjectiveCLexer(DelegatingLexer):
    """
    Lexer for Ragel embedded in an Objective C host file.

    Combines :class:`ObjectiveCLexer` with :class:`RagelEmbeddedLexer`.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Objective C Host'
    aliases = ['ragel-objc']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        # The file is recognised by its "@LANG:" marker.
        marker = '@LANG: objc'
        return marker in text
class RagelJavaLexer(DelegatingLexer):
    """
    Lexer for Ragel embedded in a Java host file.

    Combines :class:`JavaLexer` with :class:`RagelEmbeddedLexer`.

    .. versionadded:: 1.1
    """

    name = 'Ragel in Java Host'
    aliases = ['ragel-java']
    filenames = ['*.rl']

    def __init__(self, **options):
        super().__init__(JavaLexer, RagelEmbeddedLexer, **options)

    def analyse_text(text):
        # The file is recognised by its "@LANG:" marker.
        marker = '@LANG: java'
        return marker in text
class AntlrLexer(RegexLexer):
    """
    Generic `ANTLR`_ Lexer.
    Should not be called directly, instead
    use DelegatingLexer for your target language.

    Target-language code inside actions is emitted as ``Other`` tokens.

    .. versionadded:: 1.1

    .. _ANTLR: http://www.antlr.org/
    """

    name = 'ANTLR'
    aliases = ['antlr']
    filenames = []

    # Building blocks for the rules below.
    _id = r'[A-Za-z]\w*'
    _TOKEN_REF = r'[A-Z]\w*'
    _RULE_REF = r'[a-z]\w*'
    _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
    _INT = r'[0-9]+'

    tokens = {
        'whitespace': [
            (r'\s+', Whitespace),
        ],
        'comments': [
            (r'//.*$', Comment),
            (r'/\*(.|\n)*?\*/', Comment),
        ],
        'root': [
            include('whitespace'),
            include('comments'),

            # grammar header, optionally prefixed with the grammar type
            (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
                      Punctuation)),
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # tokensSpec
            (r'tokens\b', Keyword, 'tokens'),
            # attrScope
            (r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation), 'action'),
            # exception
            (r'(catch|finally)\b', Keyword, 'exception'),
            # action
            (r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      Name.Label, Whitespace, Punctuation), 'action'),
            # rule: 'rule-prelims' ends up on top of the stack and, once it
            # pops at the ":", lexing continues in 'rule-alts'
            (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
             bygroups(Keyword, Whitespace, Name.Label, Punctuation),
             ('rule-alts', 'rule-prelims')),
        ],
        'exception': [
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
            include('comments'),

            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
        ],
        'rule-prelims': [
            include('whitespace'),
            include('comments'),

            (r'returns\b', Keyword),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            # throwsSpec
            (r'(throws)(\s+)(' + _id + ')',
             bygroups(Keyword, Whitespace, Name.Label)),
            (r'(,)(\s*)(' + _id + ')',
             bygroups(Punctuation, Whitespace, Name.Label)),  # Additional throws
            # optionsSpec
            (r'options\b', Keyword, 'options'),
            # ruleScopeSpec - scope followed by target language code or name of action
            # TODO finish implementing other possibilities for scope
            # L173 ANTLRv3.g from ANTLR book
            (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
             'action'),
            (r'(scope)(\s+)(' + _id + r')(\s*)(;)',
             bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
            # ruleAction
            (r'(@' + _id + r')(\s*)(\{)',
             bygroups(Name.Label, Whitespace, Punctuation), 'action'),
            # finished prelims, go to rule alts!
            (r':', Punctuation, '#pop')
        ],
        'rule-alts': [
            include('whitespace'),
            include('comments'),

            # These might need to go in a separate 'block' state triggered by (
            (r'options\b', Keyword, 'options'),
            (r':', Punctuation),

            # literals
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
            # "<<...>>" literal.  The body group is repeated so that text of
            # any length is accepted; without the "*" only a one- or
            # two-character body could ever match.
            (r'<<([^>]|>[^>])*>>', String),
            # identifiers
            # Tokens start with capital letter.
            (r'\$?[A-Z_]\w*', Name.Constant),
            # Rules start with small letter.
            (r'\$?[a-z_]\w*', Name.Variable),
            # operators
            (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
            (r',', Punctuation),
            (r'\[', Punctuation, 'nested-arg-action'),
            (r'\{', Punctuation, 'action'),
            (r';', Punctuation, '#pop')
        ],
        'tokens': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
             + r')?(\s*)(;)',
             bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
                      String, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'options': [
            include('whitespace'),
            include('comments'),
            (r'\{', Punctuation),
            (r'(' + _id + r')(\s*)(=)(\s*)(' +
             '|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
             bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
                      Text, Whitespace, Punctuation)),
            (r'\}', Punctuation, '#pop'),
        ],
        'action': [
            (r'(' + r'|'.join((    # keep host code in largest possible chunks
                r'[^${}\'"/\\]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # backslashes are okay, as long as we are not backslashing a %
                r'\\(?!%)',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),
            (r'(\\)(%)', bygroups(Punctuation, Other)),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            # Braces nest: each { pushes this state again, each } pops it.
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'nested-arg-action': [
            (r'(' + r'|'.join((    # keep host code in largest possible chunks.
                r'[^$\[\]\'"/]+',  # exclude unsafe characters

                # strings and comments may safely contain unsafe characters
                r'"(\\\\|\\[^\\]|[^"\\])*"',
                r"'(\\\\|\\[^\\]|[^'\\])*'",
                r'//.*$\n?',            # single line comment
                r'/\*(.|\n)*?\*/',      # multi-line javadoc-style comment

                # regular expression: There's no reason for it to start
                # with a * and this stops confusion with comments.
                r'/(?!\*)(\\\\|\\[^\\]|[^/\\])*/',

                # Now that we've handled regex and javadoc comments
                # it's safe to let / through.
                r'/',
            )) + r')+', Other),

            # Brackets nest the same way braces do in 'action'.
            (r'\[', Punctuation, '#push'),
            (r'\]', Punctuation, '#pop'),
            (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
             bygroups(Name.Variable, Punctuation, Name.Property)),
            (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
        ]
    }

    def analyse_text(text):
        """Look for an ANTLR grammar header at the start of any line.

        Returns the ``re`` match object (or ``None``).  The header may carry
        the optional ``lexer``/``parser``/``tree`` prefix that the 'root'
        state accepts, and the grammar name may contain underscores.
        """
        return re.search(
            r'^\s*(?:(?:lexer|parser|tree)\s+)?grammar\s+\w+\s*;',
            text, re.M)
514# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
class AntlrCppLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the C++ target.

    Combines :class:`CppLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With CPP Target'
    aliases = ['antlr-cpp']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(CppLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Must look like an ANTLR grammar at all before the target counts.
        is_antlr = AntlrLexer.analyse_text(text)
        if not is_antlr:
            return is_antlr
        return re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
class AntlrObjectiveCLexer(DelegatingLexer):
    """
    ANTLR with Objective-C Target

    Combines :class:`ObjectiveCLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ObjectiveC Target'
    aliases = ['antlr-objc']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(ObjectiveCLexer, AntlrLexer, **options)

    def analyse_text(text):
        """Accept ANTLR grammars that declare ``language = ObjC;``."""
        # re.M lets "^" match at every line start, as in the sibling ANTLR
        # target lexers; without it the option is only found when it is the
        # very first thing in the text.
        return AntlrLexer.analyse_text(text) and \
            re.search(r'^\s*language\s*=\s*ObjC\s*;', text, re.M)
class AntlrCSharpLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the C# target.

    Combines :class:`CSharpLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With C# Target'
    aliases = ['antlr-csharp', 'antlr-c#']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(CSharpLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Must look like an ANTLR grammar at all before the target counts.
        is_antlr = AntlrLexer.analyse_text(text)
        if not is_antlr:
            return is_antlr
        return re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
class AntlrPythonLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the Python target.

    Combines :class:`PythonLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Python Target'
    aliases = ['antlr-python']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(PythonLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Must look like an ANTLR grammar at all before the target counts.
        is_antlr = AntlrLexer.analyse_text(text)
        if not is_antlr:
            return is_antlr
        return re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
class AntlrJavaLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the Java target.

    Combines :class:`JavaLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Java Target'
    aliases = ['antlr-java']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(JavaLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Java is ANTLR's default language, so no "language = ..." option
        # is required; any ANTLR grammar scores 0.9.
        is_antlr = AntlrLexer.analyse_text(text)
        return 0.9 if is_antlr else is_antlr
class AntlrRubyLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the Ruby target.

    Combines :class:`RubyLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Ruby Target'
    aliases = ['antlr-ruby', 'antlr-rb']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(RubyLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Must look like an ANTLR grammar at all before the target counts.
        is_antlr = AntlrLexer.analyse_text(text)
        if not is_antlr:
            return is_antlr
        return re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
class AntlrPerlLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the Perl target.

    Combines :class:`PerlLexer` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With Perl Target'
    aliases = ['antlr-perl']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        super().__init__(PerlLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Must look like an ANTLR grammar at all before the target counts.
        is_antlr = AntlrLexer.analyse_text(text)
        if not is_antlr:
            return is_antlr
        return re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
class AntlrActionScriptLexer(DelegatingLexer):
    """
    ANTLR grammar lexer for the ActionScript target.

    Combines ``ActionScriptLexer`` with :class:`AntlrLexer`.

    .. versionadded:: 1.1
    """

    name = 'ANTLR With ActionScript Target'
    aliases = ['antlr-actionscript', 'antlr-as']
    filenames = ['*.G', '*.g']

    def __init__(self, **options):
        # The ActionScript lexer is imported here, at construction time,
        # rather than at module level.
        from pygments.lexers.actionscript import ActionScriptLexer
        super().__init__(ActionScriptLexer, AntlrLexer, **options)

    def analyse_text(text):
        # Must look like an ANTLR grammar at all before the target counts.
        is_antlr = AntlrLexer.analyse_text(text)
        if not is_antlr:
            return is_antlr
        return re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
class TreetopBaseLexer(RegexLexer):
    """
    Base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.

    Not meant to be used on its own; use :class:`TreetopLexer` instead.

    .. versionadded:: 1.6
    """

    # A capitalised name, optionally qualified with "::" segments.
    _CONST_PATH = r'[A-Z]\w*(?:::[A-Z]\w*)*'
    _DQ_STRING = r'"(\\\\|\\[^\\]|[^"\\])*"'
    _SQ_STRING = r"'(\\\\|\\[^\\]|[^'\\])*'"

    tokens = {
        # Helper states pulled in with include().
        'space': [
            (r'[ \t\n\r]+', Whitespace),
            (r'#[^\n]*', Comment.Single),
        ],
        'end': [
            (r'end\b', Keyword, '#pop'),
        ],

        'root': [
            include('space'),
            (r'require[ \t]+[^\n\r]+[\n\r]', Other),
            (r'module\b', Keyword.Namespace, 'module'),
            (r'grammar\b', Keyword, 'grammar'),
        ],
        'module': [
            include('space'),
            include('end'),
            (r'module\b', Keyword, '#push'),
            (r'grammar\b', Keyword, 'grammar'),
            (_CONST_PATH, Name.Namespace),
        ],
        'grammar': [
            include('space'),
            include('end'),
            (r'rule\b', Keyword, 'rule'),
            (r'include\b', Keyword, 'include'),
            (r'[A-Z]\w*', Name),
        ],
        'include': [
            include('space'),
            (_CONST_PATH, Name.Class, '#pop'),
        ],
        'rule': [
            include('space'),
            include('end'),
            (_DQ_STRING, String.Double),
            (_SQ_STRING, String.Single),
            # A name followed by ":" is a label; it must be tried before
            # the plain-name rule.
            (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
            (r'[A-Za-z_]\w*', Name),
            (r'[()]', Punctuation),
            (r'[?+*/&!~]', Operator),
            # bracketed character class
            (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
            # "n..m" range with optional bounds
            (r'([0-9]*)(\.\.)([0-9]*)',
             bygroups(Number.Integer, Operator, Number.Integer)),
            (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
            (r'\{', Punctuation, 'inline_module'),
            (r'\.', String.Regex),
        ],
        # Contents of a { ... } block are emitted as Other; nested braces
        # are tracked through the 'ruby' state.
        'inline_module': [
            (r'\{', Other, 'ruby'),
            (r'\}', Punctuation, '#pop'),
            (r'[^{}]+', Other),
        ],
        'ruby': [
            (r'\{', Other, '#push'),
            (r'\}', Other, '#pop'),
            (r'[^{}]+', Other),
        ],
    }
class TreetopLexer(DelegatingLexer):
    """
    Lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.

    Combines :class:`RubyLexer` with :class:`TreetopBaseLexer`.

    .. versionadded:: 1.6
    """

    name = 'Treetop'
    aliases = ['treetop']
    filenames = ['*.treetop', '*.tt']

    def __init__(self, **options):
        super().__init__(RubyLexer, TreetopBaseLexer, **options)
class EbnfLexer(RegexLexer):
    """
    Lexer for `ISO/IEC 14977 EBNF
    <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
    grammars.

    .. versionadded:: 2.0
    """

    name = 'EBNF'
    aliases = ['ebnf']
    filenames = ['*.ebnf']
    mimetypes = ['text/x-ebnf']

    tokens = {
        # Helper states pulled in with include().
        'whitespace': [
            (r'\s+', Text),
        ],
        'comment_start': [
            (r'\(\*', Comment.Multiline, 'comment'),
        ],
        'identifier': [
            (r'([a-zA-Z][\w \-]*)', Keyword),
        ],

        # Outside a production: everything up to the defining "=".
        'root': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'=', Operator, 'production'),
        ],
        # Right-hand side of a production, ended by ";" or ".".
        'production': [
            include('whitespace'),
            include('comment_start'),
            include('identifier'),
            (r'"[^"]*"', String.Double),
            (r"'[^']*'", String.Single),
            (r'(\?[^?]*\?)', Name.Entity),
            (r'[\[\]{}(),|]', Punctuation),
            (r'-', Operator),
            (r';', Punctuation, '#pop'),
            (r'\.', Punctuation, '#pop'),
        ],
        # Inside "(* ... *)"; comments nest because 'comment_start' is
        # included again here.
        'comment': [
            (r'[^*)]', Comment.Multiline),
            include('comment_start'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'[*)]', Comment.Multiline),
        ],
    }