Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/grammar_notation.py: 100%
31 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.grammar_notation
3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for grammar notations like BNF.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11from pygments.lexer import RegexLexer, bygroups, include, this, using, words
12from pygments.token import Comment, Keyword, Literal, Name, Number, \
13 Operator, Punctuation, String, Text, Whitespace
15__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations written in the style of the original BNF.

    To stay usable for as many BNF dialects as possible, the highlighting
    is intentionally minimal:

    * Terminal symbols are not singled out; they are emitted as plain text.

    * A nonterminal symbol is recognised only when it is enclosed in
      angle brackets (``<`` and ``>``).

    * Between the brackets, a nonterminal name may consist of any
      printable ASCII character, spaces included, except the angle
      brackets themselves. This accommodates
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The notation is assumed to have no comment syntax.

    * ``::=`` is the only operator that is highlighted; every other
      operator or punctuation character is emitted as plain text.

    The result may look sparse, but it is a deliberate trade-off in
    favour of generality.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    tokens = {
        'root': [
            # <nonterminal>: brackets are punctuation, the name is a class.
            # The character set is 0x20-0x3B, '=', and 0x3F-0x7E, i.e.
            # printable ASCII without '<' and '>'.
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # The one and only operator.
            (r'::=', Operator),

            # Everything else is plain text. The first rule swallows long
            # runs in a single match (for performance); the second picks
            # up the stray '<', '>' or ':' characters it stops at.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }
class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as specified by IETF RFC 7405, which updates
    `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_.

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Rule names that the token table below highlights as keywords.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT', 'DQUOTE',
        'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET', 'SP', 'VCHAR', 'WSP',
    )

    tokens = {
        'root': [
            # Comments run from ';' to the end of the line.
            (r';.*$', Comment.Single),

            # Quoted strings, optionally prefixed with %s or %i.
            # A double quote cannot appear inside one; it has to be
            # written as '%x22' instead.
            (r'(%[si])?"[^"]*"', Literal),

            # Numeric values. For every base the range form (a-b) is
            # tried before the concatenation form (a.b.c).

            # Binary (rarely seen in practice).
            (r'%b[01]+\-[01]+\b', Literal),
            (r'%b[01]+(\.[01]+)*\b', Literal),

            # Decimal.
            (r'%d[0-9]+\-[0-9]+\b', Literal),
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),

            # Hexadecimal.
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),

            # Repetition prefixes (<a>*<b>element), including the plain
            # nRule form; the longest variant is tried first.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Core rules. Strictly speaking these are not keywords, but
            # they are highlighted as such.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # Any other rule name: ALPHA *(ALPHA / DIGIT / "-").
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # Definition, incremental alternative and alternation.
            (r'(=/|=|/)', Operator),

            # Grouping and optional brackets.
            (r'[\[\]()]', Punctuation),

            # Fallbacks: whitespace first, then any single character.
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }
class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format (JSGF).

    .. versionadded:: 2.2
    """

    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    tokens = {
        # Comments are tried before anything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],
        'comments': [
            # '/**' (but not the empty comment '/**/') opens a
            # documentation comment, which is lexed in its own state.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*$', Comment.Single),
        ],
        # Kept separate from 'comments' so that the 'example' state can
        # reuse these rules without the comment rules.
        'non-comments': [
            # '#JSGF ...' header, only at the very start of the input,
            # up to (not including) the terminating ';'.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Slash-delimited value (used for weights in JSGF).
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # The two special rule names get their own token type.
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            (r'<', Punctuation, 'rulename'),
            # Plain words, or runs of characters that no rule above
            # starts with.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],
        # Inside "...": backslash escapes are highlighted separately.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],
        # Inside {...}: same structure as 'string'.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],
        # After the 'grammar' keyword, up to the closing ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],
        # Inside <...>: dot-terminated segments are namespaces, the
        # final segment is the rule name itself.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            (r'[^.>]+', Name.Constant),
        ],
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # '@example' / '@see': the text up to the next tag line or
            # the end of the comment is lexed again with this lexer,
            # starting in the 'example' state.
            (
                r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace,
                    Comment.Multiline,
                    Whitespace,
                    Comment.Special,
                    Whitespace,
                    using(this, state='example'),
                ),
            ),
            # Any other '@tag' at the start of a comment line.
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],
        # Grammar text embedded in a documentation comment.
        'example': [
            # The '*' that leads a continuation line belongs to the
            # comment, not to the embedded grammar text.
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }
class PegLexer(RegexLexer):
    """
    Lexer for Parsing Expression Grammars (PEG).

    PEG implementations disagree on parts of the syntax, so this lexer
    tries to be accommodating:

    * ``<-``, ``←``, ``:``, and ``=`` are all accepted as rule operators.

    * Both ``|`` and ``/`` are choice operators.

    * ``^``, ``↑``, and ``~`` are cut operators.

    * A single ``a-z`` character immediately before a string, or
      multiple ``a-z`` characters following a string, are part of the
      string (e.g., ``r"..."`` or ``"..."ilmsuxa``).

    * Inside a character class, a backslash escapes the next character,
      so ``[\\]]`` is a class containing a closing bracket.

    .. versionadded:: 2.6
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']

    tokens = {
        'root': [
            # Comments
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords (the "any character" dot)
            (r'\.', Keyword),

            # Character classes.
            # The leading run must exclude the backslash, just like the
            # repeated tail does; otherwise it swallows the backslash of
            # an escaped ']' and the class ends at that bracket instead
            # of at the real closing one.
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }