Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/grammar_notation.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

37 statements  

1""" 

2 pygments.lexers.grammar_notation 

3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for grammar notations like BNF. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11from pygments.lexer import RegexLexer, bygroups, include, this, using, words 

12from pygments.token import Comment, Keyword, Literal, Name, Number, \ 

13 Operator, Punctuation, String, Text, Whitespace 

14 

# Public lexer classes exported by this module.
__all__ = [
    'BnfLexer',
    'AbnfLexer',
    'JsgfLexer',
    'PegLexer',
]

16 

17 

class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations in the style of the original BNF.

    To stay usable for as many BNF dialects as possible, the lexer is
    deliberately minimal:

    * Terminal symbols are not distinguished from surrounding text.

    * Nonterminal symbols are assumed to always be enclosed in angle
      brackets (``<`` and ``>``).

    * A nonterminal name may consist of printable ASCII characters other
      than the angle brackets themselves.  The character class used by
      the rule starts at 0x20, so spaces inside the brackets are accepted
      too.  This assumption exists for
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The target notation is assumed to have no comment syntax.

    * ``::=`` is the only operator that is highlighted; any other
      operator or punctuation character is emitted as plain text.

    The resulting highlighting is intentionally sparse.
    """

    name = 'BNF'
    url = 'https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']
    version_added = '2.1'

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the enclosed
            # name (0x20-0x7E without '<' and '>') is the symbol itself.
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # The one and only operator.
            (r'::=', Operator),

            # Everything else is plain text.  The first rule swallows long
            # runs in a single match (performance); the second picks up a
            # stray '<', '>' or ':' one character at a time.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }

64 

65 

class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as described by IETF RFC 7405, which updates
    `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_.
    """

    name = 'ABNF'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    version_added = '2.1'

    # Names of the predefined "core rules"; highlighted as keywords below.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP')

    tokens = {
        'root': [
            # Comment: from ';' to the end of the line.
            (r';.*$', Comment.Single),

            # Quoted string with an optional %s / %i prefix.  A double
            # quote cannot appear inside the quotes; it is written as
            # '%x22' instead.
            (r'(%[si])?"[^"]*"', Literal),

            # Binary values: range (a-b) first, then dot-concatenation.
            (r'%b[01]+\-[01]+\b', Literal),
            (r'%b[01]+(\.[01]+)*\b', Literal),

            # Decimal values: range, then concatenation.
            (r'%d[0-9]+\-[0-9]+\b', Literal),
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),

            # Hexadecimal values: range, then concatenation.
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),

            # Repetition (<a>*<b>element), including the nRule form.
            # Longest alternatives come first so they win.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Core rules.  Strictly speaking these are not keywords, but
            # they are predefined, so they are highlighted as such.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # Nonterminals: ALPHA *(ALPHA / DIGIT / "-")
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # Definition, incremental alternative and alternation operators.
            (r'(=/|=|/)', Operator),

            # Grouping and optional-sequence brackets.
            (r'[\[\]()]', Punctuation),

            # Fallback: whitespace, then any other single character.
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }

130 

131 

class JsgfLexer(RegexLexer):
    """
    Lexer for JSpeech Grammar Format (JSGF) grammars.
    """

    name = 'JSGF'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']
    url = 'https://www.w3.org/TR/jsgf/'
    version_added = '2.2'

    tokens = {
        # Top level: comments are tried before everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # '/**' not directly followed by '/' opens a documentation
            # comment, which is lexed in its own state.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # Ordinary block comment, matched in one go (non-greedy).
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # Line comment.
            (r'//.*$', Comment.Single),
        ],

        # Everything that is not a comment.  This state is also reused
        # by the 'example' state for code embedded in doc comments.
        'non-comments': [
            # Self-identifying header; only matches at the start of input.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Text between slashes is highlighted as a float.
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # The two built-in rule names.
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            (r'<', Punctuation, 'rulename'),
            # Plain tokens: a word, or a run of other non-special chars.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Inside "..." until the closing quote.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Inside {...} until the closing brace.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # After the 'grammar' keyword, up to the terminating ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # Inside <...>: dot-terminated parts are namespaces, the last
        # part is the rule name itself.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            (r'[^.>]+', Name.Constant),
        ],

        # Inside /** ... */.
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # @example / @see: the text up to the next tag line or the
            # closing '*/' is re-lexed by this lexer in the 'example' state.
            (
                r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace, Comment.Multiline, Whitespace,
                    Comment.Special, Whitespace,
                    using(this, state='example'),
                ),
            ),
            # Any other @tag at the start of a comment line.
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            # Remaining comment text.
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        # Grammar code embedded in a documentation comment.
        'example': [
            # A leading '*' on a continuation line belongs to the comment.
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }

207 

208 

class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).

    * Inside a character class a backslash escapes the following
      character, so `[\\]]` is one class containing `]`.
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments run from '#' to the end of the line.
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # The dot is highlighted as a keyword.
            (r'\.', Keyword),

            # Character classes.  The unescaped runs must exclude the
            # backslash (as the string rules below do); otherwise the
            # leading run swallows the backslash of an escape such as
            # `\]` and the class is terminated by the escaped bracket.
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }