Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/grammar

1"""

2 pygments.lexers.grammar_notation

3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

5 Lexers for grammar notations like BNF.

8 :license: BSD, see LICENSE for details.

9"""

11from pygments.lexer import RegexLexer, bygroups, include, this, using, words

12from pygments.token import Comment, Keyword, Literal, Name, Number, \

13 Operator, Punctuation, String, Text, Whitespace

15__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']

class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations that resemble the original BNF.

    To stay useful for as many BNF dialects as possible, the lexer is
    intentionally minimal:

    * Terminal symbols get no special highlighting.

    * Nonterminal symbols are expected to be wrapped in angle brackets.

    * A nonterminal name may contain any printable ASCII character,
      the space (0x20) included, other than the angle brackets
      themselves.  This accommodates
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The target notation is assumed to have no comment syntax.

    * ``::=`` is the only operator that is recognised; every other
      operator or punctuation mark is emitted as plain text.

    The resulting highlighting is sparse, but that is the price of
    supporting many dialects with a single lexer.
    """

    name = 'BNF'
    url = 'https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']
    version_added = '2.1'

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the name between
            # them is reported as a class name.
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # The sole operator this lexer knows about.
            (r'::=', Operator),

            # Everything else is plain text.  The first rule swallows a
            # whole run in one match (for speed); the second picks up a
            # single leftover character that the rules above rejected.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }

class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as described by IETF RFC 7405, which updates
    `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_.
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']
    version_added = '2.1'

    # Rule names that the 'root' state highlights as keywords.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR',
        'CRLF', 'CTL', 'DIGIT', 'DQUOTE',
        'HEXDIG', 'HTAB', 'LF', 'LWSP',
        'OCTET', 'SP', 'VCHAR', 'WSP',
    )

    tokens = {
        'root': [
            # ';' starts a comment that runs to the end of the line.
            (r';.*$', Comment.Single),

            # Quoted string, optionally prefixed with %s or %i.  The string
            # body cannot hold a double quote; that character is written
            # as '%x22' instead.
            (r'(%[si])?"[^"]*"', Literal),

            # Numeric values.  Each base has a "range" form (lo-hi) that is
            # tried first and a "concatenation" form (a.b.c) tried second.
            # -- binary
            (r'%b[01]+\-[01]+\b', Literal),
            (r'%b[01]+(\.[01]+)*\b', Literal),
            # -- decimal
            (r'%d[0-9]+\-[0-9]+\b', Literal),
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),
            # -- hexadecimal
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),

            # Repetition prefixes (<a>*<b>element), longest form first:
            # 'n*m', then 'n*', then a bare 'n', then a bare '*'.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # The core rules are not keywords in the strict sense, but they
            # are highlighted as such.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # Any other rule name: ALPHA *(ALPHA / DIGIT / "-")
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # Definition, incremental alternative and alternation.
            (r'(=/|=|/)', Operator),

            # Grouping and optional-sequence brackets.
            (r'[\[\]()]', Punctuation),

            # Whatever is left: whitespace first, then any single character.
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }

130

131

class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format (JSGF).
    """

    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']
    version_added = '2.2'

    tokens = {
        # Comment rules are tried before everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # '/**' opens a documentation comment, unless it is directly
            # followed by '/', in which case the next rule treats '/**/'
            # as an ordinary (empty) block comment.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            (r'//.*$', Comment.Single),
        ],

        'non-comments': [
            # The '#JSGF ...' header at the very start of the input, up to
            # (but not including) the terminating semicolon.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Text enclosed in a pair of slashes is reported as a float.
            (r'/[^/]+/', Number.Float),
            # Quoted tokens and {tags} have their own states so that
            # backslash escapes inside them can be highlighted.
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            # 'grammar' is followed by a dotted name, lexed separately.
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # The two special rule names are builtins ...
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            # ... any other '<' starts an ordinary rule reference.
            (r'<', Punctuation, 'rulename'),
            # A word, or a run of characters no rule above starts with.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Inside "..." until the closing quote.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Inside {...} until the closing brace.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # After the 'grammar' keyword: dotted name up to the semicolon.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # Inside <...>: segments followed by a dot are namespaces, the
        # final segment is the rule name itself.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            (r'[^.>]+', Name.Constant),
        ],

        # Inside /** ... */.
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # An @example or @see tag: its body extends lazily up to the
            # next line that begins a tag, or to the closing '*/', and is
            # re-lexed with this lexer in the 'example' state.
            (
                r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace,
                    Comment.Multiline,
                    Whitespace,
                    Comment.Special,
                    Whitespace,
                    using(this, state='example'),
                ),
            ),
            # Any other tag at the start of a comment line.
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            # Plain comment text, falling back to one character at a time.
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        # Body of an @example/@see tag: grammar text embedded in a comment.
        'example': [
            # A '*' that leads a continuation line belongs to the comment.
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }

207

208

class PegLexer(RegexLexer):
    """
    Lexer for Parsing Expression Grammars (PEG).

    PEG implementations disagree on parts of the syntax, so this lexer
    tries to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']
    version_added = '2.6'

    tokens = {
        'root': [
            # Comments run from '#' to the end of the line.
            (r'#.*$', Comment.Single),

            # All operators (rule, choice, predicate, repetition, cut).
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Grouping parentheses.
            (r'[()]', Punctuation),

            # The "any character" dot is highlighted as a keyword.
            (r'\.', Keyword),

            # Character classes.  The leading run must exclude the
            # backslash as well as ']' so that an escaped bracket such as
            # \] is consumed by the escape alternative instead of ending
            # the class early; this mirrors the string rules below.
            (
                r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
                bygroups(Punctuation, String, Punctuation),
            ),

            # Single and double quoted strings (with optional modifiers);
            # a backslash escapes the following character.
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals: any run of characters that are not whitespace,
            # operators, punctuation, quotes or the comment marker.
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback: any single character not matched above.
            (r'.', Text),
        ],
    }

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/grammar_notation.py: 100%

37 statements