Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/grammar_notation.py

1"""

2 pygments.lexers.grammar_notation

3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

5 Lexers for grammar notations like BNF.

8 :license: BSD, see LICENSE for details.

9"""

11from pygments.lexer import RegexLexer, bygroups, include, this, using, words

12from pygments.token import Comment, Keyword, Literal, Name, Number, \

13 Operator, Punctuation, String, Text, Whitespace

15__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']

class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations that resemble the original BNF.

    To stay usable across as many BNF dialects as possible, the
    highlighting is deliberately minimal:

    * Terminal symbols receive no special treatment.

    * Nonterminal symbols are only recognized when they are enclosed in
      angle brackets (``<name>``).

    * A nonterminal name is a non-empty run of printable ASCII characters
      other than ``<`` and ``>`` (the pattern's range starts at 0x20, so a
      space inside the brackets is accepted).  This accommodates
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The target notation is assumed to have no comment syntax.

    * ``::=`` is the only operator that is highlighted; every other
      operator or punctuation character is emitted as plain text.

    The result may look sparse, but it is a reasonable common denominator.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the name between
            # them is a class name.
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # The single operator this lexer knows about.
            (r'::=', Operator),

            # Everything else is plain text.  The first rule consumes a
            # whole run in one match (for performance); the second picks up
            # a lone '<', '>' or ':' that did not start one of the rules
            # above.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }

class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as described by IETF RFC 7405
    (which updates `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_).

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Names of the ABNF "core rules"; they are highlighted as keywords
    # although, strictly speaking, they are ordinary rule names.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP',
    )

    tokens = {
        'root': [
            # ';' starts a comment that runs to the end of the line.
            (r';.*$', Comment.Single),

            # Quoted string, optionally prefixed with %s or %i.  A double
            # quote cannot appear inside one; it is written as %x22.
            (r'(%[si])?"[^"]*"', Literal),

            # Numeric values.  For every base the range form (a-b) is tried
            # before the dot-separated concatenation form (a.b.c).
            # -- binary
            (r'%b[01]+\-[01]+\b', Literal),
            (r'%b[01]+(\.[01]+)*\b', Literal),
            # -- decimal
            (r'%d[0-9]+\-[0-9]+\b', Literal),
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),
            # -- hexadecimal
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),

            # Repetition prefixes (<a>*<b>element), including the plain
            # "n element" form; the longest alternatives come first.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Core rules, matched as whole words.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # Rule names: ALPHA *(ALPHA / DIGIT / "-").
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # Definition, incremental alternative, and alternation.
            (r'(=/|=|/)', Operator),

            # Grouping and optional brackets.
            (r'[\[\]()]', Punctuation),

            # Anything left over.
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }

131

132

class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format (JSGF).

    .. versionadded:: 2.2
    """

    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    tokens = {
        # Entry state: comments take precedence over everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # '/**' not immediately followed by '/' opens a documentation
            # comment, which has its own state so that @-tags are seen.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # Ordinary block comment, matched non-greedily up to '*/'.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # Line comment.
            (r'//.*$', Comment.Single),
        ],

        # Shared by 'root' and by the 'example' state used inside
        # documentation comments.
        'non-comments': [
            # '#JSGF ...' header, only at the very start of the input.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Slash-delimited span, emitted as a float number.
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # <NULL> and <VOID> are highlighted as builtins; any other '<'
            # switches to the 'rulename' state.
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            (r'<', Punctuation, 'rulename'),
            # Plain words, or runs of characters no rule above claims.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Inside "...": backslash escapes are highlighted separately.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Inside {...}: same structure as 'string'.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # After the 'grammar' keyword, up to the terminating ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # Inside <...>: dotted prefixes are namespaces, the final
        # component is the rule name itself.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            (r'[^.>]+', Name.Constant),
        ],

        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # '@example' / '@see' tag: the text up to the next tag line or
            # the closing '*/' is re-lexed in the 'example' state.
            (
                r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace, Comment.Multiline, Whitespace,
                    Comment.Special, Whitespace,
                    using(this, state='example'),
                ),
            ),
            # Any other @tag at the start of a comment line.
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        # Body of an @example/@see tag inside a documentation comment.
        'example': [
            # Leading '*' of a continuation line stays comment-colored.
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }

209

210

class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).

    * Inside a character class a backslash escapes the following
      character, so `[\\]]` is a single class containing `]`.

    .. versionadded:: 2.6
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']

    tokens = {
        'root': [
            # Comments
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords
            (r'\.', Keyword),

            # Character classes.  The leading run must exclude backslash as
            # well as ']' (same shape as the string rules below); otherwise
            # it swallows the backslash of an escaped '\]' and the class is
            # closed at the escaped bracket.
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/grammar_notation.py: 100%

31 statements