Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/grammar_notation.py: 100%

31 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.grammar_notation 

3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for grammar notations like BNF. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11from pygments.lexer import RegexLexer, bygroups, include, this, using, words 

12from pygments.token import Comment, Keyword, Literal, Name, Number, \ 

13 Operator, Punctuation, String, Text, Whitespace 

14 

15__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer'] 

16 

17 

class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations that resemble the original BNF.

    To cover as many BNF dialects as possible the lexer is deliberately
    minimal:

    * Terminal symbols are not distinguished.

    * Nonterminal symbols are assumed to always be enclosed in angle
      brackets (``<`` and ``>``).

    * A nonterminal name may contain any printable ASCII character
      (space through ``~``) except the angle brackets themselves.
      This accommodates
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The target notation is assumed not to support comments.

    * ``::=`` is the only operator that is recognised; every other
      operator or punctuation character is emitted as plain text.

    The resulting highlighting is sparse on purpose.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the name
            # between them is a Name.Class token.
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # The one and only operator.
            (r'::=', Operator),

            # Everything else is plain text.  The first rule swallows a
            # whole run at once (performance); the second picks up a
            # lone '<', '>' or ':' that matched nothing above.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }

64 

65 

class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as described by IETF RFC 7405
    (which updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_).

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Names highlighted as keywords by the 'root' state below.
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP',
    )

    tokens = {
        'root': [
            # ';' starts a comment that runs to the end of the line.
            (r';.*$', Comment.Single),

            # Quoted string, optionally prefixed with %s or %i.
            # A double quote cannot appear inside one; it has to be
            # written as '%x22'.
            (r'(%[si])?"[^"]*"', Literal),

            # Binary numeric values: a range first, then a
            # dot-separated concatenation.
            (r'%b[01]+\-[01]+\b', Literal),
            (r'%b[01]+(\.[01]+)*\b', Literal),

            # Decimal numeric values: range, then concatenation.
            (r'%d[0-9]+\-[0-9]+\b', Literal),
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),

            # Hexadecimal numeric values: range, then concatenation.
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),

            # Repetition prefixes (<a>*<b>element), tried from the
            # most specific form to the least; a bare count covers
            # the nRule form.
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # The "core rules" are not keywords in the strict sense,
            # but they are highlighted as such.
            (words(_core_rules, suffix=r'\b'), Keyword),

            # Rule names: ALPHA *(ALPHA / DIGIT / "-").
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # Definition, incremental alternative, and alternation.
            (r'(=/|=|/)', Operator),

            # Grouping and optional brackets.
            (r'[\[\]()]', Punctuation),

            # Whitespace, then any leftover single character as text.
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }

131 

132 

class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format.

    .. versionadded:: 2.2
    """

    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    tokens = {
        # Comments are tried before anything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # '/**' not directly followed by '/' opens a documentation
            # comment, which is lexed in its own state.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # Ordinary block comment, matched in one go.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # Line comment.
            (r'//.*$', Comment.Single),
        ],

        # Kept separate from 'comments' so that the 'example' state can
        # reuse these rules on their own.
        'non-comments': [
            # The '#JSGF ...' header, only at the very start of input.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Slash-delimited text is emitted as a float.
            (r'/[^/]+/', Number.Float),
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # The two special rule names <NULL> and <VOID>.
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            (r'<', Punctuation, 'rulename'),
            # A word, or a run of characters no other rule claims.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Inside "...": backslash escapes are highlighted separately.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Inside {...}: same structure as 'string'.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # After the 'grammar' keyword, up to the terminating ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # After '<', up to the closing '>'.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            # A component followed by a dot is a namespace part ...
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            # ... and what remains is the rule name itself.
            (r'[^.>]+', Name.Constant),
        ],

        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # The body of an @example or @see tag runs up to the next
            # tag line or the end of the comment, and is lexed again
            # by this lexer in the 'example' state.
            (
                r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace, Comment.Multiline, Whitespace,
                    Comment.Special, Whitespace,
                    using(this, state='example'),
                ),
            ),
            # Any other @tag at the start of a comment line.
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        'example': [
            # The leading '*' of a continuation line stays comment text.
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }

209 

210 

class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).

    * Inside a character class a backslash escapes the next character,
      so `[a\\]b]` is one class containing an escaped `]`.

    .. versionadded:: 2.6
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']

    tokens = {
        'root': [
            # Comments
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords
            (r'\.', Keyword),

            # Character classes.  The leading run must stop at a
            # backslash as well as at ']'; otherwise it consumes the
            # backslash of an escaped '\]' and the class is closed at
            # that bracket.  This mirrors the shape of the two string
            # rules below.
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }