Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/rdf.py: 96%

80 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.rdf 

3 ~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for semantic web and RDF query languages and markup. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import RegexLexer, bygroups, default 

14from pygments.token import Keyword, Punctuation, String, Number, Operator, \ 

15 Generic, Whitespace, Name, Literal, Comment, Text 

16 

17__all__ = ['SparqlLexer', 'TurtleLexer', 'ShExCLexer'] 

18 

19 

class SparqlLexer(RegexLexer):
    """
    Lexer for `SPARQL <https://www.w3.org/TR/sparql11-query/>`_ query language.

    .. versionadded:: 2.0
    """
    name = 'SPARQL'
    aliases = ['sparql']
    filenames = ['*.rq', '*.sparql']
    mimetypes = ['application/sparql-query']

    # character group definitions ::
    #
    # Each ``*_GRP`` value is the *inside* of a regex character class (no
    # enclosing brackets) so that it can be concatenated with further
    # characters before being wrapped in ``[...]`` below.

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.  The SPARQL
    # grammar (PN_LOCAL_ESC) lists the apostrophe here; it lists neither the
    # space nor the double quote.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'

    BLANK_NODE_LABEL = ('_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP +
                        '.]*' + PN_CHARS + ')?')

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    VARNAME = ('[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP +
               '0-9\u00b7\u0300-\u036f\u203f-\u2040]*')

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
             r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
             r'offset|values|bindings|load|into|clear|drop|create|add|move|copy|'
             r'insert\s+data|delete\s+data|delete\s+where|with|delete|insert|'
             r'using\s+named|using|graph|default|named|all|optional|service|'
             r'silent|bind|undef|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
            # the shorthand predicate 'a' (case-sensitive, unlike the keywords)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            #  # variables ::
            ('[?$]' + VARNAME, Name.Variable),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # function names ::
            (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
             r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
             r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
             r'hours|minutes|seconds|timezone|tz|now|uuid|struuid|md5|sha1|sha256|sha384|'
             r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
             r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
             r'count|sum|min|max|avg|sample|group_concat|separator)\b',
             Name.Function),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings :: (triple-quoted forms first, otherwise the single
            # quote rule would match their first character)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        # In the triple-quoted states the content rule must not consume quote
        # characters: a greedy ``[^\\]+`` would swallow the closing delimiter
        # and run on to the next backslash or the end of the input.  Quotes
        # that do not form the delimiter are consumed one at a time instead.
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            (r'[^"\\]+', String),
            (r'"', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            (r"[^'\\]+", String),
            (r"'", String),
            # backslash tagged String, consistent with the other string states
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # entered right after a backslash; consumes exactly one escape body
        'string-escape': [
            (r'u' + HEX + '{4}', String.Escape, '#pop'),
            (r'U' + HEX + '{8}', String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # entered after the closing quote; every rule pops two states (this
        # one and the string state beneath it), returning to 'root'
        'end-of-string': [
            # language tag
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            # datatype marker; the datatype IRI itself is lexed in 'root'
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }

176 

177 

class TurtleLexer(RegexLexer):
    """
    Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.

    .. versionadded:: 2.1
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']

    # character group definitions ::
    #
    # Each ``*_GRP`` value is the *inside* of a regex character class (no
    # enclosing brackets) so that it can be concatenated with further
    # characters before being wrapped in ``[...]``.
    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    HEX_GRP = '0-9A-Fa-f'

    HEX = '[' + HEX_GRP + ']'

    PERCENT = '%' + HEX + HEX

    # Characters that may follow a backslash inside a local name.  The Turtle
    # grammar (PN_LOCAL_ESC) lists the apostrophe here; it lists neither the
    # space nor the double quote.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    # Sub-patterns spliced into the rules below with ``%``-formatting; each
    # one contributes exactly one capturing group.
    patterns = {
        'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    tokens = {
        'root': [
            (r'\s+', Text),

            # Base / prefix
            (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'%(IRIREF)s' % patterns, Name.Variable),

            # PrefixedName
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),

            # Comment ('*' rather than '+' so that a bare '#' is a comment too)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # Doubles are tried before decimals; otherwise '1.5E3' would be
            # split into the decimal '1.5' and a stray 'E3'.  The exponent
            # marker may be either case and a mantissa digit is required.
            (r'[+\-]?(?:\d+\.\d*|\.?\d+)[eE][+\-]?\d+', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # Strings: triple-quoted forms first, otherwise the single quote
            # rule would match their first character.
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        # In the triple-quoted states the content rule must not consume quote
        # characters: a greedy ``[^\\]+`` would swallow the closing delimiter
        # and run on to the next backslash or the end of the input.  Quotes
        # that do not form the delimiter are consumed one at a time instead.
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            (r'[^"\\]+', String),
            (r'"', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            (r"[^'\\]+", String),
            (r"'", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # entered right after a backslash; consumes the escaped character
        'string-escape': [
            (r'.', String, '#pop'),
        ],
        # entered after the closing quote; every rule pops two states (this
        # one and the string state beneath it), returning to 'root'
        'end-of-string': [
            # language tag
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            # datatype given as a full IRI
            (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),

            default('#pop:2'),

        ],
    }

    # Turtle and Tera Term macro files share the same file extension
    # but each has a recognizable and distinct syntax.
    def analyse_text(text):
        """Score *text* as Turtle when it opens with a base/prefix directive.

        Returns 0.80 if, after optional leading whitespace, the text starts
        with ``@base``, ``BASE``, ``@prefix`` or ``PREFIX`` followed by a
        space.  Otherwise the function falls through and returns ``None``.
        """
        # No re.MULTILINE: '^' anchors at the very start of the text only.
        for t in ('@base ', 'BASE ', '@prefix ', 'PREFIX '):
            if re.search(r'^\s*%s' % t, text):
                return 0.80

317 

318 

class ShExCLexer(RegexLexer):
    """
    Lexer for `ShExC <https://shex.io/shex-semantics/#shexc>`_ shape expressions language syntax.
    """
    name = 'ShExC'
    aliases = ['shexc', 'shex']
    filenames = ['*.shex']
    mimetypes = ['text/shex']

    # character group definitions ::
    #
    # Each ``*_GRP`` value is the *inside* of a regex character class (no
    # enclosing brackets) so that it can be concatenated with further
    # characters before being wrapped in ``[...]`` below.

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # characters that may follow a backslash inside a local name
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    # Body of a \uXXXX / \UXXXXXXXX escape without the backslash, so that the
    # 'string-escape' state (entered after the backslash) can reuse it.
    UCHAR_NO_BACKSLASH = '(?:u' + HEX + '{4}|U' + HEX + '{8})'

    UCHAR = r'\\' + UCHAR_NO_BACKSLASH

    IRIREF = r'<(?:[^\x00-\x20<>"{}|^`\\]|' + UCHAR + ')*>'

    BLANK_NODE_LABEL = ('_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP +
                        '.]*' + PN_CHARS + ')?')

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(base|prefix|start|external|'
             r'literal|iri|bnode|nonliteral|length|minlength|maxlength|'
             r'mininclusive|minexclusive|maxinclusive|maxexclusive|'
             r'totaldigits|fractiondigits|'
             r'closed|extra)\b', Keyword),
            # the shorthand predicate 'a' (case-sensitive, unlike the keywords)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + ')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'[@|$&=*+?^\-~]', Operator),
            # operator keywords ::
            (r'(?i)(and|or|not)\b', Operator.Word),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings :: (triple-quoted forms first, otherwise the single
            # quote rule would match their first character)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        # In the triple-quoted states the content rule must not consume quote
        # characters: a greedy ``[^\\]+`` would swallow the closing delimiter
        # and run on to the next backslash or the end of the input.  Quotes
        # that do not form the delimiter are consumed one at a time instead.
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            (r'[^"\\]+', String),
            (r'"', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            (r"[^'\\]+", String),
            (r"'", String),
            # backslash tagged String, consistent with the other string states
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # entered right after a backslash; consumes exactly one escape body
        'string-escape': [
            (UCHAR_NO_BACKSLASH, String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # entered after the closing quote; every rule pops two states (this
        # one and the string state beneath it), returning to 'root'
        'end-of-string': [
            # language tag
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            # datatype marker; the datatype IRI itself is lexed in 'root'
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }