1"""
2 pygments.lexers.rdf
3 ~~~~~~~~~~~~~~~~~~~
4
5 Lexers for semantic web and RDF query languages and markup.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import RegexLexer, bygroups, default
14from pygments.token import Keyword, Punctuation, String, Number, Operator, \
15 Generic, Whitespace, Name, Literal, Comment, Text
16
17__all__ = ['SparqlLexer', 'TurtleLexer', 'ShExCLexer']
18
19
class SparqlLexer(RegexLexer):
    """
    Lexer for SPARQL query language.

    The class attributes below are regular-expression fragments (character
    groups first, then terminal productions built from them) that are
    concatenated into the rules of the ``tokens`` table.
    """
    name = 'SPARQL'
    aliases = ['sparql']
    filenames = ['*.rq', '*.sparql']
    mimetypes = ['application/sparql-query']
    url = 'https://www.w3.org/TR/sparql11-query'
    version_added = '2.0'

    # character group definitions ::
    # (bare character-class contents, without the surrounding brackets, so
    # that they can be combined into larger classes)

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.  The set
    # contains the single quote (not the double quote) and no space, which
    # matches the PN_LOCAL_ESC production and ShExCLexer in this module.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    VARNAME = '[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
              '0-9\u00b7\u0300-\u036f\u203f-\u2040]*'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever embedded inside an
    # enclosing group (see PN_LOCAL), which is what delimits it.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
             r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
             r'offset|values|bindings|load|into|clear|drop|create|add|move|copy|'
             r'insert\s+data|delete\s+data|delete\s+where|with|delete|insert|'
             r'using\s+named|using|graph|default|named|all|optional|service|'
             r'silent|bind|undef|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
            # the 'a' keyword is matched case-sensitively (no (?i) flag)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # variables ::
            ('[?$]' + VARNAME, Name.Variable),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # function names ::
            (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
             r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
             r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
             r'hours|minutes|seconds|timezone|tz|now|uuid|struuid|md5|sha1|sha256|sha384|'
             r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
             r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
             r'count|sum|min|max|avg|sample|group_concat|separator)\b',
             Name.Function),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # (the triple-quoted openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # The text run must stop at every quote so that the closing
            # delimiter rule above gets a chance to match; a lone quote that
            # does not start the delimiter is consumed by the next rule.
            (r'[^\\"]+', String),
            (r'"', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same scheme as the triple-double-quoted state.
            (r"[^\\']+", String),
            (r"'", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        'string-escape': [
            # entered after a backslash: \uXXXX, \UXXXXXXXX or a single char
            (r'u' + HEX + '{4}', String.Escape, '#pop'),
            (r'U' + HEX + '{8}', String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        'end-of-string': [
            # optional language tag or datatype marker after the closing
            # quote; '#pop:2' leaves both this state and the string state
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }
176
177
class TurtleLexer(RegexLexer):
    """
    Lexer for Turtle data language.

    The class attributes below are regular-expression fragments that are
    combined into the rules of the ``tokens`` table; ``patterns`` holds the
    fragments that are substituted with ``str.format``.
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']
    url = 'https://www.w3.org/TR/turtle'
    version_added = '2.1'

    # character group definitions ::
    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    HEX_GRP = '0-9A-Fa-f'

    HEX = '[' + HEX_GRP + ']'

    PERCENT = '%' + HEX + HEX

    # Characters that may follow a backslash inside a local name.  The set
    # contains the single quote (not the double quote) and no space, which
    # matches the PN_LOCAL_ESC production and ShExCLexer in this module.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever embedded inside an
    # enclosing group (see PN_LOCAL), which is what delimits it.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    # Fragments substituted into the rules below via str.format; each one is
    # a single capturing group.
    patterns = {
        'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    tokens = {
        'root': [
            (r'\s+', Text),

            # Base / prefix
            (r'(@base|BASE)(\s+){IRIREF}(\s*)(\.?)'.format(**patterns),
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+){PNAME_NS}(\s+){IRIREF}(\s*)(\.?)'.format(**patterns),
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'{IRIREF}'.format(**patterns), Name.Variable),

            # PrefixedName
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),

            # BlankNodeLabel
            (r'(_)(:)([' + PN_CHARS_U_GRP + r'0-9]([' + PN_CHARS_GRP + r'.]*' + PN_CHARS + ')?)',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),

            # Comment ('*' rather than '+' so that an empty comment, i.e. a
            # '#' directly followed by a newline, is still a comment)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # Doubles come first: otherwise the decimal rule would take the
            # mantissa of e.g. '1.5E3' and leave the exponent behind.
            (r'[+\-]?(?:\d+\.\d*|\.?\d+)[eE][+\-]?\d+', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # (the triple-quoted openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # The text run stops at every quote so that the closing delimiter
            # rule above gets a chance to match; a lone quote that does not
            # start the delimiter is consumed by the next rule.
            (r'[^\\"]+', String),
            (r'"', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same scheme as the triple-double-quoted state.
            (r"[^\\']+", String),
            (r"'", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        'string-escape': [
            # the single character following a backslash
            (r'.', String, '#pop'),
        ],
        'end-of-string': [
            # optional language tag or datatype IRI after the closing quote;
            # '#pop:2' leaves both this state and the string state
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            (r'(\^\^){IRIREF}'.format(**patterns), bygroups(Operator, Generic.Emph), '#pop:2'),

            default('#pop:2'),

        ],
    }

    # Turtle and Tera Term macro files share the same file extension
    # but each has a recognizable and distinct syntax.
    def analyse_text(text):
        """
        Return 0.80 when *text* begins, after optional whitespace, with one
        of ``@base``, ``BASE``, ``@prefix`` or ``PREFIX`` followed by a
        space; return 0.0 otherwise.
        """
        # '^' is used without re.MULTILINE, so only the very start of the
        # text (plus leading whitespace) is inspected.
        if re.search(r'^\s*(?:@base|BASE|@prefix|PREFIX) ', text):
            return 0.80
        return 0.0
321
322
class ShExCLexer(RegexLexer):
    """
    Lexer for ShExC shape expressions language syntax.

    The class attributes below are regular-expression fragments (character
    groups first, then terminal productions built from them) that are
    concatenated into the rules of the ``tokens`` table.
    """
    name = 'ShExC'
    aliases = ['shexc', 'shex']
    filenames = ['*.shex']
    mimetypes = ['text/shex']
    url = 'https://shex.io/shex-semantics/#shexc'
    version_added = ''

    # character group definitions ::
    # (bare character-class contents, without the surrounding brackets, so
    # that they can be combined into larger classes)

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    # \uXXXX / \UXXXXXXXX without the leading backslash; used on its own in
    # the 'string-escape' state, where the backslash was already consumed
    UCHAR_NO_BACKSLASH = '(?:u' + HEX + '{4}|U' + HEX + '{8})'

    UCHAR = r'\\' + UCHAR_NO_BACKSLASH

    IRIREF = r'<(?:[^\x00-\x20<>"{}|^`\\]|' + UCHAR + ')*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever embedded inside an
    # enclosing group (see PN_LOCAL), which is what delimits it.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(base|prefix|start|external|'
             r'literal|iri|bnode|nonliteral|length|minlength|maxlength|'
             r'mininclusive|minexclusive|maxinclusive|maxexclusive|'
             r'totaldigits|fractiondigits|'
             r'closed|extra)\b', Keyword),
            # the 'a' keyword is matched case-sensitively (no (?i) flag)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + ')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # boolean literals ::
            (r'(true|false)', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'[@|$&=*+?^\-~]', Operator),
            # operator keywords ::
            (r'(?i)(and|or|not)\b', Operator.Word),
            # punctuation characters ::
            # ('^' is listed here too, but the operator rule above always
            # matches it first)
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # (the triple-quoted openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # The text run must stop at every quote so that the closing
            # delimiter rule above gets a chance to match; a lone quote that
            # does not start the delimiter is consumed by the next rule.
            (r'[^\\"]+', String),
            (r'"', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same scheme as the triple-double-quoted state.
            (r"[^\\']+", String),
            (r"'", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        'string-escape': [
            # entered after a backslash: a UCHAR body or a single character
            (UCHAR_NO_BACKSLASH, String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        'end-of-string': [
            # optional language tag or datatype marker after the closing
            # quote; '#pop:2' leaves both this state and the string state
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }