Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/rdf.py: 96%
80 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.rdf
3 ~~~~~~~~~~~~~~~~~~~
5 Lexers for semantic web and RDF query languages and markup.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import RegexLexer, bygroups, default
14from pygments.token import Keyword, Punctuation, String, Number, Operator, \
15 Generic, Whitespace, Name, Literal, Comment, Text
17__all__ = ['SparqlLexer', 'TurtleLexer', 'ShExCLexer']
class SparqlLexer(RegexLexer):
    """
    Lexer for `SPARQL <https://www.w3.org/TR/sparql11-query/>`_ query language.

    .. versionadded:: 2.0
    """
    name = 'SPARQL'
    aliases = ['sparql']
    filenames = ['*.rq', '*.sparql']
    mimetypes = ['application/sparql-query']

    # character group definitions ::
    #
    # The ``*_GRP`` strings are the *contents* of a regex character class
    # (no enclosing brackets) so that they can be concatenated into larger
    # classes further down.

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.  The
    # apostrophe belongs to this set; a double quote and a space do not.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    IRIREF = r'<(?:[^<>"{}|^`\\\x00-\x20])*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    VARNAME = '[0-9' + PN_CHARS_U_GRP + '][' + PN_CHARS_U_GRP + \
              '0-9\u00b7\u0300-\u036f\u203f-\u2040]*'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced inside an
    # enclosing group (see PN_LOCAL), never used on its own.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(select|construct|describe|ask|where|filter|group\s+by|minus|'
             r'distinct|reduced|from\s+named|from|order\s+by|desc|asc|limit|'
             r'offset|values|bindings|load|into|clear|drop|create|add|move|copy|'
             r'insert\s+data|delete\s+data|delete\s+where|with|delete|insert|'
             r'using\s+named|using|graph|default|named|all|optional|service|'
             r'silent|bind|undef|union|not\s+in|in|as|having|to|prefix|base)\b', Keyword),
            # the shorthand predicate 'a' (case-sensitive, unlike the keywords above)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            #  # variables ::
            ('[?$]' + VARNAME, Name.Variable),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # function names ::
            (r'(?i)(str|lang|langmatches|datatype|bound|iri|uri|bnode|rand|abs|'
             r'ceil|floor|round|concat|strlen|ucase|lcase|encode_for_uri|'
             r'contains|strstarts|strends|strbefore|strafter|year|month|day|'
             r'hours|minutes|seconds|timezone|tz|now|uuid|struuid|md5|sha1|sha256|sha384|'
             r'sha512|coalesce|if|strlang|strdt|sameterm|isiri|isuri|isblank|'
             r'isliteral|isnumeric|regex|substr|replace|exists|not\s+exists|'
             r'count|sum|min|max|avg|sample|group_concat|separator)\b',
             Name.Function),
            # boolean literals ::
            # (the trailing \b keeps the rule from matching just the first
            # characters of a longer word)
            (r'(true|false)\b', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'(\|\||&&|=|\*|\-|\+|/|!=|<=|>=|!|<|>)', Operator),
            # punctuation characters ::
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # (the triple-quoted openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Plain content: anything except a backslash, and a quote only
            # when it does not begin the closing '"""'.  A plain '[^\\]+'
            # here would run straight past the closing delimiter.
            (r'(?:[^"\\]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same approach as the triple-double-quoted state above.
            (r"(?:[^'\\]|'(?!''))+", String),
            # The backslash itself is emitted as String, as in the other
            # three string states.
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # Entered right after a backslash inside a string: consume what the
        # backslash escapes, then return to the string state.
        'string-escape': [
            (r'u' + HEX + '{4}', String.Escape, '#pop'),
            (r'U' + HEX + '{8}', String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # Entered right after a closing quote: pick up an optional language
        # tag or datatype marker, then leave both this state and the string
        # state beneath it (hence '#pop:2').
        'end-of-string': [
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }
class TurtleLexer(RegexLexer):
    """
    Lexer for `Turtle <http://www.w3.org/TR/turtle/>`_ data language.

    .. versionadded:: 2.1
    """
    name = 'Turtle'
    aliases = ['turtle']
    filenames = ['*.ttl']
    mimetypes = ['text/turtle', 'application/x-turtle']

    # character group definitions ::
    #
    # The ``*_GRP`` strings are the *contents* of a regex character class
    # (no enclosing brackets) so that they can be concatenated into larger
    # classes further down.
    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    HEX_GRP = '0-9A-Fa-f'

    HEX = '[' + HEX_GRP + ']'

    PERCENT = '%' + HEX + HEX

    # Characters that may follow a backslash inside a local name.  The
    # apostrophe belongs to this set; a double quote and a space do not.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced inside an
    # enclosing group (see PN_LOCAL), never used on its own.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    # Fragments interpolated into the rules below with ``%(NAME)s``; each one
    # contributes exactly one capturing group.
    patterns = {
        'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)',  # Simplified character range
        'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
    }

    tokens = {
        'root': [
            (r'\s+', Text),

            # Base / prefix
            (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
                      Punctuation)),
            (r'(@prefix|PREFIX)(\s+)%(PNAME_NS)s(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace,
                      Name.Variable, Whitespace, Punctuation)),

            # The shorthand predicate 'a'
            (r'(?<=\s)a(?=\s)', Keyword.Type),

            # IRIREF
            (r'%(IRIREF)s' % patterns, Name.Variable),

            # PrefixedName
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),

            # Comment ('*' rather than '+' so that a bare '#' is a comment too)
            (r'#[^\n]*', Comment),

            (r'\b(true|false)\b', Literal),
            # Doubles are tried before decimals; otherwise '1.5E3' would be
            # split into the decimal '1.5' and a stray 'E3'.  The exponent
            # marker may be either case and at least one mantissa digit is
            # required.
            (r'[+\-]?(?:\d+\.\d*|\.?\d+)[eE][+\-]?\d+', Number.Float),
            (r'[+\-]?\d*\.\d+', Number.Float),
            (r'[+\-]?\d+', Number.Integer),
            (r'[\[\](){}.;,:^]', Punctuation),

            # Strings (the triple-quoted openers must be tried first)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Plain content: anything except a backslash, and a quote only
            # when it does not begin the closing '"""'.  A plain '[^\\]+'
            # here would run straight past the closing delimiter.
            (r'(?:[^"\\]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same approach as the triple-double-quoted state above.
            (r"(?:[^'\\]|'(?!''))+", String),
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # Entered right after a backslash inside a string: consume the
        # escaped character and return to the string state.
        'string-escape': [
            (r'.', String, '#pop'),
        ],
        # Entered right after a closing quote: pick up an optional language
        # tag or '^^<datatype-iri>', then leave both this state and the
        # string state beneath it (hence '#pop:2').
        'end-of-string': [
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Generic.Emph), '#pop:2'),

            (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),

            default('#pop:2'),

        ],
    }

    # Turtle and Tera Term macro files share the same file extension
    # but each has a recognizable and distinct syntax.
    def analyse_text(text):
        """
        Return 0.80 when *text* begins, after optional whitespace, with one
        of the base/prefix directives followed by a space.

        Falls through (implicitly returning ``None``) when none of them is
        found.
        """
        for t in ('@base ', 'BASE ', '@prefix ', 'PREFIX '):
            # No re.MULTILINE: '^' anchors at the very start of the text only.
            if re.search(r'^\s*%s' % t, text):
                return 0.80
class ShExCLexer(RegexLexer):
    """
    Lexer for `ShExC <https://shex.io/shex-semantics/#shexc>`_ shape expressions language syntax.
    """
    name = 'ShExC'
    aliases = ['shexc', 'shex']
    filenames = ['*.shex']
    mimetypes = ['text/shex']

    # character group definitions ::
    #
    # The ``*_GRP`` strings are the *contents* of a regex character class
    # (no enclosing brackets) so that they can be concatenated into larger
    # classes further down.

    PN_CHARS_BASE_GRP = ('a-zA-Z'
                         '\u00c0-\u00d6'
                         '\u00d8-\u00f6'
                         '\u00f8-\u02ff'
                         '\u0370-\u037d'
                         '\u037f-\u1fff'
                         '\u200c-\u200d'
                         '\u2070-\u218f'
                         '\u2c00-\u2fef'
                         '\u3001-\ud7ff'
                         '\uf900-\ufdcf'
                         '\ufdf0-\ufffd')

    PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

    PN_CHARS_GRP = (PN_CHARS_U_GRP +
                    r'\-' +
                    r'0-9' +
                    '\u00b7' +
                    '\u0300-\u036f' +
                    '\u203f-\u2040')

    HEX_GRP = '0-9A-Fa-f'

    # Characters that may follow a backslash inside a local name.
    PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

    # terminal productions ::

    PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

    PN_CHARS_U = '[' + PN_CHARS_U_GRP + ']'

    PN_CHARS = '[' + PN_CHARS_GRP + ']'

    HEX = '[' + HEX_GRP + ']'

    PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

    # The part of a \uXXXX / \UXXXXXXXX escape that follows the backslash;
    # reused by the 'string-escape' state, which is entered after the
    # backslash has already been consumed.
    UCHAR_NO_BACKSLASH = '(?:u' + HEX + '{4}|U' + HEX + '{8})'

    UCHAR = r'\\' + UCHAR_NO_BACKSLASH

    IRIREF = r'<(?:[^\x00-\x20<>"{}|^`\\]|' + UCHAR + ')*>'

    BLANK_NODE_LABEL = '_:[0-9' + PN_CHARS_U_GRP + '](?:[' + PN_CHARS_GRP + \
                       '.]*' + PN_CHARS + ')?'

    PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

    PERCENT = '%' + HEX + HEX

    PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

    # NOTE: PLX is a bare alternation; it is only ever spliced inside an
    # enclosing group (see PN_LOCAL), never used on its own.
    PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

    PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
                '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
                PN_CHARS_GRP + ':]|' + PLX + '))?')

    EXPONENT = r'[eE][+-]?\d+'

    # Lexer token definitions ::

    tokens = {
        'root': [
            (r'\s+', Text),
            # keywords ::
            (r'(?i)(base|prefix|start|external|'
             r'literal|iri|bnode|nonliteral|length|minlength|maxlength|'
             r'mininclusive|minexclusive|maxinclusive|maxexclusive|'
             r'totaldigits|fractiondigits|'
             r'closed|extra)\b', Keyword),
            # the shorthand predicate 'a' (case-sensitive, unlike the keywords above)
            (r'(a)\b', Keyword),
            # IRIs ::
            ('(' + IRIREF + ')', Name.Label),
            # blank nodes ::
            ('(' + BLANK_NODE_LABEL + ')', Name.Label),
            # prefixed names ::
            (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + ')?',
             bygroups(Name.Namespace, Punctuation, Name.Tag)),
            # boolean literals ::
            # (the trailing \b keeps the rule from matching just the first
            # characters of a longer word)
            (r'(true|false)\b', Keyword.Constant),
            # double literals ::
            (r'[+\-]?(\d+\.\d*' + EXPONENT + r'|\.?\d+' + EXPONENT + ')', Number.Float),
            # decimal literals ::
            (r'[+\-]?(\d+\.\d*|\.\d+)', Number.Float),
            # integer literals ::
            (r'[+\-]?\d+', Number.Integer),
            # operators ::
            (r'[@|$&=*+?^\-~]', Operator),
            # operator keywords ::
            (r'(?i)(and|or|not)\b', Operator.Word),
            # punctuation characters ::
            # ('^' also appears in the operator class above, which is tried
            # first and therefore always claims it)
            (r'[(){}.;,:^\[\]]', Punctuation),
            # line comments ::
            (r'#[^\n]*', Comment),
            # strings ::
            # (the triple-quoted openers must be tried before the single ones)
            (r'"""', String, 'triple-double-quoted-string'),
            (r'"', String, 'single-double-quoted-string'),
            (r"'''", String, 'triple-single-quoted-string'),
            (r"'", String, 'single-single-quoted-string'),
        ],
        'triple-double-quoted-string': [
            (r'"""', String, 'end-of-string'),
            # Plain content: anything except a backslash, and a quote only
            # when it does not begin the closing '"""'.  A plain '[^\\]+'
            # here would run straight past the closing delimiter.
            (r'(?:[^"\\]|"(?!""))+', String),
            (r'\\', String, 'string-escape'),
        ],
        'single-double-quoted-string': [
            (r'"', String, 'end-of-string'),
            (r'[^"\\\n]+', String),
            (r'\\', String, 'string-escape'),
        ],
        'triple-single-quoted-string': [
            (r"'''", String, 'end-of-string'),
            # Same approach as the triple-double-quoted state above.
            (r"(?:[^'\\]|'(?!''))+", String),
            # The backslash itself is emitted as String, as in the other
            # three string states.
            (r'\\', String, 'string-escape'),
        ],
        'single-single-quoted-string': [
            (r"'", String, 'end-of-string'),
            (r"[^'\\\n]+", String),
            (r'\\', String, 'string-escape'),
        ],
        # Entered right after a backslash inside a string: consume what the
        # backslash escapes, then return to the string state.
        'string-escape': [
            (UCHAR_NO_BACKSLASH, String.Escape, '#pop'),
            (r'.', String.Escape, '#pop'),
        ],
        # Entered right after a closing quote: pick up an optional language
        # tag or datatype marker, then leave both this state and the string
        # state beneath it (hence '#pop:2').
        'end-of-string': [
            (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
             bygroups(Operator, Name.Function), '#pop:2'),
            (r'\^\^', Operator, '#pop:2'),
            default('#pop:2'),
        ],
    }