1"""
2 pygments.lexers.data
3 ~~~~~~~~~~~~~~~~~~~~
5 Lexers for data file format.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""

from pygments.lexer import Lexer, ExtendedRegexLexer, LexerContext, \
    include, bygroups
from pygments.token import Comment, Error, Keyword, Literal, Name, Number, \
    Punctuation, String, Whitespace

__all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']


class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self.indent_stack = []           # saved outer indentation levels
        self.indent = -1                 # current indentation level
        self.next_indent = 0             # candidate for the next level
        self.block_scalar_indent = None  # explicit indent of a block scalar


class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for YAML, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    url = 'http://yaml.org/'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    extra = text[context.indent:]
                    text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback
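
    # The functions above are callback factories: each returns a
    # callback(lexer, match, context) suitable for use in the token rules
    # below. A callback yields (index, token, text) triples and is
    # responsible for advancing context.pos itself. For example, the
    # 'root' rule pairs save_indent(Whitespace, start=True) with the
    # 'indentation' state so leading spaces can adjust the indent stack
    # before the rest of the line is tokenized.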

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Whitespace, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Whitespace, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Whitespace, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Whitespace, Keyword.Type, Whitespace, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Whitespace), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Whitespace)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Whitespace), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Whitespace), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Whitespace),
            # key with colon
            (r'''([^#,?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Whitespace),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Whitespace, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Whitespace)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Whitespace), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Whitespace), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Whitespace), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        if context is None:
            context = YamlLexerContext(text, 0)
        return super().get_tokens_unprocessed(text, context)
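

# A minimal usage sketch (illustrative, not part of the original module):
# like any Pygments lexer, YamlLexer yields (index, token, text) triples
# from get_tokens_unprocessed().
#
#     from pygments.lexers.data import YamlLexer
#
#     for index, token, text in YamlLexer().get_tokens_unprocessed('key: value\n'):
#         print(index, token, repr(text))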


class JsonLexer(Lexer):
    """
    For JSON data structures.

    JavaScript-style comments are supported (like ``/* */`` and ``//``),
    though comments are not part of the JSON specification.
    This allows users to highlight JSON as it is used in the wild.

    No validation is performed on the input JSON document.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    url = 'https://www.json.org'
    aliases = ['json', 'json-object']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object']

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False
        in_comment_single = False
        in_comment_multiline = False
        expecting_second_comment_opener = False  # // or /*
        expecting_second_comment_closer = False  # */

        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []
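
        # For example (illustrative, not from the original source): while
        # lexing '{"a": 1}' the string '"a"' is queued as String.Double;
        # when the following ':' is seen it is re-emitted as Name.Tag,
        # whereas the value 1 is yielded directly as Number.Integer.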

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Whitespace, text[start:stop]))
                else:
                    yield start, Whitespace, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            elif in_comment_single:
                if character != '\n':
                    continue

                if queue:
                    queue.append((start, Comment.Single, text[start:stop]))
                else:
                    yield start, Comment.Single, text[start:stop]

                in_comment_single = False
                # Fall through so the new character can be evaluated.

            elif in_comment_multiline:
                if character == '*':
                    expecting_second_comment_closer = True
                elif expecting_second_comment_closer:
                    expecting_second_comment_closer = False
                    if character == '/':
                        if queue:
                            queue.append((start, Comment.Multiline, text[start:stop + 1]))
                        else:
                            yield start, Comment.Multiline, text[start:stop + 1]

                        in_comment_multiline = False

                continue

            elif expecting_second_comment_opener:
                expecting_second_comment_opener = False
                if character == '/':
                    in_comment_single = True
                    continue
                elif character == '*':
                    in_comment_multiline = True
                    continue

                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, text[start:stop]
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    # There can be only three types of tokens before a ':':
                    # Whitespace, Comment, or a quoted string.
                    #
                    # If it's a quoted string we emit Name.Tag.
                    # Otherwise, we yield the original token.
                    #
                    # In all other cases this would be invalid JSON,
                    # but this is not a validating JSON lexer, so it's OK.
                    if _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, _token, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            elif character == '/':
                # This is the beginning of a comment.
                expecting_second_comment_opener = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Whitespace, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
        elif in_comment_single:
            yield start, Comment.Single, text[start:]
        elif in_comment_multiline:
            yield start, Error, text[start:]
        elif expecting_second_comment_opener:
            yield start, Error, text[start:]
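

# A quick sanity check (illustrative, not part of the original module):
# '"a"' is emitted as Name.Tag because a ':' follows it, and 'true' is
# emitted as Keyword.Constant.
#
#     from pygments.lexers.data import JsonLexer
#
#     tokens = list(JsonLexer().get_tokens_unprocessed('{"a": true}'))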


class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    name = 'JSONBareObject'
    aliases = []
    filenames = []
    mimetypes = []


class JsonLdLexer(JsonLexer):
    """
    For JSON-LD linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    url = 'https://json-ld.org/'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    # Each entry keeps its surrounding double quotes so it can be compared
    # directly against the quoted text of a Name.Tag token.
    json_ld_keywords = {
        '"@%s"' % keyword
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value
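

# Example usage (illustrative, not part of the original module): JsonLdLexer
# re-tags JSON-LD keywords such as '"@context"' as Name.Decorator, while
# ordinary object keys remain Name.Tag.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.data import JsonLdLexer
#
#     print(highlight('{"@context": "https://schema.org/"}',
#                     JsonLdLexer(), TerminalFormatter()))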