1"""
2 pygments.lexers.data
3 ~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for data file format.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""

from pygments.lexer import Lexer, ExtendedRegexLexer, LexerContext, \
    include, bygroups
from pygments.token import Comment, Error, Keyword, Literal, Name, Number, \
    Punctuation, String, Whitespace

__all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']


class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self.indent_stack = []           # previously seen indentation levels
        self.indent = -1                 # the current indentation level
        self.next_indent = 0             # the indentation level of the next line
        self.block_scalar_indent = None  # explicit indentation of a block scalar


class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for YAML, a human-friendly data serialization language.
    """

    name = 'YAML'
    url = 'http://yaml.org/'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']
    version_added = '0.11'

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                    if context.next_indent > context.indent:
                        extra = text[context.indent:]
                        text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback
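
    # Rough illustration (an editorial note, not in the original source):
    # when lexing
    #
    #     a:
    #       b: 1
    #
    # the 'a:' key triggers set_indent(), which pushes -1 onto
    # indent_stack and sets indent to 0; the two leading spaces of the
    # next line pass through save_indent() (next_indent becomes 2), and
    # the 'b:' key pushes 0, leaving indent at 2. Dedenting pops the
    # saved levels back off indent_stack inside save_indent().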

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Whitespace, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Whitespace, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Whitespace, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Whitespace, Keyword.Type, Whitespace, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Whitespace), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Whitespace)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Whitespace), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Whitespace), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Whitespace),
            # key with colon
            (r'''([^#,?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Whitespace),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Whitespace, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Whitespace)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the header of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Whitespace), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Whitespace), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Whitespace), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

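    # The callbacks above keep their indentation state on the lexer
    # context, so hand get_tokens_unprocessed() a YamlLexerContext
    # instead of the default LexerContext.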
    def get_tokens_unprocessed(self, text=None, context=None):
        if context is None:
            context = YamlLexerContext(text, 0)
        return super().get_tokens_unprocessed(text, context)
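

# A minimal usage sketch (illustrative, not part of the original module);
# get_tokens() is the generic entry point every Pygments lexer provides:
#
#     from pygments.lexers.data import YamlLexer
#     for token, value in YamlLexer().get_tokens('key: value\n'):
#         print(token, repr(value))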


class JsonLexer(Lexer):
    """
    For JSON data structures.

    JavaScript-style comments are supported (like ``/* */`` and ``//``),
    though comments are not part of the JSON specification.
    This allows users to highlight JSON as it is used in the wild.

    No validation is performed on the input JSON document.
    """

    name = 'JSON'
    url = 'https://www.json.org'
    aliases = ['json', 'json-object']
    filenames = ['*.json', '*.jsonl', '*.ndjson', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object',
                 'application/x-ndjson', 'application/jsonl',
                 'application/json-seq']
    version_added = '1.5'

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False
        in_comment_single = False
        in_comment_multiline = False
        expecting_second_comment_opener = False  # // or /*
        expecting_second_comment_closer = False  # */

        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
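        # For example (an added illustration): while lexing '{"a": 1}'
        # the token for '"a"' sits in the queue as String.Double; the
        # ':' that follows causes it to be re-emitted as Name.Tag,
        # marking it as an object key.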
        queue = []

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Whitespace, text[start:stop]))
                else:
                    yield start, Whitespace, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            elif in_comment_single:
                if character != '\n':
                    continue

                if queue:
                    queue.append((start, Comment.Single, text[start:stop]))
                else:
                    yield start, Comment.Single, text[start:stop]

                in_comment_single = False
                # Fall through so the new character can be evaluated.

            elif in_comment_multiline:
                if character == '*':
                    expecting_second_comment_closer = True
                elif expecting_second_comment_closer:
                    expecting_second_comment_closer = False
                    if character == '/':
                        if queue:
                            queue.append((start, Comment.Multiline, text[start:stop + 1]))
                        else:
                            yield start, Comment.Multiline, text[start:stop + 1]

                        in_comment_multiline = False

                continue

            elif expecting_second_comment_opener:
                expecting_second_comment_opener = False
                if character == '/':
                    in_comment_single = True
                    continue
                elif character == '*':
                    in_comment_multiline = True
                    continue

                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, text[start:stop]
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    # There can be only three types of tokens before a ':':
                    # Whitespace, Comment, or a quoted string.
                    #
                    # If it's a quoted string we emit Name.Tag.
                    # Otherwise, we yield the original token.
                    #
                    # In all other cases this would be invalid JSON,
                    # but this is not a validating JSON lexer, so it's OK.
                    if _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, _token, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            elif character == '/':
                # This is the beginning of a comment.
                expecting_second_comment_opener = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Whitespace, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
        elif in_comment_single:
            yield start, Comment.Single, text[start:]
        elif in_comment_multiline:
            yield start, Error, text[start:]
        elif expecting_second_comment_opener:
            yield start, Error, text[start:]
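

# A minimal usage sketch (illustrative): JavaScript-style comments are
# tokenized even though they are not valid JSON:
#
#     from pygments.lexers.data import JsonLexer
#     for token, value in JsonLexer().get_tokens('{"a": 1}  // note\n'):
#         print(token, repr(value))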


class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    name = 'JSONBareObject'
    aliases = []
    filenames = []
    mimetypes = []
    version_added = '2.2'


class JsonLdLexer(JsonLexer):
    """
    For JSON-LD linked data.
    """

    name = 'JSON-LD'
    url = 'https://json-ld.org/'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']
    version_added = '2.0'

    json_ld_keywords = {
        f'"@{keyword}"'
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value
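

# A minimal usage sketch (illustrative): JSON-LD keywords such as
# "@context" come out as Name.Decorator rather than Name.Tag:
#
#     from pygments.lexers.data import JsonLdLexer
#     for token, value in JsonLdLexer().get_tokens('{"@context": {}}'):
#         print(token, repr(value))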