1"""
2 pygments.lexers.data
3 ~~~~~~~~~~~~~~~~~~~~
5 Lexers for data file format.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""

from pygments.lexer import Lexer, ExtendedRegexLexer, LexerContext, \
    include, bygroups
from pygments.token import Comment, Error, Keyword, Literal, Name, Number, \
    Punctuation, String, Whitespace

__all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer']


class YamlLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self.indent_stack = []           # saved outer indentation levels
        self.indent = -1                 # current indentation level
        self.next_indent = 0             # candidate for the next level
        self.block_scalar_indent = None  # explicit indent of a block scalar


class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for YAML, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    name = 'YAML'
    url = 'http://yaml.org/'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    def something(token_class):
        """Do not produce empty tokens."""
        def callback(lexer, match, context):
            text = match.group()
            if not text:
                return
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def reset_indent(token_class):
        """Reset the indentation levels."""
        def callback(lexer, match, context):
            text = match.group()
            context.indent_stack = []
            context.indent = -1
            context.next_indent = 0
            context.block_scalar_indent = None
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def save_indent(token_class, start=False):
        """Save a possible indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            extra = ''
            if start:
                context.next_indent = len(text)
                if context.next_indent < context.indent:
                    while context.next_indent < context.indent:
                        context.indent = context.indent_stack.pop()
                if context.next_indent > context.indent:
                    extra = text[context.indent:]
                    text = text[:context.indent]
            else:
                context.next_indent += len(text)
            if text:
                yield match.start(), token_class, text
            if extra:
                yield match.start()+len(text), token_class.Error, extra
            context.pos = match.end()
        return callback

    def set_indent(token_class, implicit=False):
        """Set the previously saved indentation level."""
        def callback(lexer, match, context):
            text = match.group()
            if context.indent < context.next_indent:
                context.indent_stack.append(context.indent)
                context.indent = context.next_indent
            if not implicit:
                context.next_indent += len(text)
            yield match.start(), token_class, text
            context.pos = match.end()
        return callback

    def set_block_scalar_indent(token_class):
        """Set an explicit indentation level for a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            context.block_scalar_indent = None
            if not text:
                return
            increment = match.group(1)
            if increment:
                current_indent = max(context.indent, 0)
                increment = int(increment)
                context.block_scalar_indent = current_indent + increment
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_block_scalar_empty_line(indent_token_class, content_token_class):
        """Process an empty line in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if (context.block_scalar_indent is None or
                    len(text) <= context.block_scalar_indent):
                if text:
                    yield match.start(), indent_token_class, text
            else:
                indentation = text[:context.block_scalar_indent]
                content = text[context.block_scalar_indent:]
                yield match.start(), indent_token_class, indentation
                yield (match.start()+context.block_scalar_indent,
                       content_token_class, content)
            context.pos = match.end()
        return callback

    def parse_block_scalar_indent(token_class):
        """Process indentation spaces in a block scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if context.block_scalar_indent is None:
                if len(text) <= max(context.indent, 0):
                    context.stack.pop()
                    context.stack.pop()
                    return
                context.block_scalar_indent = len(text)
            else:
                if len(text) < context.block_scalar_indent:
                    context.stack.pop()
                    context.stack.pop()
                    return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback

    def parse_plain_scalar_indent(token_class):
        """Process indentation spaces in a plain scalar."""
        def callback(lexer, match, context):
            text = match.group()
            if len(text) <= context.indent:
                context.stack.pop()
                context.stack.pop()
                return
            if text:
                yield match.start(), token_class, text
                context.pos = match.end()
        return callback
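
    # The functions above are callback factories: each returns a
    # callback(lexer, match, context) suitable for use in the token rules
    # below. A callback yields (index, token, text) triples and is
    # responsible for advancing context.pos itself. For example, the
    # 'root' rule pairs save_indent(Whitespace, start=True) with the
    # 'indentation' state so leading spaces can adjust the indent stack
    # before the rest of the line is tokenized.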

    tokens = {
        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
            # the %TAG directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
             'block-line'),
            # indentation spaces
            (r'[ ]*(?!\s|$)', save_indent(Whitespace, start=True),
             ('block-line', 'indentation')),
        ],

        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Whitespace, '#pop:2'),
        ],

        # the %YAML directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Whitespace, Number), 'ignored-line'),
        ],

        # the %TAG directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![\w-]*!)'
             r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
             bygroups(Whitespace, Keyword.Type, Whitespace, Keyword.Type),
             'ignored-line'),
        ],

        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Whitespace), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Whitespace)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Whitespace), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Whitespace), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Whitespace),
            # key with colon
            (r'''([^#,?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, set_indent(Punctuation, implicit=True))),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
             something(Name.Variable),
             'plain-scalar-in-block-context'),
        ],

        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[\w-]+!)?'
             r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type),
            # an anchor
            (r'&[\w-]+', Name.Label),
            # an alias
            (r'\*[\w-]+', Name.Variable),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', String, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', String, 'double-quoted-scalar'),
        ],

        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
             something(Name.Variable),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # key with colon
            (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''',
             bygroups(Name.Tag, Punctuation)),
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Whitespace),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Whitespace, Name.Constant)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Whitespace)),
            # line content
            (r'[\S\t ]+', Name.Constant),
        ],

        # the content of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],

        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', String.Escape),
            # regular non-whitespace characters
            (r'[^\s\']+', String),
            # the closing quote
            (r'\'', String, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', String),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             String.Escape),
            # regular non-whitespace characters
            (r'[^\s"\\]+', String),
            # the closing quote
            (r'"', String, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Whitespace),
            # line breaks
            (r'\n+', Whitespace),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Whitespace), '#pop'),
        ],

        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Whitespace), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Whitespace), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Whitespace, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+', Whitespace),
            (r'[ ]+$', Whitespace),
            # line breaks are ignored
            (r'\n+', Whitespace),
            # other whitespaces are a part of the value
            (r'[ ]+', Name.Variable),
            # regular non-whitespace characters
            (r'[^\s,:?\[\]{}]+', Name.Variable),
        ],

    }

    def get_tokens_unprocessed(self, text=None, context=None):
        if context is None:
            context = YamlLexerContext(text, 0)
        return super().get_tokens_unprocessed(text, context)
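

# A minimal usage sketch (illustrative, not part of the original module):
# like any Pygments lexer, YamlLexer yields (index, token, text) triples
# from get_tokens_unprocessed().
#
#     from pygments.lexers.data import YamlLexer
#
#     for index, token, text in YamlLexer().get_tokens_unprocessed('key: value\n'):
#         print(index, token, repr(text))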


class JsonLexer(Lexer):
    """
    For JSON data structures.

    JavaScript-style comments are supported (like ``/* */`` and ``//``),
    though comments are not part of the JSON specification.
    This allows users to highlight JSON as it is used in the wild.

    No validation is performed on the input JSON document.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    url = 'https://www.json.org'
    aliases = ['json', 'json-object']
    filenames = ['*.json', 'Pipfile.lock']
    mimetypes = ['application/json', 'application/json-object']

    # No validation of integers, floats, or constants is done.
    # As long as the characters are members of the following
    # sets, the token will be considered valid. For example,
    #
    #     "--1--" is parsed as an integer
    #     "1...eee" is parsed as a float
    #     "trustful" is parsed as a constant
    #
    integers = set('-0123456789')
    floats = set('.eE+')
    constants = set('truefalsenull')  # true|false|null
    hexadecimals = set('0123456789abcdefABCDEF')
    punctuations = set('{}[],')
    whitespaces = {'\u0020', '\u000a', '\u000d', '\u0009'}

    def get_tokens_unprocessed(self, text):
        """Parse JSON data."""

        in_string = False
        in_escape = False
        in_unicode_escape = 0
        in_whitespace = False
        in_constant = False
        in_number = False
        in_float = False
        in_punctuation = False
        in_comment_single = False
        in_comment_multiline = False
        expecting_second_comment_opener = False  # // or /*
        expecting_second_comment_closer = False  # */

        start = 0

        # The queue is used to store data that may need to be tokenized
        # differently based on what follows. In particular, JSON object
        # keys are tokenized differently than string values, but cannot
        # be distinguished until punctuation is encountered outside the
        # string.
        #
        # A ":" character after the string indicates that the string is
        # an object key; any other character indicates the string is a
        # regular string value.
        #
        # The queue holds tuples that contain the following data:
        #
        #     (start_index, token_type, text)
        #
        # By default the token type of text in double quotes is
        # String.Double. The token type will be replaced if a colon
        # is encountered after the string closes.
        #
        queue = []
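
        # For example (illustrative, not from the original source): while
        # lexing '{"a": 1}' the string '"a"' is queued as String.Double;
        # when the following ':' is seen it is re-emitted as Name.Tag,
        # whereas the value 1 is yielded directly as Number.Integer.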

        for stop, character in enumerate(text):
            if in_string:
                if in_unicode_escape:
                    if character in self.hexadecimals:
                        in_unicode_escape -= 1
                        if not in_unicode_escape:
                            in_escape = False
                    else:
                        in_unicode_escape = 0
                        in_escape = False

                elif in_escape:
                    if character == 'u':
                        in_unicode_escape = 4
                    else:
                        in_escape = False

                elif character == '\\':
                    in_escape = True

                elif character == '"':
                    queue.append((start, String.Double, text[start:stop + 1]))
                    in_string = False
                    in_escape = False
                    in_unicode_escape = 0

                continue

            elif in_whitespace:
                if character in self.whitespaces:
                    continue

                if queue:
                    queue.append((start, Whitespace, text[start:stop]))
                else:
                    yield start, Whitespace, text[start:stop]
                in_whitespace = False
                # Fall through so the new character can be evaluated.

            elif in_constant:
                if character in self.constants:
                    continue

                yield start, Keyword.Constant, text[start:stop]
                in_constant = False
                # Fall through so the new character can be evaluated.

            elif in_number:
                if character in self.integers:
                    continue
                elif character in self.floats:
                    in_float = True
                    continue

                if in_float:
                    yield start, Number.Float, text[start:stop]
                else:
                    yield start, Number.Integer, text[start:stop]
                in_number = False
                in_float = False
                # Fall through so the new character can be evaluated.

            elif in_punctuation:
                if character in self.punctuations:
                    continue

                yield start, Punctuation, text[start:stop]
                in_punctuation = False
                # Fall through so the new character can be evaluated.

            elif in_comment_single:
                if character != '\n':
                    continue

                if queue:
                    queue.append((start, Comment.Single, text[start:stop]))
                else:
                    yield start, Comment.Single, text[start:stop]

                in_comment_single = False
                # Fall through so the new character can be evaluated.

            elif in_comment_multiline:
                if character == '*':
                    expecting_second_comment_closer = True
                elif expecting_second_comment_closer:
                    expecting_second_comment_closer = False
                    if character == '/':
                        if queue:
                            queue.append((start, Comment.Multiline, text[start:stop + 1]))
                        else:
                            yield start, Comment.Multiline, text[start:stop + 1]

                        in_comment_multiline = False

                continue

            elif expecting_second_comment_opener:
                expecting_second_comment_opener = False
                if character == '/':
                    in_comment_single = True
                    continue
                elif character == '*':
                    in_comment_multiline = True
                    continue

                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, text[start:stop]
                # Fall through so the new character can be evaluated.

            start = stop

            if character == '"':
                in_string = True

            elif character in self.whitespaces:
                in_whitespace = True

            elif character in {'f', 'n', 't'}:  # The first letters of true|false|null
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_constant = True

            elif character in self.integers:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_number = True

            elif character == ':':
                # Yield from the queue. Replace string token types.
                for _start, _token, _text in queue:
                    # There can be only three types of tokens before a ':':
                    # Whitespace, Comment, or a quoted string.
                    #
                    # If it's a quoted string we emit Name.Tag.
                    # Otherwise, we yield the original token.
                    #
                    # In all other cases this would be invalid JSON,
                    # but this is not a validating JSON lexer, so it's OK.
                    if _token is String.Double:
                        yield _start, Name.Tag, _text
                    else:
                        yield _start, _token, _text
                queue.clear()

                in_punctuation = True

            elif character in self.punctuations:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                in_punctuation = True

            elif character == '/':
                # This is the beginning of a comment.
                expecting_second_comment_opener = True

            else:
                # Exhaust the queue. Accept the existing token types.
                yield from queue
                queue.clear()

                yield start, Error, character

        # Yield any remaining text.
        yield from queue
        if in_string:
            yield start, Error, text[start:]
        elif in_float:
            yield start, Number.Float, text[start:]
        elif in_number:
            yield start, Number.Integer, text[start:]
        elif in_constant:
            yield start, Keyword.Constant, text[start:]
        elif in_whitespace:
            yield start, Whitespace, text[start:]
        elif in_punctuation:
            yield start, Punctuation, text[start:]
        elif in_comment_single:
            yield start, Comment.Single, text[start:]
        elif in_comment_multiline:
            yield start, Error, text[start:]
        elif expecting_second_comment_opener:
            yield start, Error, text[start:]
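

# A quick sanity check (illustrative, not part of the original module):
# '"a"' is emitted as Name.Tag because a ':' follows it, and 'true' is
# emitted as Keyword.Constant.
#
#     from pygments.lexers.data import JsonLexer
#
#     tokens = list(JsonLexer().get_tokens_unprocessed('{"a": true}'))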


class JsonBareObjectLexer(JsonLexer):
    """
    For JSON data structures (with missing object curly braces).

    .. versionadded:: 2.2

    .. deprecated:: 2.8.0

       Behaves the same as `JsonLexer` now.
    """

    name = 'JSONBareObject'
    aliases = []
    filenames = []
    mimetypes = []


class JsonLdLexer(JsonLexer):
    """
    For JSON-LD linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    url = 'https://json-ld.org/'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    # Each entry keeps its surrounding double quotes so it can be compared
    # directly against the quoted text of a Name.Tag token.
    json_ld_keywords = {
        '"@%s"' % keyword
        for keyword in (
            'base',
            'container',
            'context',
            'direction',
            'graph',
            'id',
            'import',
            'included',
            'index',
            'json',
            'language',
            'list',
            'nest',
            'none',
            'prefix',
            'propagate',
            'protected',
            'reverse',
            'set',
            'type',
            'value',
            'version',
            'vocab',
        )
    }

    def get_tokens_unprocessed(self, text):
        for start, token, value in super().get_tokens_unprocessed(text):
            if token is Name.Tag and value in self.json_ld_keywords:
                yield start, Name.Decorator, value
            else:
                yield start, token, value
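

# Example usage (illustrative, not part of the original module): JsonLdLexer
# re-tags JSON-LD keywords such as '"@context"' as Name.Decorator, while
# ordinary object keys remain Name.Tag.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.data import JsonLdLexer
#
#     print(highlight('{"@context": "https://schema.org/"}',
#                     JsonLdLexer(), TerminalFormatter()))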