1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc. All rights reserved.
3#
4# Use of this source code is governed by a BSD-style
5# license that can be found in the LICENSE file or at
6# https://developers.google.com/open-source/licenses/bsd
7
8"""Contains routines for printing protocol messages in text format.
9
10Simple usage example::
11
12 # Create a proto object and serialize it to a text proto string.
13 message = my_proto_pb2.MyMessage(foo='bar')
14 text_proto = text_format.MessageToString(message)
15
16 # Parse a text proto string.
17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
18"""
19
20__author__ = 'kenton@google.com (Kenton Varda)'
21
22# TODO Import thread contention leads to test failures.
23import encodings.raw_unicode_escape # pylint: disable=unused-import
24import encodings.unicode_escape # pylint: disable=unused-import
25import io
26import math
27import re
28import warnings
29
30from google.protobuf.internal import decoder
31from google.protobuf.internal import type_checkers
32from google.protobuf import descriptor
33from google.protobuf import text_encoding
34from google.protobuf import unknown_fields
35
36# pylint: disable=g-import-not-at-top
37__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
38 'PrintFieldValue', 'Merge', 'MessageToBytes']
39
40_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
41 type_checkers.Int32ValueChecker(),
42 type_checkers.Uint64ValueChecker(),
43 type_checkers.Int64ValueChecker())
44_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
45_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
46_FLOAT_OCTAL_PREFIX = re.compile('-?0[0-9]+')
47_QUOTES = frozenset(("'", '"'))
48_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
49_DEBUG_STRING_SILENT_MARKER = '\t '
50
51_as_utf8_default = True
52
53
54class Error(Exception):
55 """Top-level module error for text_format."""
56
57
58class ParseError(Error):
59 """Thrown in case of text parsing or tokenizing error."""
60
61 def __init__(self, message=None, line=None, column=None):
62 if message is not None and line is not None:
63 loc = str(line)
64 if column is not None:
65 loc += ':{0}'.format(column)
66 message = '{0} : {1}'.format(loc, message)
67 if message is not None:
68 super(ParseError, self).__init__(message)
69 else:
70 super(ParseError, self).__init__()
71 self._line = line
72 self._column = column
73
74 def GetLine(self):
75 return self._line
76
77 def GetColumn(self):
78 return self._column
79
80
81class TextWriter(object):
82
83 def __init__(self, as_utf8):
84 self._writer = io.StringIO()
85
86 def write(self, val):
87 return self._writer.write(val)
88
89 def close(self):
90 return self._writer.close()
91
92 def getvalue(self):
93 return self._writer.getvalue()
94
95
96def MessageToString(
97 message,
98 as_utf8=_as_utf8_default,
99 as_one_line=False,
100 use_short_repeated_primitives=False,
101 pointy_brackets=False,
102 use_index_order=False,
103 float_format=None,
104 double_format=None,
105 use_field_number=False,
106 descriptor_pool=None,
107 indent=0,
108 message_formatter=None,
109 print_unknown_fields=False,
110 force_colon=False) -> str:
111 """Convert protobuf message to text format.
112
113 Double values can be formatted compactly with 15 digits of
114 precision (which is the most that IEEE 754 "double" can guarantee)
115 using double_format='.15g'. To ensure that converting to text and back to a
116 proto will result in an identical value, double_format='.17g' should be used.
117
118 Args:
119 message: The protocol buffers message.
120 as_utf8: Return unescaped Unicode for non-ASCII characters.
121 as_one_line: Don't introduce newlines between fields.
122 use_short_repeated_primitives: Use short repeated format for primitives.
123 pointy_brackets: If True, use angle brackets instead of curly braces for
124 nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number order.
      Extensions will be printed at the end of the message, and their relative
      order is determined by the extension number. By default, the field
      number order is used.
    float_format (str): Deprecated. If set, use this to specify float field
      formatting (per the "Format Specification Mini-Language"); otherwise,
      the shortest float that has the same value on the wire will be printed.
      This also affects double fields if double_format is not set but
      float_format is set.
    double_format (str): Deprecated. If set, use this to specify double field
      formatting (per the "Format Specification Mini-Language"); if it is not
      set but float_format is set, use float_format. Otherwise, use ``str()``.
137 use_field_number: If True, print field numbers instead of names.
138 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
139 indent (int): The initial indent level, in terms of spaces, for pretty
140 print.
141 message_formatter (function(message, indent, as_one_line) -> unicode|None):
142 Custom formatter for selected sub-messages (usually based on message
143 type). Use to pretty print parts of the protobuf for easier diffing.
144 print_unknown_fields: If True, unknown fields will be printed.
145 force_colon: If set, a colon will be added after the field name even if the
146 field is a proto message.
147
148 Returns:
149 str: A string of the text formatted protocol buffer message.
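
  Example (illustrative; reuses the hypothetical ``my_proto_pb2.MyMessage``
  from the module docstring)::

    message = my_proto_pb2.MyMessage(foo='bar')
    # Single-line output; '.17g' keeps doubles round-trippable.
    text_proto = text_format.MessageToString(
        message, as_one_line=True, double_format='.17g')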
150 """
151 out = TextWriter(as_utf8)
152 printer = _Printer(
153 out,
154 indent,
155 as_utf8,
156 as_one_line,
157 use_short_repeated_primitives,
158 pointy_brackets,
159 use_index_order,
160 float_format,
161 double_format,
162 use_field_number,
163 descriptor_pool,
164 message_formatter,
165 print_unknown_fields=print_unknown_fields,
166 force_colon=force_colon)
167 printer.PrintMessage(message)
168 result = out.getvalue()
169 out.close()
170 if as_one_line:
171 return result.rstrip()
172 return result
173
174
175def MessageToBytes(message, **kwargs) -> bytes:
176 """Convert protobuf message to encoded text format. See MessageToString."""
177 text = MessageToString(message, **kwargs)
178 if isinstance(text, bytes):
179 return text
180 codec = 'utf-8' if kwargs.get('as_utf8') else 'ascii'
181 return text.encode(codec)
182
183
184def _IsMapEntry(field):
185 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
186 field.message_type.has_options and
187 field.message_type.GetOptions().map_entry)
188
189
190def _IsGroupLike(field):
191 """Determines if a field is consistent with a proto2 group.
192
193 Args:
194 field: The field descriptor.
195
196 Returns:
197 True if this field is group-like, false otherwise.
198 """
199 # Groups are always tag-delimited.
200 if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
201 return False
202
  # Group field names are always the lowercase type name.
204 if field.name != field.message_type.name.lower():
205 return False
206
207 if field.message_type.file != field.file:
208 return False
209
210 # Group messages are always defined in the same scope as the field. File
211 # level extensions will compare NULL == NULL here, which is why the file
212 # comparison above is necessary to ensure both come from the same file.
213 return (
214 field.message_type.containing_type == field.extension_scope
215 if field.is_extension
216 else field.message_type.containing_type == field.containing_type
217 )
218
219
220def PrintMessage(message,
221 out,
222 indent=0,
223 as_utf8=_as_utf8_default,
224 as_one_line=False,
225 use_short_repeated_primitives=False,
226 pointy_brackets=False,
227 use_index_order=False,
228 float_format=None,
229 double_format=None,
230 use_field_number=False,
231 descriptor_pool=None,
232 message_formatter=None,
233 print_unknown_fields=False,
234 force_colon=False):
235 """Convert the message to text format and write it to the out stream.
236
237 Args:
238 message: The Message object to convert to text format.
239 out: A file handle to write the message to.
240 indent: The initial indent level for pretty print.
241 as_utf8: Return unescaped Unicode for non-ASCII characters.
242 as_one_line: Don't introduce newlines between fields.
243 use_short_repeated_primitives: Use short repeated format for primitives.
244 pointy_brackets: If True, use angle brackets instead of curly braces for
245 nesting.
246 use_index_order: If True, print fields of a proto message using the order
247 defined in source code instead of the field number. By default, use the
248 field number order.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, the shortest
      float that has the same value on the wire will be printed. This also
      affects double fields if double_format is not set but float_format is
      set.
    double_format: If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, str() is used.
256 use_field_number: If True, print field numbers instead of names.
257 descriptor_pool: A DescriptorPool used to resolve Any types.
258 message_formatter: A function(message, indent, as_one_line): unicode|None
259 to custom format selected sub-messages (usually based on message type).
260 Use to pretty print parts of the protobuf for easier diffing.
261 print_unknown_fields: If True, unknown fields will be printed.
262 force_colon: If set, a colon will be added after the field name even if
263 the field is a proto message.
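
  Example (illustrative; assumes the hypothetical ``my_proto_pb2.MyMessage``
  from the module docstring)::

    import io

    out = io.StringIO()
    text_format.PrintMessage(my_proto_pb2.MyMessage(foo='bar'), out, indent=2)
    text_proto = out.getvalue()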
264 """
265 printer = _Printer(
266 out=out, indent=indent, as_utf8=as_utf8,
267 as_one_line=as_one_line,
268 use_short_repeated_primitives=use_short_repeated_primitives,
269 pointy_brackets=pointy_brackets,
270 use_index_order=use_index_order,
271 float_format=float_format,
272 double_format=double_format,
273 use_field_number=use_field_number,
274 descriptor_pool=descriptor_pool,
275 message_formatter=message_formatter,
276 print_unknown_fields=print_unknown_fields,
277 force_colon=force_colon)
278 printer.PrintMessage(message)
279
280
281def PrintField(field,
282 value,
283 out,
284 indent=0,
285 as_utf8=_as_utf8_default,
286 as_one_line=False,
287 use_short_repeated_primitives=False,
288 pointy_brackets=False,
289 use_index_order=False,
290 float_format=None,
291 double_format=None,
292 message_formatter=None,
293 print_unknown_fields=False,
294 force_colon=False):
295 """Print a single field name/value pair."""
296 printer = _Printer(out, indent, as_utf8, as_one_line,
297 use_short_repeated_primitives, pointy_brackets,
298 use_index_order, float_format, double_format,
299 message_formatter=message_formatter,
300 print_unknown_fields=print_unknown_fields,
301 force_colon=force_colon)
302 printer.PrintField(field, value)
303
304
305def PrintFieldValue(field,
306 value,
307 out,
308 indent=0,
309 as_utf8=_as_utf8_default,
310 as_one_line=False,
311 use_short_repeated_primitives=False,
312 pointy_brackets=False,
313 use_index_order=False,
314 float_format=None,
315 double_format=None,
316 message_formatter=None,
317 print_unknown_fields=False,
318 force_colon=False):
319 """Print a single field value (not including name)."""
320 printer = _Printer(out, indent, as_utf8, as_one_line,
321 use_short_repeated_primitives, pointy_brackets,
322 use_index_order, float_format, double_format,
323 message_formatter=message_formatter,
324 print_unknown_fields=print_unknown_fields,
325 force_colon=force_colon)
326 printer.PrintFieldValue(field, value)
327
328
329def _BuildMessageFromTypeName(type_name, descriptor_pool):
330 """Returns a protobuf message instance.
331
332 Args:
333 type_name: Fully-qualified protobuf message type name string.
334 descriptor_pool: DescriptorPool instance.
335
336 Returns:
    A Message instance of type matching type_name, or None if a Descriptor
    matching type_name wasn't found.
339 """
340 # pylint: disable=g-import-not-at-top
341 if descriptor_pool is None:
342 from google.protobuf import descriptor_pool as pool_mod
343 descriptor_pool = pool_mod.Default()
344 from google.protobuf import message_factory
345 try:
346 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
347 except KeyError:
348 return None
349 message_type = message_factory.GetMessageClass(message_descriptor)
350 return message_type()
351
352
353# These values must match WireType enum in //google/protobuf/wire_format.h.
354WIRETYPE_LENGTH_DELIMITED = 2
355WIRETYPE_START_GROUP = 3
356
357
358class _Printer(object):
359 """Text format printer for protocol message."""
360
361 def __init__(
362 self,
363 out,
364 indent=0,
365 as_utf8=_as_utf8_default,
366 as_one_line=False,
367 use_short_repeated_primitives=False,
368 pointy_brackets=False,
369 use_index_order=False,
370 float_format=None,
371 double_format=None,
372 use_field_number=False,
373 descriptor_pool=None,
374 message_formatter=None,
375 print_unknown_fields=False,
376 force_colon=False):
377 """Initialize the Printer.
378
379 Double values can be formatted compactly with 15 digits of precision
380 (which is the most that IEEE 754 "double" can guarantee) using
381 double_format='.15g'. To ensure that converting to text and back to a proto
382 will result in an identical value, double_format='.17g' should be used.
383
384 Args:
385 out: To record the text format result.
386 indent: The initial indent level for pretty print.
387 as_utf8: Return unescaped Unicode for non-ASCII characters.
388 as_one_line: Don't introduce newlines between fields.
389 use_short_repeated_primitives: Use short repeated format for primitives.
390 pointy_brackets: If True, use angle brackets instead of curly braces for
391 nesting.
392 use_index_order: If True, print fields of a proto message using the order
393 defined in source code instead of the field number. By default, use the
394 field number order.
      float_format: Deprecated. If set, use this to specify float field
        formatting (per the "Format Specification Mini-Language"); otherwise,
        the shortest float that has the same value on the wire will be
        printed. This also affects double fields if double_format is not set
        but float_format is set.
      double_format: Deprecated. If set, use this to specify double field
        formatting (per the "Format Specification Mini-Language"); if it is
        not set but float_format is set, use float_format. Otherwise, str()
        is used.
402 use_field_number: If True, print field numbers instead of names.
403 descriptor_pool: A DescriptorPool used to resolve Any types.
404 message_formatter: A function(message, indent, as_one_line): unicode|None
405 to custom format selected sub-messages (usually based on message type).
406 Use to pretty print parts of the protobuf for easier diffing.
407 print_unknown_fields: If True, unknown fields will be printed.
408 force_colon: If set, a colon will be added after the field name even if
409 the field is a proto message.
410 """
411 self.out = out
412 self.indent = indent
413 self.as_utf8 = as_utf8
414 self.as_one_line = as_one_line
415 self.use_short_repeated_primitives = use_short_repeated_primitives
416 self.pointy_brackets = pointy_brackets
417 self.use_index_order = use_index_order
418 self.float_format = float_format
419 if double_format is not None:
420 warnings.warn(
421 'double_format is deprecated for text_format. This will '
422 'turn into error in 7.34.0, please remove it before that.'
423 )
424 self.double_format = double_format
425 else:
426 self.double_format = float_format
427 self.use_field_number = use_field_number
428 self.descriptor_pool = descriptor_pool
429 self.message_formatter = message_formatter
430 self.print_unknown_fields = print_unknown_fields
431 self.force_colon = force_colon
432
433 def _TryPrintAsAnyMessage(self, message):
434 """Serializes if message is a google.protobuf.Any field."""
435 if '/' not in message.type_url:
436 return False
437 packed_message = _BuildMessageFromTypeName(message.TypeName(),
438 self.descriptor_pool)
439 if packed_message is not None:
440 packed_message.MergeFromString(message.value)
441 colon = ':' if self.force_colon else ''
442 self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
443 self._PrintMessageFieldValue(packed_message)
444 self.out.write(' ' if self.as_one_line else '\n')
445 return True
446 else:
447 return False
448
449 def _TryCustomFormatMessage(self, message):
450 formatted = self.message_formatter(message, self.indent, self.as_one_line)
451 if formatted is None:
452 return False
453
454 out = self.out
455 out.write(' ' * self.indent)
456 out.write(formatted)
457 out.write(' ' if self.as_one_line else '\n')
458 return True
459
460 def PrintMessage(self, message):
461 """Convert protobuf message to text format.
462
463 Args:
464 message: The protocol buffers message.
465 """
466 if self.message_formatter and self._TryCustomFormatMessage(message):
467 return
468 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
469 self._TryPrintAsAnyMessage(message)):
470 return
471 fields = message.ListFields()
472 if self.use_index_order:
473 fields.sort(
474 key=lambda x: x[0].number if x[0].is_extension else x[0].index)
475 for field, value in fields:
476 if _IsMapEntry(field):
477 for key in sorted(value):
478 # This is slow for maps with submessage entries because it copies the
479 # entire tree. Unfortunately this would take significant refactoring
480 # of this file to work around.
481 #
482 # TODO: refactor and optimize if this becomes an issue.
483 entry_submsg = value.GetEntryClass()(key=key, value=value[key])
484 self.PrintField(field, entry_submsg)
485 elif field.is_repeated:
486 if (self.use_short_repeated_primitives
487 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
488 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
489 self._PrintShortRepeatedPrimitivesValue(field, value)
490 else:
491 for element in value:
492 self.PrintField(field, element)
493 else:
494 self.PrintField(field, value)
495
496 if self.print_unknown_fields:
497 self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))
498
499 def _PrintUnknownFields(self, unknown_field_set):
500 """Print unknown fields."""
501 out = self.out
502 for field in unknown_field_set:
503 out.write(' ' * self.indent)
504 out.write(str(field.field_number))
505 if field.wire_type == WIRETYPE_START_GROUP:
506 if self.as_one_line:
507 out.write(' { ')
508 else:
509 out.write(' {\n')
510 self.indent += 2
511
512 self._PrintUnknownFields(field.data)
513
514 if self.as_one_line:
515 out.write('} ')
516 else:
517 self.indent -= 2
518 out.write(' ' * self.indent + '}\n')
519 elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
520 try:
521 # If this field is parseable as a Message, it is probably
522 # an embedded message.
523 # pylint: disable=protected-access
524 (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
525 memoryview(field.data), 0, len(field.data))
526 except Exception: # pylint: disable=broad-except
527 pos = 0
528
529 if pos == len(field.data):
530 if self.as_one_line:
531 out.write(' { ')
532 else:
533 out.write(' {\n')
534 self.indent += 2
535
536 self._PrintUnknownFields(embedded_unknown_message)
537
538 if self.as_one_line:
539 out.write('} ')
540 else:
541 self.indent -= 2
542 out.write(' ' * self.indent + '}\n')
543 else:
544 # A string or bytes field. self.as_utf8 may not work.
545 out.write(': \"')
546 out.write(text_encoding.CEscape(field.data, False))
547 out.write('\" ' if self.as_one_line else '\"\n')
548 else:
549 # varint, fixed32, fixed64
550 out.write(': ')
551 out.write(str(field.data))
552 out.write(' ' if self.as_one_line else '\n')
553
554 def _PrintFieldName(self, field):
555 """Print field name."""
556 out = self.out
557 out.write(' ' * self.indent)
558 if self.use_field_number:
559 out.write(str(field.number))
560 else:
561 if field.is_extension:
562 out.write('[')
563 if (field.containing_type.GetOptions().message_set_wire_format and
564 field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
565 not field.is_required and
566 not field.is_repeated):
567 out.write(field.message_type.full_name)
568 else:
569 out.write(field.full_name)
570 out.write(']')
571 elif _IsGroupLike(field):
572 # For groups, use the capitalized name.
573 out.write(field.message_type.name)
574 else:
575 out.write(field.name)
576
577 if (self.force_colon or
578 field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
579 # The colon is optional in this case, but our cross-language golden files
580 # don't include it. Here, the colon is only included if force_colon is
      # set to True.
582 out.write(':')
583
584 def PrintField(self, field, value):
585 """Print a single field name/value pair."""
586 self._PrintFieldName(field)
587 self.out.write(' ')
588 self.PrintFieldValue(field, value)
589 self.out.write(' ' if self.as_one_line else '\n')
590
591 def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints short repeated primitives value."""
593 # Note: this is called only when value has at least one element.
594 self._PrintFieldName(field)
595 self.out.write(' [')
596 for i in range(len(value) - 1):
597 self.PrintFieldValue(field, value[i])
598 self.out.write(', ')
599 self.PrintFieldValue(field, value[-1])
600 self.out.write(']')
601 self.out.write(' ' if self.as_one_line else '\n')
602
603 def _PrintMessageFieldValue(self, value):
604 if self.pointy_brackets:
605 openb = '<'
606 closeb = '>'
607 else:
608 openb = '{'
609 closeb = '}'
610
611 if self.as_one_line:
612 self.out.write('%s ' % openb)
613 self.PrintMessage(value)
614 self.out.write(closeb)
615 else:
616 self.out.write('%s\n' % openb)
617 self.indent += 2
618 self.PrintMessage(value)
619 self.indent -= 2
620 self.out.write(' ' * self.indent + closeb)
621
622 def PrintFieldValue(self, field, value):
623 """Print a single field value (not including name).
624
625 For repeated fields, the value should be a single element.
626
627 Args:
628 field: The descriptor of the field to be printed.
629 value: The value of the field.
630 """
631 out = self.out
632 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
633 self._PrintMessageFieldValue(value)
634 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
635 enum_value = field.enum_type.values_by_number.get(value, None)
636 if enum_value is not None:
637 out.write(enum_value.name)
638 else:
639 out.write(str(value))
640 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
641 out.write('\"')
642 if isinstance(value, str) and not self.as_utf8:
643 out_value = value.encode('utf-8')
644 else:
645 out_value = value
646 if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
647 # We always need to escape all binary data in TYPE_BYTES fields.
648 out_as_utf8 = False
649 else:
650 out_as_utf8 = self.as_utf8
651 out.write(text_encoding.CEscape(out_value, out_as_utf8))
652 out.write('\"')
653 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
654 if value:
655 out.write('true')
656 else:
657 out.write('false')
658 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
659 if self.float_format is not None:
660 warnings.warn(
661 'float_format is deprecated for text_format. This '
662 'will turn into error in 7.34.0, please remove it '
663 'before that.'
664 )
665 out.write('{1:{0}}'.format(self.float_format, value))
666 else:
667 if math.isnan(value):
668 out.write(str(value))
669 else:
670 out.write(str(type_checkers.ToShortestFloat(value)))
671 elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
672 self.double_format is not None):
673 out.write('{1:{0}}'.format(self.double_format, value))
674 else:
675 out.write(str(value))
676
677
678def Parse(text,
679 message,
680 allow_unknown_extension=False,
681 allow_field_number=False,
682 descriptor_pool=None,
683 allow_unknown_field=False):
684 """Parses a text representation of a protocol message into a message.
685
686 NOTE: for historical reasons this function does not clear the input
687 message. This is different from what the binary msg.ParseFrom(...) does.
688 If text contains a field already set in message, the value is appended if the
689 field is repeated. Otherwise, an error is raised.
690
691 Example::
692
693 a = MyProto()
694 a.repeated_field.append('test')
695 b = MyProto()
696
697 # Repeated fields are combined
698 text_format.Parse(repr(a), b)
699 text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]
700
701 # Non-repeated fields cannot be overwritten
702 a.singular_field = 1
703 b.singular_field = 2
704 text_format.Parse(repr(a), b) # ParseError
705
706 # Binary version:
707 b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"
708
709 Caller is responsible for clearing the message as needed.
710
711 Args:
712 text (str): Message text representation.
713 message (Message): A protocol buffer message to merge into.
714 allow_unknown_extension: if True, skip over missing extensions and keep
715 parsing
716 allow_field_number: if True, both field number and field name are allowed.
717 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible, as it may hide some errors (e.g.
      a spelling error in a field name).
721
722 Returns:
723 Message: The same message passed as argument.
724
725 Raises:
726 ParseError: On text parsing problems.
727 """
728 return ParseLines(text.split(b'\n' if isinstance(text, bytes) else u'\n'),
729 message,
730 allow_unknown_extension,
731 allow_field_number,
732 descriptor_pool=descriptor_pool,
733 allow_unknown_field=allow_unknown_field)
734
735
736def Merge(text,
737 message,
738 allow_unknown_extension=False,
739 allow_field_number=False,
740 descriptor_pool=None,
741 allow_unknown_field=False):
742 """Parses a text representation of a protocol message into a message.
743
744 Like Parse(), but allows repeated values for a non-repeated field, and uses
745 the last one. This means any non-repeated, top-level fields specified in text
746 replace those in the message.
747
748 Args:
749 text (str): Message text representation.
750 message (Message): A protocol buffer message to merge into.
751 allow_unknown_extension: if True, skip over missing extensions and keep
752 parsing
753 allow_field_number: if True, both field number and field name are allowed.
754 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible, as it may hide some errors (e.g.
      a spelling error in a field name).
758
759 Returns:
760 Message: The same message passed as argument.
761
762 Raises:
763 ParseError: On text parsing problems.
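
  Example (illustrative; assumes the hypothetical ``MyProto`` from the Parse()
  docstring, with a scalar field ``singular_field``)::

    a = MyProto(singular_field=1)
    # Unlike Parse(), Merge() accepts a value for an already-set non-repeated
    # field and keeps the last one, so singular_field ends up as 2.
    text_format.Merge('singular_field: 2', a)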
764 """
765 return MergeLines(
766 text.split(b'\n' if isinstance(text, bytes) else u'\n'),
767 message,
768 allow_unknown_extension,
769 allow_field_number,
770 descriptor_pool=descriptor_pool,
771 allow_unknown_field=allow_unknown_field)
772
773
774def ParseLines(lines,
775 message,
776 allow_unknown_extension=False,
777 allow_field_number=False,
778 descriptor_pool=None,
779 allow_unknown_field=False):
780 """Parses a text representation of a protocol message into a message.
781
782 See Parse() for caveats.
783
784 Args:
785 lines: An iterable of lines of a message's text representation.
786 message: A protocol buffer message to merge into.
787 allow_unknown_extension: if True, skip over missing extensions and keep
788 parsing
789 allow_field_number: if True, both field number and field name are allowed.
790 descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible, as it may hide some errors (e.g.
      a spelling error in a field name).
794
795 Returns:
796 The same message passed as argument.
797
798 Raises:
799 ParseError: On text parsing problems.
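
  Example (illustrative; assumes the hypothetical ``MyProto`` and a
  text-format file ``message.textproto``)::

    with open('message.textproto') as f:
      message = text_format.ParseLines(f, MyProto())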
800 """
801 parser = _Parser(allow_unknown_extension,
802 allow_field_number,
803 descriptor_pool=descriptor_pool,
804 allow_unknown_field=allow_unknown_field)
805 return parser.ParseLines(lines, message)
806
807
808def MergeLines(lines,
809 message,
810 allow_unknown_extension=False,
811 allow_field_number=False,
812 descriptor_pool=None,
813 allow_unknown_field=False):
814 """Parses a text representation of a protocol message into a message.
815
816 See Merge() for more details.
817
818 Args:
819 lines: An iterable of lines of a message's text representation.
820 message: A protocol buffer message to merge into.
821 allow_unknown_extension: if True, skip over missing extensions and keep
822 parsing
823 allow_field_number: if True, both field number and field name are allowed.
824 descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible, as it may hide some errors (e.g.
      a spelling error in a field name).
828
829 Returns:
830 The same message passed as argument.
831
832 Raises:
833 ParseError: On text parsing problems.
834 """
835 parser = _Parser(allow_unknown_extension,
836 allow_field_number,
837 descriptor_pool=descriptor_pool,
838 allow_unknown_field=allow_unknown_field)
839 return parser.MergeLines(lines, message)
840
841
842class _Parser(object):
843 """Text format parser for protocol message."""
844
845 def __init__(self,
846 allow_unknown_extension=False,
847 allow_field_number=False,
848 descriptor_pool=None,
849 allow_unknown_field=False):
850 self.allow_unknown_extension = allow_unknown_extension
851 self.allow_field_number = allow_field_number
852 self.descriptor_pool = descriptor_pool
853 self.allow_unknown_field = allow_unknown_field
854
855 def ParseLines(self, lines, message):
856 """Parses a text representation of a protocol message into a message."""
857 self._allow_multiple_scalars = False
858 self._ParseOrMerge(lines, message)
859 return message
860
861 def MergeLines(self, lines, message):
862 """Merges a text representation of a protocol message into a message."""
863 self._allow_multiple_scalars = True
864 self._ParseOrMerge(lines, message)
865 return message
866
867 def _ParseOrMerge(self, lines, message):
868 """Converts a text representation of a protocol message into a message.
869
870 Args:
871 lines: Lines of a message's text representation.
872 message: A protocol buffer message to merge into.
873
874 Raises:
875 ParseError: On text parsing problems.
876 """
877 # Tokenize expects native str lines.
878 try:
879 str_lines = (
880 line if isinstance(line, str) else line.decode('utf-8')
881 for line in lines)
882 tokenizer = Tokenizer(str_lines)
883 except UnicodeDecodeError as e:
884 raise ParseError from e
885 if message:
886 self.root_type = message.DESCRIPTOR.full_name
887 while not tokenizer.AtEnd():
888 self._MergeField(tokenizer, message)
889
890 def _MergeField(self, tokenizer, message):
891 """Merges a single protocol message field into a message.
892
893 Args:
894 tokenizer: A tokenizer to parse the field name and values.
895 message: A protocol message to record the data.
896
897 Raises:
898 ParseError: In case of text parsing problems.
899 """
900 message_descriptor = message.DESCRIPTOR
901 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
902 tokenizer.TryConsume('[')):
903 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
904 tokenizer.Consume(']')
905 tokenizer.TryConsume(':')
906 self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
907 type_url_prefix + '/' + packed_type_name)
908 if tokenizer.TryConsume('<'):
909 expanded_any_end_token = '>'
910 else:
911 tokenizer.Consume('{')
912 expanded_any_end_token = '}'
913 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
914 self.descriptor_pool)
915 # Direct comparison with None is used instead of implicit bool conversion
916 # to avoid false positives with falsy initial values, e.g. for
917 # google.protobuf.ListValue.
918 if expanded_any_sub_message is None:
919 raise ParseError('Type %s not found in descriptor pool' %
920 packed_type_name)
921 while not tokenizer.TryConsume(expanded_any_end_token):
922 if tokenizer.AtEnd():
923 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
924 (expanded_any_end_token,))
925 self._MergeField(tokenizer, expanded_any_sub_message)
926 deterministic = False
927
928 message.Pack(expanded_any_sub_message,
929 type_url_prefix=type_url_prefix,
930 deterministic=deterministic)
931 return
932
933 if tokenizer.TryConsume('['):
934 name = [tokenizer.ConsumeIdentifier()]
935 while tokenizer.TryConsume('.'):
936 name.append(tokenizer.ConsumeIdentifier())
937 name = '.'.join(name)
938
939 if not message_descriptor.is_extendable:
940 raise tokenizer.ParseErrorPreviousToken(
941 'Message type "%s" does not have extensions.' %
942 message_descriptor.full_name)
943 # pylint: disable=protected-access
944 field = message.Extensions._FindExtensionByName(name)
945 # pylint: enable=protected-access
946 if not field:
947 if self.allow_unknown_extension:
948 field = None
949 else:
950 raise tokenizer.ParseErrorPreviousToken(
951 'Extension "%s" not registered. '
952 'Did you import the _pb2 module which defines it? '
953 'If you are trying to place the extension in the MessageSet '
954 'field of another message that is in an Any or MessageSet field, '
955 'that message\'s _pb2 module must be imported as well' % name)
956 elif message_descriptor != field.containing_type:
957 raise tokenizer.ParseErrorPreviousToken(
958 'Extension "%s" does not extend message type "%s".' %
959 (name, message_descriptor.full_name))
960
961 tokenizer.Consume(']')
962
963 else:
964 name = tokenizer.ConsumeIdentifierOrNumber()
965 if self.allow_field_number and name.isdigit():
966 number = ParseInteger(name, True, True)
967 field = message_descriptor.fields_by_number.get(number, None)
968 if not field and message_descriptor.is_extendable:
969 field = message.Extensions._FindExtensionByNumber(number)
970 else:
971 field = message_descriptor.fields_by_name.get(name, None)
972
973 # Group names are expected to be capitalized as they appear in the
974 # .proto file, which actually matches their type names, not their field
975 # names.
976 if not field:
977 field = message_descriptor.fields_by_name.get(name.lower(), None)
978 if field and not _IsGroupLike(field):
979 field = None
980 if field and field.message_type.name != name:
981 field = None
982
983 if not field and not self.allow_unknown_field:
984 raise tokenizer.ParseErrorPreviousToken(
985 'Message type "%s" has no field named "%s".' %
986 (message_descriptor.full_name, name))
987
988 if field:
989 if not self._allow_multiple_scalars and field.containing_oneof:
990 # Check if there's a different field set in this oneof.
991 # Note that we ignore the case if the same field was set before, and we
992 # apply _allow_multiple_scalars to non-scalar fields as well.
993 which_oneof = message.WhichOneof(field.containing_oneof.name)
994 if which_oneof is not None and which_oneof != field.name:
995 raise tokenizer.ParseErrorPreviousToken(
996 'Field "%s" is specified along with field "%s", another member '
997 'of oneof "%s" for message type "%s".' %
998 (field.name, which_oneof, field.containing_oneof.name,
999 message_descriptor.full_name))
1000
1001 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
1002 tokenizer.TryConsume(':')
1003 self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
1004 field.full_name)
1005 merger = self._MergeMessageField
1006 else:
1007 tokenizer.Consume(':')
1008 self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
1009 field.full_name)
1010 merger = self._MergeScalarField
1011
1012 if (field.is_repeated and
1013 tokenizer.TryConsume('[')):
1014 # Short repeated format, e.g. "foo: [1, 2, 3]"
1015 if not tokenizer.TryConsume(']'):
1016 while True:
1017 merger(tokenizer, message, field)
1018 if tokenizer.TryConsume(']'):
1019 break
1020 tokenizer.Consume(',')
1021
1022 else:
1023 merger(tokenizer, message, field)
1024
1025 else: # Proto field is unknown.
1026 assert (self.allow_unknown_extension or self.allow_unknown_field)
1027 self._SkipFieldContents(tokenizer, name, message_descriptor.full_name)
1028
1029 # For historical reasons, fields may optionally be separated by commas or
1030 # semicolons.
1031 if not tokenizer.TryConsume(','):
1032 tokenizer.TryConsume(';')
1033
1034 def _LogSilentMarker(self, immediate_message_type, field_name):
1035 pass
1036
1037 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name):
1038 if tokenizer.contains_silent_marker_before_current_token:
1039 self._LogSilentMarker(immediate_message_type, field_name)
1040
1041 def _ConsumeAnyTypeUrl(self, tokenizer):
1042 """Consumes a google.protobuf.Any type URL and returns the type name."""
1043 # Consume "type.googleapis.com/".
1044 prefix = [tokenizer.ConsumeIdentifier()]
1045 tokenizer.Consume('.')
1046 prefix.append(tokenizer.ConsumeIdentifier())
1047 tokenizer.Consume('.')
1048 prefix.append(tokenizer.ConsumeIdentifier())
1049 tokenizer.Consume('/')
1050 # Consume the fully-qualified type name.
1051 name = [tokenizer.ConsumeIdentifier()]
1052 while tokenizer.TryConsume('.'):
1053 name.append(tokenizer.ConsumeIdentifier())
1054 return '.'.join(prefix), '.'.join(name)
1055
1056 def _MergeMessageField(self, tokenizer, message, field):
    """Merges a single message field into a message.
1058
1059 Args:
1060 tokenizer: A tokenizer to parse the field value.
1061 message: The message of which field is a member.
1062 field: The descriptor of the field to be merged.
1063
1064 Raises:
1065 ParseError: In case of text parsing problems.
1066 """
1067 is_map_entry = _IsMapEntry(field)
1068
1069 if tokenizer.TryConsume('<'):
1070 end_token = '>'
1071 else:
1072 tokenizer.Consume('{')
1073 end_token = '}'
1074
1075 if field.is_repeated:
1076 if field.is_extension:
1077 sub_message = message.Extensions[field].add()
1078 elif is_map_entry:
1079 sub_message = getattr(message, field.name).GetEntryClass()()
1080 else:
1081 sub_message = getattr(message, field.name).add()
1082 else:
1083 if field.is_extension:
1084 if (not self._allow_multiple_scalars and
1085 message.HasExtension(field)):
1086 raise tokenizer.ParseErrorPreviousToken(
1087 'Message type "%s" should not have multiple "%s" extensions.' %
1088 (message.DESCRIPTOR.full_name, field.full_name))
1089 sub_message = message.Extensions[field]
1090 else:
1091 # Also apply _allow_multiple_scalars to message field.
1092 # TODO: Change to _allow_singular_overwrites.
1093 if (not self._allow_multiple_scalars and
1094 message.HasField(field.name)):
1095 raise tokenizer.ParseErrorPreviousToken(
1096 'Message type "%s" should not have multiple "%s" fields.' %
1097 (message.DESCRIPTOR.full_name, field.name))
1098 sub_message = getattr(message, field.name)
1099 sub_message.SetInParent()
1100
1101 while not tokenizer.TryConsume(end_token):
1102 if tokenizer.AtEnd():
1103 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
1104 self._MergeField(tokenizer, sub_message)
1105
1106 if is_map_entry:
1107 value_cpptype = field.message_type.fields_by_name['value'].cpp_type
1108 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
1109 value = getattr(message, field.name)[sub_message.key]
1110 value.CopyFrom(sub_message.value)
1111 else:
1112 getattr(message, field.name)[sub_message.key] = sub_message.value
1113
1114 def _MergeScalarField(self, tokenizer, message, field):
1115 """Merges a single scalar field into a message.
1116
1117 Args:
1118 tokenizer: A tokenizer to parse the field value.
1119 message: A protocol message to record the data.
1120 field: The descriptor of the field to be merged.
1121
1122 Raises:
1123 ParseError: In case of text parsing problems.
1124 RuntimeError: On runtime errors.
1125 """
1126 _ = self.allow_unknown_extension
1127 value = None
1128
1129 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
1130 descriptor.FieldDescriptor.TYPE_SINT32,
1131 descriptor.FieldDescriptor.TYPE_SFIXED32):
1132 value = _ConsumeInt32(tokenizer)
1133 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
1134 descriptor.FieldDescriptor.TYPE_SINT64,
1135 descriptor.FieldDescriptor.TYPE_SFIXED64):
1136 value = _ConsumeInt64(tokenizer)
1137 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
1138 descriptor.FieldDescriptor.TYPE_FIXED32):
1139 value = _ConsumeUint32(tokenizer)
1140 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
1141 descriptor.FieldDescriptor.TYPE_FIXED64):
1142 value = _ConsumeUint64(tokenizer)
1143 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
1144 descriptor.FieldDescriptor.TYPE_DOUBLE):
1145 value = tokenizer.ConsumeFloat()
1146 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
1147 value = tokenizer.ConsumeBool()
1148 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
1149 value = tokenizer.ConsumeString()
1150 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
1151 value = tokenizer.ConsumeByteString()
1152 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
1153 value = tokenizer.ConsumeEnum(field)
1154 else:
1155 raise RuntimeError('Unknown field type %d' % field.type)
1156
1157 if field.is_repeated:
1158 if field.is_extension:
1159 message.Extensions[field].append(value)
1160 else:
1161 getattr(message, field.name).append(value)
1162 else:
1163 if field.is_extension:
1164 if (not self._allow_multiple_scalars and
1165 field.has_presence and
1166 message.HasExtension(field)):
1167 raise tokenizer.ParseErrorPreviousToken(
1168 'Message type "%s" should not have multiple "%s" extensions.' %
1169 (message.DESCRIPTOR.full_name, field.full_name))
1170 else:
1171 message.Extensions[field] = value
1172 else:
1173 duplicate_error = False
1174 if not self._allow_multiple_scalars:
1175 if field.has_presence:
1176 duplicate_error = message.HasField(field.name)
1177 else:
1178 # For field that doesn't represent presence, try best effort to
1179 # check multiple scalars by compare to default values.
1180 duplicate_error = not decoder.IsDefaultScalarValue(
1181 getattr(message, field.name)
1182 )
1183
1184 if duplicate_error:
1185 raise tokenizer.ParseErrorPreviousToken(
1186 'Message type "%s" should not have multiple "%s" fields.' %
1187 (message.DESCRIPTOR.full_name, field.name))
1188 else:
1189 setattr(message, field.name, value)
1190
1191 def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type):
1192 """Skips over contents (value or message) of a field.
1193
1194 Args:
1195 tokenizer: A tokenizer to parse the field name and values.
1196 field_name: The field name currently being parsed.
1197 immediate_message_type: The type of the message immediately containing
1198 the silent marker.
1199 """
1200 # Try to guess the type of this field.
1201 # If this field is not a message, there should be a ":" between the
1202 # field name and the field value and also the field value should not
1203 # start with "{" or "<" which indicates the beginning of a message body.
1204 # If there is no ":" or there is a "{" or "<" after ":", this field has
1205 # to be a message or the input is ill-formed.
1206 if tokenizer.TryConsume(
1207 ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
1208 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
1209 if tokenizer.LookingAt('['):
1210 self._SkipRepeatedFieldValue(tokenizer, immediate_message_type)
1211 else:
1212 self._SkipFieldValue(tokenizer)
1213 else:
1214 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
1215 self._SkipFieldMessage(tokenizer, immediate_message_type)
1216
1217 def _SkipField(self, tokenizer, immediate_message_type):
1218 """Skips over a complete field (name and value/message).
1219
1220 Args:
1221 tokenizer: A tokenizer to parse the field name and values.
1222 immediate_message_type: The type of the message immediately containing
1223 the silent marker.
1224 """
1225 field_name = ''
1226 if tokenizer.TryConsume('['):
1227 # Consume extension or google.protobuf.Any type URL
1228 field_name += '[' + tokenizer.ConsumeIdentifier()
1229 num_identifiers = 1
1230 while tokenizer.TryConsume('.'):
1231 field_name += '.' + tokenizer.ConsumeIdentifier()
1232 num_identifiers += 1
1233 # This is possibly a type URL for an Any message.
1234 if num_identifiers == 3 and tokenizer.TryConsume('/'):
1235 field_name += '/' + tokenizer.ConsumeIdentifier()
1236 while tokenizer.TryConsume('.'):
1237 field_name += '.' + tokenizer.ConsumeIdentifier()
1238 tokenizer.Consume(']')
1239 field_name += ']'
1240 else:
1241 field_name += tokenizer.ConsumeIdentifierOrNumber()
1242
1243 self._SkipFieldContents(tokenizer, field_name, immediate_message_type)
1244
1245 # For historical reasons, fields may optionally be separated by commas or
1246 # semicolons.
1247 if not tokenizer.TryConsume(','):
1248 tokenizer.TryConsume(';')
1249
1250 def _SkipFieldMessage(self, tokenizer, immediate_message_type):
1251 """Skips over a field message.
1252
1253 Args:
1254 tokenizer: A tokenizer to parse the field name and values.
1255 immediate_message_type: The type of the message immediately containing
1256 the silent marker
1257 """
1258 if tokenizer.TryConsume('<'):
1259 delimiter = '>'
1260 else:
1261 tokenizer.Consume('{')
1262 delimiter = '}'
1263
1264 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
1265 self._SkipField(tokenizer, immediate_message_type)
1266
1267 tokenizer.Consume(delimiter)
1268
1269 def _SkipFieldValue(self, tokenizer):
1270 """Skips over a field value.
1271
1272 Args:
1273 tokenizer: A tokenizer to parse the field name and values.
1274
1275 Raises:
1276 ParseError: In case an invalid field value is found.
1277 """
    if (not tokenizer.TryConsumeByteString() and
1279 not tokenizer.TryConsumeIdentifier() and
1280 not _TryConsumeInt64(tokenizer) and
1281 not _TryConsumeUint64(tokenizer) and
1282 not tokenizer.TryConsumeFloat()):
1283 raise ParseError('Invalid field value: ' + tokenizer.token)
1284
1285 def _SkipRepeatedFieldValue(self, tokenizer, immediate_message_type):
1286 """Skips over a repeated field value.
1287
1288 Args:
      tokenizer: A tokenizer to parse the field value.
      immediate_message_type: The type of the message immediately containing
        the silent marker.
1290 """
1291 tokenizer.Consume('[')
1292 if not tokenizer.TryConsume(']'):
1293 while True:
1294 if tokenizer.LookingAt('<') or tokenizer.LookingAt('{'):
1295 self._SkipFieldMessage(tokenizer, immediate_message_type)
1296 else:
1297 self._SkipFieldValue(tokenizer)
1298 if tokenizer.TryConsume(']'):
1299 break
1300 tokenizer.Consume(',')
1301
1302
1303class Tokenizer(object):
1304 """Protocol buffer text representation tokenizer.
1305
1306 This class handles the lower level string parsing by splitting it into
1307 meaningful tokens.
1308
1309 It was directly ported from the Java protocol buffer API.
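
  Illustrative example of direct use (most callers go through Parse/Merge)::

    tokenizer = Tokenizer(['foo: 42'])
    tokenizer.ConsumeIdentifier()   # 'foo'
    tokenizer.Consume(':')
    tokenizer.ConsumeInteger()      # 42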
1310 """
1311
1312 _WHITESPACE = re.compile(r'\s+')
1313 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
1314 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
1315 _TOKEN = re.compile('|'.join([
1316 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
1317 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
1318 ] + [ # quoted str for each quote mark
1319 # Avoid backtracking! https://stackoverflow.com/a/844267
1320 r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
1321 for mark in _QUOTES
1322 ]))
1323
1324 _IDENTIFIER = re.compile(r'[^\d\W]\w*')
1325 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
1326
1327 def __init__(self, lines, skip_comments=True):
1328 self._position = 0
1329 self._line = -1
1330 self._column = 0
1331 self._token_start = None
1332 self.token = ''
1333 self._lines = iter(lines)
1334 self._current_line = ''
1335 self._previous_line = 0
1336 self._previous_column = 0
1337 self._more_lines = True
1338 self._skip_comments = skip_comments
1339 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
1340 or self._WHITESPACE)
1341 self.contains_silent_marker_before_current_token = False
1342
1343 self._SkipWhitespace()
1344 self.NextToken()
1345
1346 def LookingAt(self, token):
1347 return self.token == token
1348
1349 def AtEnd(self):
    """Checks whether the end of the text was reached.
1351
1352 Returns:
1353 True iff the end was reached.
1354 """
1355 return not self.token
1356
1357 def _PopLine(self):
1358 while len(self._current_line) <= self._column:
1359 try:
1360 self._current_line = next(self._lines)
1361 except StopIteration:
1362 self._current_line = ''
1363 self._more_lines = False
1364 return
1365 else:
1366 self._line += 1
1367 self._column = 0
1368
1369 def _SkipWhitespace(self):
1370 while True:
1371 self._PopLine()
1372 match = self._whitespace_pattern.match(self._current_line, self._column)
1373 if not match:
1374 break
1375 self.contains_silent_marker_before_current_token = match.group(0) == (
1376 ' ' + _DEBUG_STRING_SILENT_MARKER)
1377 length = len(match.group(0))
1378 self._column += length
1379
1380 def TryConsume(self, token):
1381 """Tries to consume a given piece of text.
1382
1383 Args:
1384 token: Text to consume.
1385
1386 Returns:
1387 True iff the text was consumed.
1388 """
1389 if self.token == token:
1390 self.NextToken()
1391 return True
1392 return False
1393
1394 def Consume(self, token):
1395 """Consumes a piece of text.
1396
1397 Args:
1398 token: Text to consume.
1399
1400 Raises:
1401 ParseError: If the text couldn't be consumed.
1402 """
1403 if not self.TryConsume(token):
1404 raise self.ParseError('Expected "%s".' % token)
1405
1406 def ConsumeComment(self):
1407 result = self.token
1408 if not self._COMMENT.match(result):
1409 raise self.ParseError('Expected comment.')
1410 self.NextToken()
1411 return result
1412
1413 def ConsumeCommentOrTrailingComment(self):
1414 """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""
1415
1416 # Tokenizer initializes _previous_line and _previous_column to 0. As the
1417 # tokenizer starts, it looks like there is a previous token on the line.
1418 just_started = self._line == 0 and self._column == 0
1419
1420 before_parsing = self._previous_line
1421 comment = self.ConsumeComment()
1422
    # A trailing comment is a comment on the same line as the previous token.
1424 trailing = (self._previous_line == before_parsing
1425 and not just_started)
1426
1427 return trailing, comment
1428
1429 def TryConsumeIdentifier(self):
1430 try:
1431 self.ConsumeIdentifier()
1432 return True
1433 except ParseError:
1434 return False
1435
1436 def ConsumeIdentifier(self):
1437 """Consumes protocol message field identifier.
1438
1439 Returns:
1440 Identifier string.
1441
1442 Raises:
1443 ParseError: If an identifier couldn't be consumed.
1444 """
1445 result = self.token
1446 if not self._IDENTIFIER.match(result):
1447 raise self.ParseError('Expected identifier.')
1448 self.NextToken()
1449 return result
1450
1451 def TryConsumeIdentifierOrNumber(self):
1452 try:
1453 self.ConsumeIdentifierOrNumber()
1454 return True
1455 except ParseError:
1456 return False
1457
1458 def ConsumeIdentifierOrNumber(self):
1459 """Consumes protocol message field identifier.
1460
1461 Returns:
1462 Identifier string.
1463
1464 Raises:
1465 ParseError: If an identifier couldn't be consumed.
1466 """
1467 result = self.token
1468 if not self._IDENTIFIER_OR_NUMBER.match(result):
1469 raise self.ParseError('Expected identifier or number, got %s.' % result)
1470 self.NextToken()
1471 return result
1472
1473 def TryConsumeInteger(self):
1474 try:
1475 self.ConsumeInteger()
1476 return True
1477 except ParseError:
1478 return False
1479
1480 def ConsumeInteger(self):
1481 """Consumes an integer number.
1482
1483 Returns:
1484 The integer parsed.
1485
1486 Raises:
1487 ParseError: If an integer couldn't be consumed.
1488 """
1489 try:
1490 result = _ParseAbstractInteger(self.token)
1491 except ValueError as e:
1492 raise self.ParseError(str(e))
1493 self.NextToken()
1494 return result
1495
1496 def TryConsumeFloat(self):
1497 try:
1498 self.ConsumeFloat()
1499 return True
1500 except ParseError:
1501 return False
1502
1503 def ConsumeFloat(self):
    """Consumes a floating point number.
1505
1506 Returns:
1507 The number parsed.
1508
1509 Raises:
1510 ParseError: If a floating point number couldn't be consumed.
1511 """
1512 try:
1513 result = ParseFloat(self.token)
1514 except ValueError as e:
1515 raise self.ParseError(str(e))
1516 self.NextToken()
1517 return result
1518
1519 def ConsumeBool(self):
1520 """Consumes a boolean value.
1521
1522 Returns:
1523 The bool parsed.
1524
1525 Raises:
1526 ParseError: If a boolean value couldn't be consumed.
1527 """
1528 try:
1529 result = ParseBool(self.token)
1530 except ValueError as e:
1531 raise self.ParseError(str(e))
1532 self.NextToken()
1533 return result
1534
1535 def TryConsumeByteString(self):
1536 try:
1537 self.ConsumeByteString()
1538 return True
1539 except ParseError:
1540 return False
1541
1542 def ConsumeString(self):
1543 """Consumes a string value.
1544
1545 Returns:
1546 The string parsed.
1547
1548 Raises:
1549 ParseError: If a string value couldn't be consumed.
1550 """
1551 the_bytes = self.ConsumeByteString()
1552 try:
1553 return str(the_bytes, 'utf-8')
1554 except UnicodeDecodeError as e:
1555 raise self._StringParseError(e)
1556
1557 def ConsumeByteString(self):
1558 """Consumes a byte array value.
1559
1560 Returns:
1561 The array parsed (as a string).
1562
1563 Raises:
1564 ParseError: If a byte array value couldn't be consumed.
1565 """
1566 the_list = [self._ConsumeSingleByteString()]
1567 while self.token and self.token[0] in _QUOTES:
1568 the_list.append(self._ConsumeSingleByteString())
1569 return b''.join(the_list)
1570
1571 def _ConsumeSingleByteString(self):
1572 """Consume one token of a string literal.
1573
1574 String literals (whether bytes or text) can come in multiple adjacent
1575 tokens which are automatically concatenated, like in C or Python. This
1576 method only consumes one token.
1577
1578 Returns:
1579 The token parsed.
1580 Raises:
1581 ParseError: When the wrong format data is found.
1582 """
1583 text = self.token
1584 if len(text) < 1 or text[0] not in _QUOTES:
1585 raise self.ParseError('Expected string but found: %r' % (text,))
1586
1587 if len(text) < 2 or text[-1] != text[0]:
1588 raise self.ParseError('String missing ending quote: %r' % (text,))
1589
1590 try:
1591 result = text_encoding.CUnescape(text[1:-1])
1592 except ValueError as e:
1593 raise self.ParseError(str(e))
1594 self.NextToken()
1595 return result
1596
1597 def ConsumeEnum(self, field):
1598 try:
1599 result = ParseEnum(field, self.token)
1600 except ValueError as e:
1601 raise self.ParseError(str(e))
1602 self.NextToken()
1603 return result
1604
1605 def ParseErrorPreviousToken(self, message):
1606 """Creates and *returns* a ParseError for the previously read token.
1607
1608 Args:
1609 message: A message to set for the exception.
1610
1611 Returns:
1612 A ParseError instance.
1613 """
1614 return ParseError(message, self._previous_line + 1,
1615 self._previous_column + 1)
1616
1617 def ParseError(self, message):
1618 """Creates and *returns* a ParseError for the current token."""
1619 return ParseError('\'' + self._current_line + '\': ' + message,
1620 self._line + 1, self._column + 1)
1621
1622 def _StringParseError(self, e):
1623 return self.ParseError('Couldn\'t parse string: ' + str(e))
1624
1625 def NextToken(self):
1626 """Reads the next meaningful token."""
1627 self._previous_line = self._line
1628 self._previous_column = self._column
1629 self.contains_silent_marker_before_current_token = False
1630
1631 self._column += len(self.token)
1632 self._SkipWhitespace()
1633
1634 if not self._more_lines:
1635 self.token = ''
1636 return
1637
1638 match = self._TOKEN.match(self._current_line, self._column)
1639 if not match and not self._skip_comments:
1640 match = self._COMMENT.match(self._current_line, self._column)
1641 if match:
1642 token = match.group(0)
1643 self.token = token
1644 else:
1645 self.token = self._current_line[self._column]
1646
1647# Aliased so it can still be accessed by current visibility violators.
1648# TODO: Migrate violators to textformat_tokenizer.
1649_Tokenizer = Tokenizer # pylint: disable=invalid-name
1650
1651
1652def _ConsumeInt32(tokenizer):
1653 """Consumes a signed 32bit integer number from tokenizer.
1654
1655 Args:
1656 tokenizer: A tokenizer used to parse the number.
1657
1658 Returns:
1659 The integer parsed.
1660
1661 Raises:
1662 ParseError: If a signed 32bit integer couldn't be consumed.
1663 """
1664 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
1665
1666
1667def _ConsumeUint32(tokenizer):
1668 """Consumes an unsigned 32bit integer number from tokenizer.
1669
1670 Args:
1671 tokenizer: A tokenizer used to parse the number.
1672
1673 Returns:
1674 The integer parsed.
1675
1676 Raises:
1677 ParseError: If an unsigned 32bit integer couldn't be consumed.
1678 """
1679 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1680
1681
1682def _TryConsumeInt64(tokenizer):
1683 try:
1684 _ConsumeInt64(tokenizer)
1685 return True
1686 except ParseError:
1687 return False
1688
1689
1690def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.
1692
1693 Args:
1694 tokenizer: A tokenizer used to parse the number.
1695
1696 Returns:
1697 The integer parsed.
1698
1699 Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
1701 """
1702 return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1703
1704
1705def _TryConsumeUint64(tokenizer):
1706 try:
1707 _ConsumeUint64(tokenizer)
1708 return True
1709 except ParseError:
1710 return False
1711
1712
1713def _ConsumeUint64(tokenizer):
1714 """Consumes an unsigned 64bit integer number from tokenizer.
1715
1716 Args:
1717 tokenizer: A tokenizer used to parse the number.
1718
1719 Returns:
1720 The integer parsed.
1721
1722 Raises:
1723 ParseError: If an unsigned 64bit integer couldn't be consumed.
1724 """
1725 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
1726
1727
1728def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
1729 """Consumes an integer number from tokenizer.
1730
1731 Args:
1732 tokenizer: A tokenizer used to parse the number.
1733 is_signed: True if a signed integer must be parsed.
1734 is_long: True if a long integer must be parsed.
1735
1736 Returns:
1737 The integer parsed.
1738
1739 Raises:
1740 ParseError: If an integer with given characteristics couldn't be consumed.
1741 """
1742 try:
1743 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
1744 except ValueError as e:
1745 raise tokenizer.ParseError(str(e))
1746 tokenizer.NextToken()
1747 return result
1748
1749
1750def ParseInteger(text, is_signed=False, is_long=False):
1751 """Parses an integer.
1752
1753 Args:
1754 text: The text to parse.
1755 is_signed: True if a signed integer must be parsed.
1756 is_long: True if a long integer must be parsed.
1757
1758 Returns:
1759 The integer value.
1760
1761 Raises:
    ValueError: If the text is not a valid integer.
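
  Examples (illustrative)::

    ParseInteger('0x1f')                # 31 (hex accepted)
    ParseInteger('010')                 # 8 (C-style octal accepted)
    ParseInteger('-5', is_signed=True)  # -5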
1763 """
1764 # Do the actual parsing. Exception handling is propagated to caller.
1765 result = _ParseAbstractInteger(text)
1766
1767 # Check if the integer is sane. Exceptions handled by callers.
1768 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1769 checker.CheckValue(result)
1770 return result
1771
1772
1773def _ParseAbstractInteger(text):
1774 """Parses an integer without checking size/signedness.
1775
1776 Args:
1777 text: The text to parse.
1778
1779 Returns:
1780 The integer value.
1781
1782 Raises:
    ValueError: If the text is not a valid integer.
1784 """
1785 # Do the actual parsing. Exception handling is propagated to caller.
1786 orig_text = text
1787 c_octal_match = re.match(r'(-?)0(\d+)$', text)
1788 if c_octal_match:
1789 # Python 3 no longer supports 0755 octal syntax without the 'o', so
1790 # we always use the '0o' prefix for multi-digit numbers starting with 0.
1791 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2)
1792 try:
1793 return int(text, 0)
1794 except ValueError:
1795 raise ValueError('Couldn\'t parse integer: %s' % orig_text)
1796
1797
1798def ParseFloat(text):
1799 """Parse a floating point number.
1800
1801 Args:
1802 text: Text to parse.
1803
1804 Returns:
1805 The number parsed.
1806
1807 Raises:
1808 ValueError: If a floating point number couldn't be parsed.
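
  Examples (illustrative)::

    ParseFloat('1.5f')   # 1.5 (trailing 'f' suffix accepted)
    ParseFloat('-inf')   # float('-inf')
    ParseFloat('nan')    # float('nan')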
1809 """
1810 if _FLOAT_OCTAL_PREFIX.match(text):
1811 raise ValueError('Invalid octal float: %s' % text)
1812 try:
1813 # Assume Python compatible syntax.
1814 return float(text)
1815 except ValueError:
1816 # Check alternative spellings.
1817 if _FLOAT_INFINITY.match(text):
1818 if text[0] == '-':
1819 return float('-inf')
1820 else:
1821 return float('inf')
1822 elif _FLOAT_NAN.match(text):
1823 return float('nan')
1824 else:
1825 # assume '1.0f' format
1826 try:
1827 return float(text.rstrip('fF'))
1828 except ValueError:
1829 raise ValueError("Couldn't parse float: %s" % text)
1830
1831
1832def ParseBool(text):
1833 """Parse a boolean value.
1834
1835 Args:
1836 text: Text to parse.
1837
1838 Returns:
    The boolean value parsed.
1840
1841 Raises:
1842 ValueError: If text is not a valid boolean.
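
  Examples (illustrative)::

    ParseBool('true')   # True
    ParseBool('0')      # False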
1843 """
1844 if text in ('true', 't', '1', 'True'):
1845 return True
1846 elif text in ('false', 'f', '0', 'False'):
1847 return False
1848 else:
1849 raise ValueError('Expected "true" or "false".')
1850
1851
1852def ParseEnum(field, value):
1853 """Parse an enum value.
1854
1855 The value can be specified by a number (the enum value), or by
1856 a string literal (the enum name).
1857
1858 Args:
1859 field: Enum field descriptor.
1860 value: String value.
1861
1862 Returns:
1863 Enum value number.
1864
1865 Raises:
1866 ValueError: If the enum value could not be parsed.
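
  Example (illustrative; ``color`` is a hypothetical enum field of the
  hypothetical ``my_proto_pb2.MyMessage``)::

    field = my_proto_pb2.MyMessage.DESCRIPTOR.fields_by_name['color']
    ParseEnum(field, 'RED')  # the number of the (hypothetical) RED value
    ParseEnum(field, '1')    # a numeric literal is accepted as well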
1867 """
1868 enum_descriptor = field.enum_type
1869 try:
1870 number = int(value, 0)
1871 except ValueError:
1872 # Identifier.
1873 enum_value = enum_descriptor.values_by_name.get(value, None)
1874 if enum_value is None:
1875 raise ValueError('Enum type "%s" has no value named %s.' %
1876 (enum_descriptor.full_name, value))
1877 else:
1878 if not field.enum_type.is_closed:
1879 return number
1880 enum_value = enum_descriptor.values_by_number.get(number, None)
1881 if enum_value is None:
1882 raise ValueError('Enum type "%s" has no value with number %d.' %
1883 (enum_descriptor.full_name, number))
1884 return enum_value.number