Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/protobuf/text_format.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

749 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

__author__ = 'kenton@google.com (Kenton Varda)'

# TODO Import thread contention leads to test failures.
import encodings.raw_unicode_escape  # pylint: disable=unused-import
import encodings.unicode_escape  # pylint: disable=unused-import
import io
import math
import re

from google.protobuf.internal import decoder
from google.protobuf.internal import type_checkers
from google.protobuf import descriptor
from google.protobuf import text_encoding
from google.protobuf import unknown_fields

# pylint: disable=g-import-not-at-top
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'MessageToBytes']

# Checkers used when validating parsed integer scalars, one per
# width/signedness combination.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Matches 'inf'/'infinity' with optional sign and optional trailing 'f',
# case-insensitively; used when parsing float tokens.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
# Matches 'nan' with an optional trailing 'f', case-insensitively.
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
_QUOTES = frozenset(("'", '"'))
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
# Marker emitted by DebugString to flag debug-format output.
_DEBUG_STRING_SILENT_MARKER = '\t '

48 

49 

class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    """Builds the error, prefixing *message* with 'line[:column] : ' if known.

    Args:
      message: Optional human-readable description of the problem.
      line: Optional 1-based line number where the problem occurred.
      column: Optional column number; only used when *line* is given.
    """
    if message is not None and line is not None:
      loc = str(line)
      if column is not None:
        loc += ':{0}'.format(column)
      message = '{0} : {1}'.format(loc, message)
    # Forward the (possibly location-prefixed) message to Exception, or
    # nothing at all when no message was supplied.
    args = () if message is None else (message,)
    super(ParseError, self).__init__(*args)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the 1-based line number of the error, or None if unknown."""
    return self._line

  def GetColumn(self):
    """Returns the column number of the error, or None if unknown."""
    return self._column

75 

76 

class TextWriter(object):
  """Thin adapter that accumulates printed text in an in-memory buffer.

  The ``as_utf8`` constructor argument is accepted for interface
  compatibility but ignored: output is always accumulated as ``str``.
  """

  def __init__(self, as_utf8):
    del as_utf8  # Unused; retained so the call signature stays stable.
    self._writer = io.StringIO()

  def write(self, val):
    """Appends *val* to the buffer; returns the number of characters written."""
    return self._writer.write(val)

  def close(self):
    """Closes the underlying buffer, releasing its storage."""
    return self._writer.close()

  def getvalue(self):
    """Returns everything written so far as a single string."""
    return self._writer.getvalue()

90 

91 

def MessageToString(
    message,
    as_utf8=False,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Convert protobuf message to text format.

  Double values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using double_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, double_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number, extensions
      will be printed at the end of the message and their relative order is
      determined by the extension number. By default, use the field number
      order.
    float_format (str): If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, shortest float
      that has same value in wire will be printed. Also affect double field
      if double_format is not set but float_format is set.
    double_format (str): If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, use ``str()``
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if the
      field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  # Build the printer and render directly; all formatting decisions live
  # inside _Printer.
  _Printer(
      writer,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # Single-line output carries a trailing separator space; strip it.
  return text.rstrip() if as_one_line else text

169 

170 

def MessageToBytes(message, **kwargs) -> bytes:
  """Convert protobuf message to encoded text format.  See MessageToString."""
  rendered = MessageToString(message, **kwargs)
  if isinstance(rendered, bytes):
    return rendered
  # UTF-8 when the caller asked for unescaped Unicode, ASCII otherwise.
  encoding = 'utf-8' if kwargs.get('as_utf8') else 'ascii'
  return rendered.encode(encoding)

178 

179 

def _IsMapEntry(field):
  """Returns true if *field* is the synthetic entry message of a map field."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

184 

185 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Convert the message to text format and write it to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, shortest
      float that has same value in wire will be printed. Also affect double
      field if double_format is not set but float_format is set.
    double_format: If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  # All options are forwarded verbatim; _Printer owns the formatting logic.
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)

245 

246 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Print a single field name/value pair."""
  # Construct the printer with keyword arguments for readability and
  # delegate the actual formatting to it.
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintField(field, value)

269 

270 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Print a single field value (not including name)."""
  # Construct the printer with keyword arguments for readability and
  # delegate the actual formatting to it.
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintFieldValue(field, value)

293 

294 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a protobuf message instance.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    was found matching type_name.
  """
  # pylint: disable=g-import-not-at-top
  if descriptor_pool is None:
    # Fall back to the process-wide default pool.
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  from google.protobuf import message_factory
  try:
    found_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    return None
  # Resolve the concrete message class and return a fresh instance of it.
  return message_factory.GetMessageClass(found_descriptor)()

317 

318 

# These values must match WireType enum in //google/protobuf/wire_format.h.
# Used below to classify unknown fields when printing them.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

322 

323 

class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=False,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (which is the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'. To ensure that converting to text and back to a proto
    will result in an identical value, double_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: If set, use this to specify float field formatting
        (per the "Format Specification Mini-Language"); otherwise, shortest
        float that has same value in wire will be printed. Also affect double
        field if double_format is not set but float_format is set.
      double_format: If set, use this to specify double field formatting
        (per the "Format Specification Mini-Language"); if it is not set but
        float_format is set, use float_format. Otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    # Doubles fall back to the float format when no explicit double format
    # was provided (may still be None, meaning str() formatting).
    if double_format is not None:
      self.double_format = double_format
    else:
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field."""
    if '/' not in message.type_url:
      return False
    # Resolve the packed type and unpack it so we can print it expanded as
    # [type_url] { ... } instead of raw type_url/value fields.
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message:
      packed_message.MergeFromString(message.value)
      colon = ':' if self.force_colon else ''
      self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
      self._PrintMessageFieldValue(packed_message)
      self.out.write(' ' if self.as_one_line else '\n')
      return True
    else:
      # Type not found in the pool; caller falls back to default printing.
      return False

  def _TryCustomFormatMessage(self, message):
    """Returns True if message_formatter produced output for this message."""
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    # Custom formatter and Any-expansion take precedence over normal
    # field-by-field printing.
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      # Extensions sort by number after regular fields (which sort by
      # declaration index).
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree.  Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO: refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          # Compact [v1, v2, ...] form for repeated scalars.
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        if self.as_one_line:
          out.write(' { ')
        else:
          out.write(' {\n')
          self.indent += 2

        # Groups nest; recurse with the increased indent.
        self._PrintUnknownFields(field.data)

        if self.as_one_line:
          out.write('} ')
        else:
          self.indent -= 2
          out.write(' ' * self.indent + '}\n')
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:  # pylint: disable=broad-except
          pos = 0

        # Only treat it as a message if the decode consumed every byte.
        if pos == len(field.data):
          if self.as_one_line:
            out.write(' { ')
          else:
            out.write(' {\n')
            self.indent += 2

          self._PrintUnknownFields(embedded_unknown_message)

          if self.as_one_line:
            out.write('} ')
          else:
            self.indent -= 2
            out.write(' ' * self.indent + '}\n')
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
          # MessageSet extensions print the payload type name instead of
          # the extension field's own name.
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints short repeated primitives value."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for i in range(len(value) - 1):
      self.PrintFieldValue(field, value[i])
      self.out.write(', ')
    self.PrintFieldValue(field, value[-1])
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a nested message surrounded by {} or <> brackets."""
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Unknown enum number (open enums): print the raw integer.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if self.float_format is not None:
        out.write('{1:{0}}'.format(self.float_format, value))
      else:
        if math.isnan(value):
          out.write(str(value))
        else:
          # Shortest representation that round-trips through a 32-bit float.
          out.write(str(type_checkers.ToShortestFloat(value)))
    elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
          self.double_format is not None):
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      # Integers, and doubles without an explicit format.
      out.write(str(value))

633 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message. This is different from what the binary msg.ParseFrom(...) does.
  If text contains a field already set in message, the value is appended if the
  field is repeated. Otherwise, an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the separator matching the input's type (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(
      text.split(newline),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field)

690 

691 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one. This means any non-repeated, top-level fields specified in text
  replace those in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the separator matching the input's type (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(
      text.split(newline),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field)

728 

729 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Parse mode: duplicate scalar values are an error (see _Parser.ParseLines).
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).ParseLines(lines, message)

762 

763 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Merge mode: later scalar values replace earlier ones
  # (see _Parser.MergeLines).
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).MergeLines(lines, message)

796 

797 

class _Parser(object):
  """Text format parser for protocol message."""

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
    # Options are stored as-is; they are consulted during _MergeField.
    self.allow_unknown_extension = allow_unknown_extension
    self.allow_field_number = allow_field_number
    self.descriptor_pool = descriptor_pool
    self.allow_unknown_field = allow_unknown_field

  def ParseLines(self, lines, message):
    """Parses a text representation of a protocol message into a message."""
    # Parse semantics: a repeated scalar assignment to the same singular
    # field is an error.
    self._allow_multiple_scalars = False
    self._ParseOrMerge(lines, message)
    return message

  def MergeLines(self, lines, message):
    """Merges a text representation of a protocol message into a message."""
    # Merge semantics: later scalar assignments overwrite earlier ones.
    self._allow_multiple_scalars = True
    self._ParseOrMerge(lines, message)
    return message

  def _ParseOrMerge(self, lines, message):
    """Converts a text representation of a protocol message into a message.

    Args:
      lines: Lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Raises:
      ParseError: On text parsing problems.
    """
    # Tokenize expects native str lines.
    try:
      # Lazily decode bytes lines as UTF-8; str lines pass through untouched.
      str_lines = (
          line if isinstance(line, str) else line.decode('utf-8')
          for line in lines)
      tokenizer = Tokenizer(str_lines)
    except UnicodeDecodeError as e:
      raise ParseError from e
    if message:
      # Remember the root message type for error reporting further down.
      self.root_type = message.DESCRIPTOR.full_name
    while not tokenizer.AtEnd():
      self._MergeField(tokenizer, message)

845 

  def _MergeField(self, tokenizer, message):
    """Merges a single protocol message field into a message.

    Handles three field-name shapes: an expanded google.protobuf.Any
    (``[type_url/full.type] { ... }``), an extension (``[full.name]``), and a
    plain identifier or (when allow_field_number is set) a field number.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      message: A protocol message to record the data.

    Raises:
      ParseError: In case of text parsing problems.
    """
    message_descriptor = message.DESCRIPTOR
    # Expanded Any: parse the type URL, then the packed message body, and
    # finally Pack() the sub-message back into the Any.
    if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
        tokenizer.TryConsume('[')):
      type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
      tokenizer.Consume(']')
      # The ':' before the body is optional.
      tokenizer.TryConsume(':')
      self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                               type_url_prefix + '/' + packed_type_name)
      if tokenizer.TryConsume('<'):
        expanded_any_end_token = '>'
      else:
        tokenizer.Consume('{')
        expanded_any_end_token = '}'
      expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
                                                          self.descriptor_pool)
      # Direct comparison with None is used instead of implicit bool conversion
      # to avoid false positives with falsy initial values, e.g. for
      # google.protobuf.ListValue.
      if expanded_any_sub_message is None:
        raise ParseError('Type %s not found in descriptor pool' %
                         packed_type_name)
      while not tokenizer.TryConsume(expanded_any_end_token):
        if tokenizer.AtEnd():
          raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
                                                  (expanded_any_end_token,))
        self._MergeField(tokenizer, expanded_any_sub_message)
      deterministic = False

      message.Pack(expanded_any_sub_message,
                   type_url_prefix=type_url_prefix,
                   deterministic=deterministic)
      return

    if tokenizer.TryConsume('['):
      # Extension field: "[qualified.extension.name]".
      name = [tokenizer.ConsumeIdentifier()]
      while tokenizer.TryConsume('.'):
        name.append(tokenizer.ConsumeIdentifier())
      name = '.'.join(name)

      if not message_descriptor.is_extendable:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" does not have extensions.' %
            message_descriptor.full_name)
      # pylint: disable=protected-access
      field = message.Extensions._FindExtensionByName(name)
      # pylint: enable=protected-access
      if not field:
        if self.allow_unknown_extension:
          # Fall through to the unknown-field skipping path below.
          field = None
        else:
          raise tokenizer.ParseErrorPreviousToken(
              'Extension "%s" not registered. '
              'Did you import the _pb2 module which defines it? '
              'If you are trying to place the extension in the MessageSet '
              'field of another message that is in an Any or MessageSet field, '
              'that message\'s _pb2 module must be imported as well' % name)
      elif message_descriptor != field.containing_type:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" does not extend message type "%s".' %
            (name, message_descriptor.full_name))

      tokenizer.Consume(']')

    else:
      # Regular field, addressed by name or (optionally) by number.
      name = tokenizer.ConsumeIdentifierOrNumber()
      if self.allow_field_number and name.isdigit():
        number = ParseInteger(name, True, True)
        field = message_descriptor.fields_by_number.get(number, None)
        if not field and message_descriptor.is_extendable:
          field = message.Extensions._FindExtensionByNumber(number)
      else:
        field = message_descriptor.fields_by_name.get(name, None)

        # Group names are expected to be capitalized as they appear in the
        # .proto file, which actually matches their type names, not their field
        # names.
        if not field:
          field = message_descriptor.fields_by_name.get(name.lower(), None)
          if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
            field = None

        if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
            field.message_type.name != name):
          field = None

      if not field and not self.allow_unknown_field:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" has no field named "%s".' %
            (message_descriptor.full_name, name))

    if field:
      if not self._allow_multiple_scalars and field.containing_oneof:
        # Check if there's a different field set in this oneof.
        # Note that we ignore the case if the same field was set before, and we
        # apply _allow_multiple_scalars to non-scalar fields as well.
        which_oneof = message.WhichOneof(field.containing_oneof.name)
        if which_oneof is not None and which_oneof != field.name:
          raise tokenizer.ParseErrorPreviousToken(
              'Field "%s" is specified along with field "%s", another member '
              'of oneof "%s" for message type "%s".' %
              (field.name, which_oneof, field.containing_oneof.name,
               message_descriptor.full_name))

      # Message-typed fields may omit the ':'; scalar fields require it.
      if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        tokenizer.TryConsume(':')
        self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                                 field.full_name)
        merger = self._MergeMessageField
      else:
        tokenizer.Consume(':')
        self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                                 field.full_name)
        merger = self._MergeScalarField

      if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
          tokenizer.TryConsume('[')):
        # Short repeated format, e.g. "foo: [1, 2, 3]"
        if not tokenizer.TryConsume(']'):
          while True:
            merger(tokenizer, message, field)
            if tokenizer.TryConsume(']'):
              break
            tokenizer.Consume(',')

      else:
        merger(tokenizer, message, field)

    else:  # Proto field is unknown.
      assert (self.allow_unknown_extension or self.allow_unknown_field)
      self._SkipFieldContents(tokenizer, name, message_descriptor.full_name)

    # For historical reasons, fields may optionally be separated by commas or
    # semicolons.
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')

991 

  def _LogSilentMarker(self, immediate_message_type, field_name):
    """No-op hook invoked when a debug-string silent marker is detected.

    Intentionally does nothing here; presumably overridden elsewhere to log
    the occurrence — TODO confirm against subclasses/callers.
    """
    pass

994 

995 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

996 if tokenizer.contains_silent_marker_before_current_token: 

997 self._LogSilentMarker(immediate_message_type, field_name) 

998 

999 def _ConsumeAnyTypeUrl(self, tokenizer): 

1000 """Consumes a google.protobuf.Any type URL and returns the type name.""" 

1001 # Consume "type.googleapis.com/". 

1002 prefix = [tokenizer.ConsumeIdentifier()] 

1003 tokenizer.Consume('.') 

1004 prefix.append(tokenizer.ConsumeIdentifier()) 

1005 tokenizer.Consume('.') 

1006 prefix.append(tokenizer.ConsumeIdentifier()) 

1007 tokenizer.Consume('/') 

1008 # Consume the fully-qualified type name. 

1009 name = [tokenizer.ConsumeIdentifier()] 

1010 while tokenizer.TryConsume('.'): 

1011 name.append(tokenizer.ConsumeIdentifier()) 

1012 return '.'.join(prefix), '.'.join(name) 

1013 

  def _MergeMessageField(self, tokenizer, message, field):
    """Merges a single message-typed field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: The message of which field is a member.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
    """
    is_map_entry = _IsMapEntry(field)

    # Accept either '<...>' or '{...}' delimiters for the sub-message body.
    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      elif is_map_entry:
        # Map entries are merged into a synthetic entry message first, then
        # copied into the map container below.
        sub_message = getattr(message, field.name).GetEntryClass()()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        sub_message = message.Extensions[field]
      else:
        # Also apply _allow_multiple_scalars to message field.
        # TODO: Change to _allow_singular_overwrites.
        if (not self._allow_multiple_scalars and
            message.HasField(field.name)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message present even if its body is empty.
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
      self._MergeField(tokenizer, sub_message)

    if is_map_entry:
      value_cpptype = field.message_type.fields_by_name['value'].cpp_type
      if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        value = getattr(message, field.name)[sub_message.key]
        value.CopyFrom(sub_message.value)
      else:
        getattr(message, field.name)[sub_message.key] = sub_message.value

1071 

  def _MergeScalarField(self, tokenizer, message, field):
    """Merges a single scalar field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: A protocol message to record the data.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
      RuntimeError: On runtime errors.
    """
    _ = self.allow_unknown_extension
    value = None

    # Dispatch on the wire type of the field to pick the right consumer.
    if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                      descriptor.FieldDescriptor.TYPE_SINT32,
                      descriptor.FieldDescriptor.TYPE_SFIXED32):
      value = _ConsumeInt32(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                        descriptor.FieldDescriptor.TYPE_SINT64,
                        descriptor.FieldDescriptor.TYPE_SFIXED64):
      value = _ConsumeInt64(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                        descriptor.FieldDescriptor.TYPE_FIXED32):
      value = _ConsumeUint32(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                        descriptor.FieldDescriptor.TYPE_FIXED64):
      value = _ConsumeUint64(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                        descriptor.FieldDescriptor.TYPE_DOUBLE):
      value = tokenizer.ConsumeFloat()
    elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
      value = tokenizer.ConsumeBool()
    elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
      value = tokenizer.ConsumeString()
    elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      value = tokenizer.ConsumeByteString()
    elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
      value = tokenizer.ConsumeEnum(field)
    else:
      raise RuntimeError('Unknown field type %d' % field.type)

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        message.Extensions[field].append(value)
      else:
        getattr(message, field.name).append(value)
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            field.has_presence and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        else:
          message.Extensions[field] = value
      else:
        duplicate_error = False
        if not self._allow_multiple_scalars:
          if field.has_presence:
            duplicate_error = message.HasField(field.name)
          else:
            # For field that doesn't represent presence, try best effort to
            # check multiple scalars by compare to default values.
            duplicate_error = bool(getattr(message, field.name))

        if duplicate_error:
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        else:
          setattr(message, field.name, value)

1146 

  def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type):
    """Skips over contents (value or message) of a field.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      field_name: The field name currently being parsed.
      immediate_message_type: The type of the message immediately containing
        the silent marker.
    """
    # Try to guess the type of this field.
    # If this field is not a message, there should be a ":" between the
    # field name and the field value and also the field value should not
    # start with "{" or "<" which indicates the beginning of a message body.
    # If there is no ":" or there is a "{" or "<" after ":", this field has
    # to be a message or the input is ill-formed.
    # NOTE: TryConsume(':') has the side effect of consuming the colon when
    # present, so the else-branch below handles both "no colon" and
    # "colon followed by a message body".
    if tokenizer.TryConsume(
        ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
      self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
      if tokenizer.LookingAt('['):
        self._SkipRepeatedFieldValue(tokenizer)
      else:
        self._SkipFieldValue(tokenizer)
    else:
      self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
      self._SkipFieldMessage(tokenizer, immediate_message_type)

1172 

1173 def _SkipField(self, tokenizer, immediate_message_type): 

1174 """Skips over a complete field (name and value/message). 

1175 

1176 Args: 

1177 tokenizer: A tokenizer to parse the field name and values. 

1178 immediate_message_type: The type of the message immediately containing 

1179 the silent marker. 

1180 """ 

1181 field_name = '' 

1182 if tokenizer.TryConsume('['): 

1183 # Consume extension or google.protobuf.Any type URL 

1184 field_name += '[' + tokenizer.ConsumeIdentifier() 

1185 num_identifiers = 1 

1186 while tokenizer.TryConsume('.'): 

1187 field_name += '.' + tokenizer.ConsumeIdentifier() 

1188 num_identifiers += 1 

1189 # This is possibly a type URL for an Any message. 

1190 if num_identifiers == 3 and tokenizer.TryConsume('/'): 

1191 field_name += '/' + tokenizer.ConsumeIdentifier() 

1192 while tokenizer.TryConsume('.'): 

1193 field_name += '.' + tokenizer.ConsumeIdentifier() 

1194 tokenizer.Consume(']') 

1195 field_name += ']' 

1196 else: 

1197 field_name += tokenizer.ConsumeIdentifierOrNumber() 

1198 

1199 self._SkipFieldContents(tokenizer, field_name, immediate_message_type) 

1200 

1201 # For historical reasons, fields may optionally be separated by commas or 

1202 # semicolons. 

1203 if not tokenizer.TryConsume(','): 

1204 tokenizer.TryConsume(';') 

1205 

1206 def _SkipFieldMessage(self, tokenizer, immediate_message_type): 

1207 """Skips over a field message. 

1208 

1209 Args: 

1210 tokenizer: A tokenizer to parse the field name and values. 

1211 immediate_message_type: The type of the message immediately containing 

1212 the silent marker 

1213 """ 

1214 if tokenizer.TryConsume('<'): 

1215 delimiter = '>' 

1216 else: 

1217 tokenizer.Consume('{') 

1218 delimiter = '}' 

1219 

1220 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 

1221 self._SkipField(tokenizer, immediate_message_type) 

1222 

1223 tokenizer.Consume(delimiter) 

1224 

1225 def _SkipFieldValue(self, tokenizer): 

1226 """Skips over a field value. 

1227 

1228 Args: 

1229 tokenizer: A tokenizer to parse the field name and values. 

1230 

1231 Raises: 

1232 ParseError: In case an invalid field value is found. 

1233 """ 

1234 if (not tokenizer.TryConsumeByteString()and 

1235 not tokenizer.TryConsumeIdentifier() and 

1236 not _TryConsumeInt64(tokenizer) and 

1237 not _TryConsumeUint64(tokenizer) and 

1238 not tokenizer.TryConsumeFloat()): 

1239 raise ParseError('Invalid field value: ' + tokenizer.token) 

1240 

1241 def _SkipRepeatedFieldValue(self, tokenizer): 

1242 """Skips over a repeated field value. 

1243 

1244 Args: 

1245 tokenizer: A tokenizer to parse the field value. 

1246 """ 

1247 tokenizer.Consume('[') 

1248 if not tokenizer.LookingAt(']'): 

1249 self._SkipFieldValue(tokenizer) 

1250 while tokenizer.TryConsume(','): 

1251 self._SkipFieldValue(tokenizer) 

1252 tokenizer.Consume(']') 

1253 

1254 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A token is an identifier, a number, or a quoted string (one alternative
  # per quote mark in _QUOTES).
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    # NOTE(review): _position and _token_start are assigned here but not read
    # anywhere in this class as far as visible — confirm before removing.
    self._position = 0
    self._line = -1          # 0-based index of the current line.
    self._column = 0         # 0-based column of the current token start.
    self._token_start = None
    self.token = ''          # The current token text ('' at end of input).
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    # Prime the tokenizer so self.token holds the first token.
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Returns True iff the current token equals the given text."""
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Advances to the next input line once the current one is exhausted."""
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Skips whitespace (and comments, when enabled) before the next token."""
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      # Record whether the skipped run is exactly the debug-string silent
      # marker; consumers check this flag via _DetectSilentMarker.
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    """Consumes a comment token and returns its text.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    """Returns True iff an identifier was consumed."""
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    """Returns True iff an identifier or number was consumed."""
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    """Returns True iff an integer was consumed."""
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    """Returns True iff a float was consumed."""
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    """Returns True iff a byte string was consumed."""
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, like in C or Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (by name or number) for the given field.

    Raises:
      ParseError: If the current token is not a valid value of the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    """Wraps a decode error into a ParseError for the current position."""
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Fall back to a single character (punctuation like '{', ':', ...).
      self.token = self._current_line[self._column]

1598 

# Aliased so it can still be accessed by current visibility violators.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1602 

1603 

def _ConsumeInt32(tokenizer):
  """Reads a signed 32-bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer positioned at the number to read.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the token is not a valid signed 32-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)

1617 

1618 

def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32-bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer positioned at the number to read.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the token is not a valid unsigned 32-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)

1632 

1633 

def _TryConsumeInt64(tokenizer):
  """Returns True iff a signed 64-bit integer was consumed."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1640 

1641 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)

1655 

1656 

def _TryConsumeUint64(tokenizer):
  """Returns True iff an unsigned 64-bit integer was consumed."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1663 

1664 

def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64-bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer positioned at the number to read.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the token is not a valid unsigned 64-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)

1678 

1679 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a 64-bit (rather than 32-bit) integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    value = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise range/format errors as ParseError at the current position.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value

1700 

1701 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and validates its range.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a 64-bit (rather than 32-bit) integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown iff the text is not a valid integer (either in format
      or in the requested signed/width range).
  """
  # Parse first; a format error propagates to the caller as ValueError.
  value = _ParseAbstractInteger(text)

  # Pick the matching range checker: index encodes (is_long, is_signed).
  _INTEGER_CHECKERS[int(is_long) * 2 + int(is_signed)].CheckValue(value)
  return value

1723 

1724 

1725def _ParseAbstractInteger(text): 

1726 """Parses an integer without checking size/signedness. 

1727 

1728 Args: 

1729 text: The text to parse. 

1730 

1731 Returns: 

1732 The integer value. 

1733 

1734 Raises: 

1735 ValueError: Thrown Iff the text is not a valid integer. 

1736 """ 

1737 # Do the actual parsing. Exception handling is propagated to caller. 

1738 orig_text = text 

1739 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1740 if c_octal_match: 

1741 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1742 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1743 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1744 try: 

1745 return int(text, 0) 

1746 except ValueError: 

1747 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1748 

1749 

def ParseFloat(text):
  """Parse a floating point number.

  Accepts standard Python float syntax plus protobuf spellings such as
  "inf"/"infinity"/"nan" (optionally with a trailing 'f') and "1.0f".

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # Fast path: Python-compatible syntax.
  try:
    return float(text)
  except ValueError:
    pass
  # Alternative protobuf spellings.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  # Assume '1.0f' format.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)

1780 

1781 

def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse; one of 'true', 't', '1', 'True', 'false', 'f', '0',
      'False'.

  Returns:
    The boolean parsed.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # The accepted spellings for True and False are disjoint, so the check
  # order does not matter.
  if text in ('false', 'f', '0', 'False'):
    return False
  if text in ('true', 't', '1', 'True'):
    return True
  raise ValueError('Expected "true" or "false".')

1800 

1801 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: treat the value as an enum name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return enum_value.number
  # Open enums accept any number; closed enums require a declared value.
  if not field.enum_type.is_closed:
    return number
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return enum_value.number