1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc. All rights reserved.
3#
4# Use of this source code is governed by a BSD-style
5# license that can be found in the LICENSE file or at
6# https://developers.google.com/open-source/licenses/bsd
7
8"""Contains routines for printing protocol messages in text format.
9
10Simple usage example::
11
12 # Create a proto object and serialize it to a text proto string.
13 message = my_proto_pb2.MyMessage(foo='bar')
14 text_proto = text_format.MessageToString(message)
15
16 # Parse a text proto string.
17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
18"""
19
20__author__ = 'kenton@google.com (Kenton Varda)'
21
22# TODO Import thread contention leads to test failures.
23import encodings.raw_unicode_escape # pylint: disable=unused-import
24import encodings.unicode_escape # pylint: disable=unused-import
25import io
26import math
27import re
28
29from google.protobuf.internal import decoder
30from google.protobuf.internal import type_checkers
31from google.protobuf import descriptor
32from google.protobuf import text_encoding
33from google.protobuf import unknown_fields
34
35# pylint: disable=g-import-not-at-top
36__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
37 'PrintFieldValue', 'Merge', 'MessageToBytes']
38
39_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
40 type_checkers.Int32ValueChecker(),
41 type_checkers.Uint64ValueChecker(),
42 type_checkers.Int64ValueChecker())
43_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
44_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
45_QUOTES = frozenset(("'", '"'))
46_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
47_DEBUG_STRING_SILENT_MARKER = '\t '
48
49_as_utf8_default = True
50
51
52class Error(Exception):
53 """Top-level module error for text_format."""
54
55
56class ParseError(Error):
57 """Thrown in case of text parsing or tokenizing error."""
58
59 def __init__(self, message=None, line=None, column=None):
60 if message is not None and line is not None:
61 loc = str(line)
62 if column is not None:
63 loc += ':{0}'.format(column)
64 message = '{0} : {1}'.format(loc, message)
65 if message is not None:
66 super(ParseError, self).__init__(message)
67 else:
68 super(ParseError, self).__init__()
69 self._line = line
70 self._column = column
71
72 def GetLine(self):
73 return self._line
74
75 def GetColumn(self):
76 return self._column
77
78
79class TextWriter(object):
80
81 def __init__(self, as_utf8):
82 self._writer = io.StringIO()
83
84 def write(self, val):
85 return self._writer.write(val)
86
87 def close(self):
88 return self._writer.close()
89
90 def getvalue(self):
91 return self._writer.getvalue()
92
93
94def MessageToString(
95 message,
96 as_utf8=_as_utf8_default,
97 as_one_line=False,
98 use_short_repeated_primitives=False,
99 pointy_brackets=False,
100 use_index_order=False,
101 float_format=None,
102 double_format=None,
103 use_field_number=False,
104 descriptor_pool=None,
105 indent=0,
106 message_formatter=None,
107 print_unknown_fields=False,
108 force_colon=False) -> str:
109 """Convert protobuf message to text format.
110
111 Double values can be formatted compactly with 15 digits of
112 precision (which is the most that IEEE 754 "double" can guarantee)
113 using double_format='.15g'. To ensure that converting to text and back to a
114 proto will result in an identical value, double_format='.17g' should be used.
115
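  For example, a round-trip-safe conversion might look like this (a sketch;
  ``my_proto_pb2.MyMessage`` and its double field ``price`` are hypothetical)::

    message = my_proto_pb2.MyMessage(price=0.1)
    text_proto = text_format.MessageToString(message, double_format='.17g')
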
116 Args:
117 message: The protocol buffers message.
118 as_utf8: Return unescaped Unicode for non-ASCII characters.
119 as_one_line: Don't introduce newlines between fields.
120 use_short_repeated_primitives: Use short repeated format for primitives.
121 pointy_brackets: If True, use angle brackets instead of curly braces for
122 nesting.
123 use_index_order: If True, fields of a proto message will be printed using
124 the order defined in source code instead of the field number, extensions
125 will be printed at the end of the message and their relative order is
126 determined by the extension number. By default, use the field number
127 order.
    float_format (str): If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, the shortest
      float that has the same value on the wire will be printed. This also
      affects double fields if double_format is not set but float_format is.
    double_format (str): If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, use ``str()``.
135 use_field_number: If True, print field numbers instead of names.
136 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
137 indent (int): The initial indent level, in terms of spaces, for pretty
138 print.
139 message_formatter (function(message, indent, as_one_line) -> unicode|None):
140 Custom formatter for selected sub-messages (usually based on message
141 type). Use to pretty print parts of the protobuf for easier diffing.
142 print_unknown_fields: If True, unknown fields will be printed.
143 force_colon: If set, a colon will be added after the field name even if the
144 field is a proto message.
145
146 Returns:
147 str: A string of the text formatted protocol buffer message.
148 """
149 out = TextWriter(as_utf8)
150 printer = _Printer(
151 out,
152 indent,
153 as_utf8,
154 as_one_line,
155 use_short_repeated_primitives,
156 pointy_brackets,
157 use_index_order,
158 float_format,
159 double_format,
160 use_field_number,
161 descriptor_pool,
162 message_formatter,
163 print_unknown_fields=print_unknown_fields,
164 force_colon=force_colon)
165 printer.PrintMessage(message)
166 result = out.getvalue()
167 out.close()
168 if as_one_line:
169 return result.rstrip()
170 return result
171
172
173def MessageToBytes(message, **kwargs) -> bytes:
174 """Convert protobuf message to encoded text format. See MessageToString."""
175 text = MessageToString(message, **kwargs)
176 if isinstance(text, bytes):
177 return text
178 codec = 'utf-8' if kwargs.get('as_utf8') else 'ascii'
179 return text.encode(codec)
180
181
182def _IsMapEntry(field):
183 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
184 field.message_type.has_options and
185 field.message_type.GetOptions().map_entry)
186
187
188def _IsGroupLike(field):
189 """Determines if a field is consistent with a proto2 group.
190
191 Args:
192 field: The field descriptor.
193
194 Returns:
195 True if this field is group-like, false otherwise.
196 """
197 # Groups are always tag-delimited.
198 if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
199 return False
200
  # Group field names are always the lowercased form of their type name.
202 if field.name != field.message_type.name.lower():
203 return False
204
205 if field.message_type.file != field.file:
206 return False
207
208 # Group messages are always defined in the same scope as the field. File
209 # level extensions will compare NULL == NULL here, which is why the file
210 # comparison above is necessary to ensure both come from the same file.
211 return (
212 field.message_type.containing_type == field.extension_scope
213 if field.is_extension
214 else field.message_type.containing_type == field.containing_type
215 )
216
217
218def PrintMessage(message,
219 out,
220 indent=0,
221 as_utf8=_as_utf8_default,
222 as_one_line=False,
223 use_short_repeated_primitives=False,
224 pointy_brackets=False,
225 use_index_order=False,
226 float_format=None,
227 double_format=None,
228 use_field_number=False,
229 descriptor_pool=None,
230 message_formatter=None,
231 print_unknown_fields=False,
232 force_colon=False):
233 """Convert the message to text format and write it to the out stream.
234
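  A minimal sketch of the stream-based variant (``my_proto_pb2.MyMessage`` is
  a hypothetical generated message type)::

    import io

    out = io.StringIO()
    text_format.PrintMessage(my_proto_pb2.MyMessage(foo='bar'), out)
    text_proto = out.getvalue()
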
235 Args:
236 message: The Message object to convert to text format.
237 out: A file handle to write the message to.
238 indent: The initial indent level for pretty print.
239 as_utf8: Return unescaped Unicode for non-ASCII characters.
240 as_one_line: Don't introduce newlines between fields.
241 use_short_repeated_primitives: Use short repeated format for primitives.
242 pointy_brackets: If True, use angle brackets instead of curly braces for
243 nesting.
244 use_index_order: If True, print fields of a proto message using the order
245 defined in source code instead of the field number. By default, use the
246 field number order.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, the shortest
      float that has the same value on the wire will be printed. This also
      affects double fields if double_format is not set but float_format is.
251 double_format: If set, use this to specify double field formatting
252 (per the "Format Specification Mini-Language"); if it is not set but
253 float_format is set, use float_format. Otherwise, str() is used.
254 use_field_number: If True, print field numbers instead of names.
255 descriptor_pool: A DescriptorPool used to resolve Any types.
256 message_formatter: A function(message, indent, as_one_line): unicode|None
257 to custom format selected sub-messages (usually based on message type).
258 Use to pretty print parts of the protobuf for easier diffing.
259 print_unknown_fields: If True, unknown fields will be printed.
260 force_colon: If set, a colon will be added after the field name even if
261 the field is a proto message.
262 """
263 printer = _Printer(
264 out=out, indent=indent, as_utf8=as_utf8,
265 as_one_line=as_one_line,
266 use_short_repeated_primitives=use_short_repeated_primitives,
267 pointy_brackets=pointy_brackets,
268 use_index_order=use_index_order,
269 float_format=float_format,
270 double_format=double_format,
271 use_field_number=use_field_number,
272 descriptor_pool=descriptor_pool,
273 message_formatter=message_formatter,
274 print_unknown_fields=print_unknown_fields,
275 force_colon=force_colon)
276 printer.PrintMessage(message)
277
278
279def PrintField(field,
280 value,
281 out,
282 indent=0,
283 as_utf8=_as_utf8_default,
284 as_one_line=False,
285 use_short_repeated_primitives=False,
286 pointy_brackets=False,
287 use_index_order=False,
288 float_format=None,
289 double_format=None,
290 message_formatter=None,
291 print_unknown_fields=False,
292 force_colon=False):
293 """Print a single field name/value pair."""
294 printer = _Printer(out, indent, as_utf8, as_one_line,
295 use_short_repeated_primitives, pointy_brackets,
296 use_index_order, float_format, double_format,
297 message_formatter=message_formatter,
298 print_unknown_fields=print_unknown_fields,
299 force_colon=force_colon)
300 printer.PrintField(field, value)
301
302
303def PrintFieldValue(field,
304 value,
305 out,
306 indent=0,
307 as_utf8=_as_utf8_default,
308 as_one_line=False,
309 use_short_repeated_primitives=False,
310 pointy_brackets=False,
311 use_index_order=False,
312 float_format=None,
313 double_format=None,
314 message_formatter=None,
315 print_unknown_fields=False,
316 force_colon=False):
317 """Print a single field value (not including name)."""
318 printer = _Printer(out, indent, as_utf8, as_one_line,
319 use_short_repeated_primitives, pointy_brackets,
320 use_index_order, float_format, double_format,
321 message_formatter=message_formatter,
322 print_unknown_fields=print_unknown_fields,
323 force_colon=force_colon)
324 printer.PrintFieldValue(field, value)
325
326
327def _BuildMessageFromTypeName(type_name, descriptor_pool):
328 """Returns a protobuf message instance.
329
330 Args:
331 type_name: Fully-qualified protobuf message type name string.
332 descriptor_pool: DescriptorPool instance.
333
334 Returns:
    A Message instance of the type matching type_name, or None if no
    Descriptor matching type_name was found.
337 """
338 # pylint: disable=g-import-not-at-top
339 if descriptor_pool is None:
340 from google.protobuf import descriptor_pool as pool_mod
341 descriptor_pool = pool_mod.Default()
342 from google.protobuf import message_factory
343 try:
344 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
345 except KeyError:
346 return None
347 message_type = message_factory.GetMessageClass(message_descriptor)
348 return message_type()
349
350
351# These values must match WireType enum in //google/protobuf/wire_format.h.
352WIRETYPE_LENGTH_DELIMITED = 2
353WIRETYPE_START_GROUP = 3
354
355
356class _Printer(object):
357 """Text format printer for protocol message."""
358
359 def __init__(
360 self,
361 out,
362 indent=0,
363 as_utf8=_as_utf8_default,
364 as_one_line=False,
365 use_short_repeated_primitives=False,
366 pointy_brackets=False,
367 use_index_order=False,
368 float_format=None,
369 double_format=None,
370 use_field_number=False,
371 descriptor_pool=None,
372 message_formatter=None,
373 print_unknown_fields=False,
374 force_colon=False):
375 """Initialize the Printer.
376
377 Double values can be formatted compactly with 15 digits of precision
378 (which is the most that IEEE 754 "double" can guarantee) using
379 double_format='.15g'. To ensure that converting to text and back to a proto
380 will result in an identical value, double_format='.17g' should be used.
381
382 Args:
383 out: To record the text format result.
384 indent: The initial indent level for pretty print.
385 as_utf8: Return unescaped Unicode for non-ASCII characters.
386 as_one_line: Don't introduce newlines between fields.
387 use_short_repeated_primitives: Use short repeated format for primitives.
388 pointy_brackets: If True, use angle brackets instead of curly braces for
389 nesting.
390 use_index_order: If True, print fields of a proto message using the order
391 defined in source code instead of the field number. By default, use the
392 field number order.
      float_format: If set, use this to specify float field formatting
        (per the "Format Specification Mini-Language"); otherwise, the shortest
        float that has the same value on the wire will be printed. This also
        affects double fields if double_format is not set but float_format is.
397 double_format: If set, use this to specify double field formatting
398 (per the "Format Specification Mini-Language"); if it is not set but
399 float_format is set, use float_format. Otherwise, str() is used.
400 use_field_number: If True, print field numbers instead of names.
401 descriptor_pool: A DescriptorPool used to resolve Any types.
402 message_formatter: A function(message, indent, as_one_line): unicode|None
403 to custom format selected sub-messages (usually based on message type).
404 Use to pretty print parts of the protobuf for easier diffing.
405 print_unknown_fields: If True, unknown fields will be printed.
406 force_colon: If set, a colon will be added after the field name even if
407 the field is a proto message.
408 """
409 self.out = out
410 self.indent = indent
411 self.as_utf8 = as_utf8
412 self.as_one_line = as_one_line
413 self.use_short_repeated_primitives = use_short_repeated_primitives
414 self.pointy_brackets = pointy_brackets
415 self.use_index_order = use_index_order
416 self.float_format = float_format
417 if double_format is not None:
418 self.double_format = double_format
419 else:
420 self.double_format = float_format
421 self.use_field_number = use_field_number
422 self.descriptor_pool = descriptor_pool
423 self.message_formatter = message_formatter
424 self.print_unknown_fields = print_unknown_fields
425 self.force_colon = force_colon
426
427 def _TryPrintAsAnyMessage(self, message):
428 """Serializes if message is a google.protobuf.Any field."""
429 if '/' not in message.type_url:
430 return False
431 packed_message = _BuildMessageFromTypeName(message.TypeName(),
432 self.descriptor_pool)
433 if packed_message:
434 packed_message.MergeFromString(message.value)
435 colon = ':' if self.force_colon else ''
436 self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
437 self._PrintMessageFieldValue(packed_message)
438 self.out.write(' ' if self.as_one_line else '\n')
439 return True
440 else:
441 return False
442
443 def _TryCustomFormatMessage(self, message):
444 formatted = self.message_formatter(message, self.indent, self.as_one_line)
445 if formatted is None:
446 return False
447
448 out = self.out
449 out.write(' ' * self.indent)
450 out.write(formatted)
451 out.write(' ' if self.as_one_line else '\n')
452 return True
453
454 def PrintMessage(self, message):
455 """Convert protobuf message to text format.
456
457 Args:
458 message: The protocol buffers message.
459 """
460 if self.message_formatter and self._TryCustomFormatMessage(message):
461 return
462 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
463 self._TryPrintAsAnyMessage(message)):
464 return
465 fields = message.ListFields()
466 if self.use_index_order:
467 fields.sort(
468 key=lambda x: x[0].number if x[0].is_extension else x[0].index)
469 for field, value in fields:
470 if _IsMapEntry(field):
471 for key in sorted(value):
472 # This is slow for maps with submessage entries because it copies the
473 # entire tree. Unfortunately this would take significant refactoring
474 # of this file to work around.
475 #
476 # TODO: refactor and optimize if this becomes an issue.
477 entry_submsg = value.GetEntryClass()(key=key, value=value[key])
478 self.PrintField(field, entry_submsg)
479 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
480 if (self.use_short_repeated_primitives
481 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
482 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
483 self._PrintShortRepeatedPrimitivesValue(field, value)
484 else:
485 for element in value:
486 self.PrintField(field, element)
487 else:
488 self.PrintField(field, value)
489
490 if self.print_unknown_fields:
491 self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))
492
493 def _PrintUnknownFields(self, unknown_field_set):
494 """Print unknown fields."""
495 out = self.out
496 for field in unknown_field_set:
497 out.write(' ' * self.indent)
498 out.write(str(field.field_number))
499 if field.wire_type == WIRETYPE_START_GROUP:
500 if self.as_one_line:
501 out.write(' { ')
502 else:
503 out.write(' {\n')
504 self.indent += 2
505
506 self._PrintUnknownFields(field.data)
507
508 if self.as_one_line:
509 out.write('} ')
510 else:
511 self.indent -= 2
512 out.write(' ' * self.indent + '}\n')
513 elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
514 try:
515 # If this field is parseable as a Message, it is probably
516 # an embedded message.
517 # pylint: disable=protected-access
518 (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
519 memoryview(field.data), 0, len(field.data))
520 except Exception: # pylint: disable=broad-except
521 pos = 0
522
523 if pos == len(field.data):
524 if self.as_one_line:
525 out.write(' { ')
526 else:
527 out.write(' {\n')
528 self.indent += 2
529
530 self._PrintUnknownFields(embedded_unknown_message)
531
532 if self.as_one_line:
533 out.write('} ')
534 else:
535 self.indent -= 2
536 out.write(' ' * self.indent + '}\n')
537 else:
538 # A string or bytes field. self.as_utf8 may not work.
539 out.write(': \"')
540 out.write(text_encoding.CEscape(field.data, False))
541 out.write('\" ' if self.as_one_line else '\"\n')
542 else:
543 # varint, fixed32, fixed64
544 out.write(': ')
545 out.write(str(field.data))
546 out.write(' ' if self.as_one_line else '\n')
547
548 def _PrintFieldName(self, field):
549 """Print field name."""
550 out = self.out
551 out.write(' ' * self.indent)
552 if self.use_field_number:
553 out.write(str(field.number))
554 else:
555 if field.is_extension:
556 out.write('[')
557 if (field.containing_type.GetOptions().message_set_wire_format and
558 field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
559 field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
560 out.write(field.message_type.full_name)
561 else:
562 out.write(field.full_name)
563 out.write(']')
564 elif _IsGroupLike(field):
565 # For groups, use the capitalized name.
566 out.write(field.message_type.name)
567 else:
568 out.write(field.name)
569
570 if (self.force_colon or
571 field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True.
575 out.write(':')
576
577 def PrintField(self, field, value):
578 """Print a single field name/value pair."""
579 self._PrintFieldName(field)
580 self.out.write(' ')
581 self.PrintFieldValue(field, value)
582 self.out.write(' ' if self.as_one_line else '\n')
583
584 def _PrintShortRepeatedPrimitivesValue(self, field, value):
585 """"Prints short repeated primitives value."""
586 # Note: this is called only when value has at least one element.
587 self._PrintFieldName(field)
588 self.out.write(' [')
589 for i in range(len(value) - 1):
590 self.PrintFieldValue(field, value[i])
591 self.out.write(', ')
592 self.PrintFieldValue(field, value[-1])
593 self.out.write(']')
594 self.out.write(' ' if self.as_one_line else '\n')
595
596 def _PrintMessageFieldValue(self, value):
597 if self.pointy_brackets:
598 openb = '<'
599 closeb = '>'
600 else:
601 openb = '{'
602 closeb = '}'
603
604 if self.as_one_line:
605 self.out.write('%s ' % openb)
606 self.PrintMessage(value)
607 self.out.write(closeb)
608 else:
609 self.out.write('%s\n' % openb)
610 self.indent += 2
611 self.PrintMessage(value)
612 self.indent -= 2
613 self.out.write(' ' * self.indent + closeb)
614
615 def PrintFieldValue(self, field, value):
616 """Print a single field value (not including name).
617
618 For repeated fields, the value should be a single element.
619
620 Args:
621 field: The descriptor of the field to be printed.
622 value: The value of the field.
623 """
624 out = self.out
625 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
626 self._PrintMessageFieldValue(value)
627 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
628 enum_value = field.enum_type.values_by_number.get(value, None)
629 if enum_value is not None:
630 out.write(enum_value.name)
631 else:
632 out.write(str(value))
633 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
634 out.write('\"')
635 if isinstance(value, str) and not self.as_utf8:
636 out_value = value.encode('utf-8')
637 else:
638 out_value = value
639 if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
640 # We always need to escape all binary data in TYPE_BYTES fields.
641 out_as_utf8 = False
642 else:
643 out_as_utf8 = self.as_utf8
644 out.write(text_encoding.CEscape(out_value, out_as_utf8))
645 out.write('\"')
646 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
647 if value:
648 out.write('true')
649 else:
650 out.write('false')
651 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
652 if self.float_format is not None:
653 out.write('{1:{0}}'.format(self.float_format, value))
654 else:
655 if math.isnan(value):
656 out.write(str(value))
657 else:
658 out.write(str(type_checkers.ToShortestFloat(value)))
659 elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
660 self.double_format is not None):
661 out.write('{1:{0}}'.format(self.double_format, value))
662 else:
663 out.write(str(value))
664
665
666def Parse(text,
667 message,
668 allow_unknown_extension=False,
669 allow_field_number=False,
670 descriptor_pool=None,
671 allow_unknown_field=False):
672 """Parses a text representation of a protocol message into a message.
673
674 NOTE: for historical reasons this function does not clear the input
675 message. This is different from what the binary msg.ParseFrom(...) does.
676 If text contains a field already set in message, the value is appended if the
677 field is repeated. Otherwise, an error is raised.
678
679 Example::
680
681 a = MyProto()
682 a.repeated_field.append('test')
683 b = MyProto()
684
685 # Repeated fields are combined
686 text_format.Parse(repr(a), b)
687 text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]
688
689 # Non-repeated fields cannot be overwritten
690 a.singular_field = 1
691 b.singular_field = 2
692 text_format.Parse(repr(a), b) # ParseError
693
694 # Binary version:
695 b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"
696
697 Caller is responsible for clearing the message as needed.
698
699 Args:
700 text (str): Message text representation.
701 message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. a spelling error in a field name).
709
710 Returns:
711 Message: The same message passed as argument.
712
713 Raises:
714 ParseError: On text parsing problems.
715 """
716 return ParseLines(text.split(b'\n' if isinstance(text, bytes) else u'\n'),
717 message,
718 allow_unknown_extension,
719 allow_field_number,
720 descriptor_pool=descriptor_pool,
721 allow_unknown_field=allow_unknown_field)
722
723
724def Merge(text,
725 message,
726 allow_unknown_extension=False,
727 allow_field_number=False,
728 descriptor_pool=None,
729 allow_unknown_field=False):
730 """Parses a text representation of a protocol message into a message.
731
732 Like Parse(), but allows repeated values for a non-repeated field, and uses
733 the last one. This means any non-repeated, top-level fields specified in text
734 replace those in the message.
735
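  Example (a sketch; ``singular_field`` is a hypothetical int32 field)::

    a = MyProto()
    text_format.Merge('singular_field: 1 singular_field: 2', a)
    # a.singular_field is now 2; Parse() would raise ParseError here.
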
736 Args:
737 text (str): Message text representation.
738 message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. a spelling error in a field name).
746
747 Returns:
748 Message: The same message passed as argument.
749
750 Raises:
751 ParseError: On text parsing problems.
752 """
753 return MergeLines(
754 text.split(b'\n' if isinstance(text, bytes) else u'\n'),
755 message,
756 allow_unknown_extension,
757 allow_field_number,
758 descriptor_pool=descriptor_pool,
759 allow_unknown_field=allow_unknown_field)
760
761
762def ParseLines(lines,
763 message,
764 allow_unknown_extension=False,
765 allow_field_number=False,
766 descriptor_pool=None,
767 allow_unknown_field=False):
768 """Parses a text representation of a protocol message into a message.
769
770 See Parse() for caveats.
771
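  For example, the following is roughly equivalent to calling Parse() on the
  joined text (``MyProto`` and ``text_proto`` as in the examples above)::

    message = text_format.ParseLines(text_proto.splitlines(), MyProto())
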
772 Args:
773 lines: An iterable of lines of a message's text representation.
774 message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. a spelling error in a field name).
782
783 Returns:
784 The same message passed as argument.
785
786 Raises:
787 ParseError: On text parsing problems.
788 """
789 parser = _Parser(allow_unknown_extension,
790 allow_field_number,
791 descriptor_pool=descriptor_pool,
792 allow_unknown_field=allow_unknown_field)
793 return parser.ParseLines(lines, message)
794
795
796def MergeLines(lines,
797 message,
798 allow_unknown_extension=False,
799 allow_field_number=False,
800 descriptor_pool=None,
801 allow_unknown_field=False):
802 """Parses a text representation of a protocol message into a message.
803
804 See Merge() for more details.
805
806 Args:
807 lines: An iterable of lines of a message's text representation.
808 message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. a spelling error in a field name).
816
817 Returns:
818 The same message passed as argument.
819
820 Raises:
821 ParseError: On text parsing problems.
822 """
823 parser = _Parser(allow_unknown_extension,
824 allow_field_number,
825 descriptor_pool=descriptor_pool,
826 allow_unknown_field=allow_unknown_field)
827 return parser.MergeLines(lines, message)
828
829
830class _Parser(object):
831 """Text format parser for protocol message."""
832
833 def __init__(self,
834 allow_unknown_extension=False,
835 allow_field_number=False,
836 descriptor_pool=None,
837 allow_unknown_field=False):
838 self.allow_unknown_extension = allow_unknown_extension
839 self.allow_field_number = allow_field_number
840 self.descriptor_pool = descriptor_pool
841 self.allow_unknown_field = allow_unknown_field
842
843 def ParseLines(self, lines, message):
844 """Parses a text representation of a protocol message into a message."""
845 self._allow_multiple_scalars = False
846 self._ParseOrMerge(lines, message)
847 return message
848
849 def MergeLines(self, lines, message):
850 """Merges a text representation of a protocol message into a message."""
851 self._allow_multiple_scalars = True
852 self._ParseOrMerge(lines, message)
853 return message
854
855 def _ParseOrMerge(self, lines, message):
856 """Converts a text representation of a protocol message into a message.
857
858 Args:
859 lines: Lines of a message's text representation.
860 message: A protocol buffer message to merge into.
861
862 Raises:
863 ParseError: On text parsing problems.
864 """
865 # Tokenize expects native str lines.
866 try:
867 str_lines = (
868 line if isinstance(line, str) else line.decode('utf-8')
869 for line in lines)
870 tokenizer = Tokenizer(str_lines)
871 except UnicodeDecodeError as e:
872 raise ParseError from e
873 if message:
874 self.root_type = message.DESCRIPTOR.full_name
875 while not tokenizer.AtEnd():
876 self._MergeField(tokenizer, message)
877
878 def _MergeField(self, tokenizer, message):
879 """Merges a single protocol message field into a message.
880
881 Args:
882 tokenizer: A tokenizer to parse the field name and values.
883 message: A protocol message to record the data.
884
885 Raises:
886 ParseError: In case of text parsing problems.
887 """
888 message_descriptor = message.DESCRIPTOR
889 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
890 tokenizer.TryConsume('[')):
891 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
892 tokenizer.Consume(']')
893 tokenizer.TryConsume(':')
894 self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
895 type_url_prefix + '/' + packed_type_name)
896 if tokenizer.TryConsume('<'):
897 expanded_any_end_token = '>'
898 else:
899 tokenizer.Consume('{')
900 expanded_any_end_token = '}'
901 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
902 self.descriptor_pool)
903 # Direct comparison with None is used instead of implicit bool conversion
904 # to avoid false positives with falsy initial values, e.g. for
905 # google.protobuf.ListValue.
906 if expanded_any_sub_message is None:
907 raise ParseError('Type %s not found in descriptor pool' %
908 packed_type_name)
909 while not tokenizer.TryConsume(expanded_any_end_token):
910 if tokenizer.AtEnd():
911 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
912 (expanded_any_end_token,))
913 self._MergeField(tokenizer, expanded_any_sub_message)
914 deterministic = False
915
916 message.Pack(expanded_any_sub_message,
917 type_url_prefix=type_url_prefix,
918 deterministic=deterministic)
919 return
920
921 if tokenizer.TryConsume('['):
922 name = [tokenizer.ConsumeIdentifier()]
923 while tokenizer.TryConsume('.'):
924 name.append(tokenizer.ConsumeIdentifier())
925 name = '.'.join(name)
926
927 if not message_descriptor.is_extendable:
928 raise tokenizer.ParseErrorPreviousToken(
929 'Message type "%s" does not have extensions.' %
930 message_descriptor.full_name)
931 # pylint: disable=protected-access
932 field = message.Extensions._FindExtensionByName(name)
933 # pylint: enable=protected-access
934 if not field:
935 if self.allow_unknown_extension:
936 field = None
937 else:
938 raise tokenizer.ParseErrorPreviousToken(
939 'Extension "%s" not registered. '
940 'Did you import the _pb2 module which defines it? '
941 'If you are trying to place the extension in the MessageSet '
942 'field of another message that is in an Any or MessageSet field, '
943 'that message\'s _pb2 module must be imported as well' % name)
944 elif message_descriptor != field.containing_type:
945 raise tokenizer.ParseErrorPreviousToken(
946 'Extension "%s" does not extend message type "%s".' %
947 (name, message_descriptor.full_name))
948
949 tokenizer.Consume(']')
950
951 else:
952 name = tokenizer.ConsumeIdentifierOrNumber()
953 if self.allow_field_number and name.isdigit():
954 number = ParseInteger(name, True, True)
955 field = message_descriptor.fields_by_number.get(number, None)
956 if not field and message_descriptor.is_extendable:
957 field = message.Extensions._FindExtensionByNumber(number)
958 else:
959 field = message_descriptor.fields_by_name.get(name, None)
960
961 # Group names are expected to be capitalized as they appear in the
962 # .proto file, which actually matches their type names, not their field
963 # names.
964 if not field:
965 field = message_descriptor.fields_by_name.get(name.lower(), None)
966 if field and not _IsGroupLike(field):
967 field = None
968 if field and field.message_type.name != name:
969 field = None
970
971 if not field and not self.allow_unknown_field:
972 raise tokenizer.ParseErrorPreviousToken(
973 'Message type "%s" has no field named "%s".' %
974 (message_descriptor.full_name, name))
975
976 if field:
977 if not self._allow_multiple_scalars and field.containing_oneof:
978 # Check if there's a different field set in this oneof.
979 # Note that we ignore the case if the same field was set before, and we
980 # apply _allow_multiple_scalars to non-scalar fields as well.
981 which_oneof = message.WhichOneof(field.containing_oneof.name)
982 if which_oneof is not None and which_oneof != field.name:
983 raise tokenizer.ParseErrorPreviousToken(
984 'Field "%s" is specified along with field "%s", another member '
985 'of oneof "%s" for message type "%s".' %
986 (field.name, which_oneof, field.containing_oneof.name,
987 message_descriptor.full_name))
988
989 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
990 tokenizer.TryConsume(':')
991 self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
992 field.full_name)
993 merger = self._MergeMessageField
994 else:
995 tokenizer.Consume(':')
996 self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
997 field.full_name)
998 merger = self._MergeScalarField
999
1000 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
1001 tokenizer.TryConsume('[')):
1002 # Short repeated format, e.g. "foo: [1, 2, 3]"
1003 if not tokenizer.TryConsume(']'):
1004 while True:
1005 merger(tokenizer, message, field)
1006 if tokenizer.TryConsume(']'):
1007 break
1008 tokenizer.Consume(',')
1009
1010 else:
1011 merger(tokenizer, message, field)
1012
1013 else: # Proto field is unknown.
1014 assert (self.allow_unknown_extension or self.allow_unknown_field)
1015 self._SkipFieldContents(tokenizer, name, message_descriptor.full_name)
1016
1017 # For historical reasons, fields may optionally be separated by commas or
1018 # semicolons.
1019 if not tokenizer.TryConsume(','):
1020 tokenizer.TryConsume(';')
1021
1022 def _LogSilentMarker(self, immediate_message_type, field_name):
1023 pass
1024
1025 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name):
1026 if tokenizer.contains_silent_marker_before_current_token:
1027 self._LogSilentMarker(immediate_message_type, field_name)
1028
1029 def _ConsumeAnyTypeUrl(self, tokenizer):
1030 """Consumes a google.protobuf.Any type URL and returns the type name."""
1031 # Consume "type.googleapis.com/".
1032 prefix = [tokenizer.ConsumeIdentifier()]
1033 tokenizer.Consume('.')
1034 prefix.append(tokenizer.ConsumeIdentifier())
1035 tokenizer.Consume('.')
1036 prefix.append(tokenizer.ConsumeIdentifier())
1037 tokenizer.Consume('/')
1038 # Consume the fully-qualified type name.
1039 name = [tokenizer.ConsumeIdentifier()]
1040 while tokenizer.TryConsume('.'):
1041 name.append(tokenizer.ConsumeIdentifier())
1042 return '.'.join(prefix), '.'.join(name)
1043
1044 def _MergeMessageField(self, tokenizer, message, field):
1045 """Merges a single scalar field into a message.
1046
1047 Args:
1048 tokenizer: A tokenizer to parse the field value.
1049 message: The message of which field is a member.
1050 field: The descriptor of the field to be merged.
1051
1052 Raises:
1053 ParseError: In case of text parsing problems.
1054 """
1055 is_map_entry = _IsMapEntry(field)
1056
1057 if tokenizer.TryConsume('<'):
1058 end_token = '>'
1059 else:
1060 tokenizer.Consume('{')
1061 end_token = '}'
1062
1063 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
1064 if field.is_extension:
1065 sub_message = message.Extensions[field].add()
1066 elif is_map_entry:
1067 sub_message = getattr(message, field.name).GetEntryClass()()
1068 else:
1069 sub_message = getattr(message, field.name).add()
1070 else:
1071 if field.is_extension:
1072 if (not self._allow_multiple_scalars and
1073 message.HasExtension(field)):
1074 raise tokenizer.ParseErrorPreviousToken(
1075 'Message type "%s" should not have multiple "%s" extensions.' %
1076 (message.DESCRIPTOR.full_name, field.full_name))
1077 sub_message = message.Extensions[field]
1078 else:
1079 # Also apply _allow_multiple_scalars to message field.
1080 # TODO: Change to _allow_singular_overwrites.
1081 if (not self._allow_multiple_scalars and
1082 message.HasField(field.name)):
1083 raise tokenizer.ParseErrorPreviousToken(
1084 'Message type "%s" should not have multiple "%s" fields.' %
1085 (message.DESCRIPTOR.full_name, field.name))
1086 sub_message = getattr(message, field.name)
1087 sub_message.SetInParent()
1088
1089 while not tokenizer.TryConsume(end_token):
1090 if tokenizer.AtEnd():
1091 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
1092 self._MergeField(tokenizer, sub_message)
1093
1094 if is_map_entry:
1095 value_cpptype = field.message_type.fields_by_name['value'].cpp_type
1096 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
1097 value = getattr(message, field.name)[sub_message.key]
1098 value.CopyFrom(sub_message.value)
1099 else:
1100 getattr(message, field.name)[sub_message.key] = sub_message.value
1101
1102 def _MergeScalarField(self, tokenizer, message, field):
1103 """Merges a single scalar field into a message.
1104
1105 Args:
1106 tokenizer: A tokenizer to parse the field value.
1107 message: A protocol message to record the data.
1108 field: The descriptor of the field to be merged.
1109
1110 Raises:
1111 ParseError: In case of text parsing problems.
1112 RuntimeError: On runtime errors.
1113 """
1114 _ = self.allow_unknown_extension
1115 value = None
1116
1117 if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
1118 descriptor.FieldDescriptor.TYPE_SINT32,
1119 descriptor.FieldDescriptor.TYPE_SFIXED32):
1120 value = _ConsumeInt32(tokenizer)
1121 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
1122 descriptor.FieldDescriptor.TYPE_SINT64,
1123 descriptor.FieldDescriptor.TYPE_SFIXED64):
1124 value = _ConsumeInt64(tokenizer)
1125 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
1126 descriptor.FieldDescriptor.TYPE_FIXED32):
1127 value = _ConsumeUint32(tokenizer)
1128 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
1129 descriptor.FieldDescriptor.TYPE_FIXED64):
1130 value = _ConsumeUint64(tokenizer)
1131 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
1132 descriptor.FieldDescriptor.TYPE_DOUBLE):
1133 value = tokenizer.ConsumeFloat()
1134 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
1135 value = tokenizer.ConsumeBool()
1136 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
1137 value = tokenizer.ConsumeString()
1138 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
1139 value = tokenizer.ConsumeByteString()
1140 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
1141 value = tokenizer.ConsumeEnum(field)
1142 else:
1143 raise RuntimeError('Unknown field type %d' % field.type)
1144
1145 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
1146 if field.is_extension:
1147 message.Extensions[field].append(value)
1148 else:
1149 getattr(message, field.name).append(value)
1150 else:
1151 if field.is_extension:
1152 if (not self._allow_multiple_scalars and
1153 field.has_presence and
1154 message.HasExtension(field)):
1155 raise tokenizer.ParseErrorPreviousToken(
1156 'Message type "%s" should not have multiple "%s" extensions.' %
1157 (message.DESCRIPTOR.full_name, field.full_name))
1158 else:
1159 message.Extensions[field] = value
1160 else:
1161 duplicate_error = False
1162 if not self._allow_multiple_scalars:
1163 if field.has_presence:
1164 duplicate_error = message.HasField(field.name)
1165 else:
            # For a field that doesn't track presence, make a best-effort
            # check for multiple scalars by comparing against the default value.
1168 duplicate_error = bool(getattr(message, field.name))
1169
1170 if duplicate_error:
1171 raise tokenizer.ParseErrorPreviousToken(
1172 'Message type "%s" should not have multiple "%s" fields.' %
1173 (message.DESCRIPTOR.full_name, field.name))
1174 else:
1175 setattr(message, field.name, value)
1176
1177 def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type):
1178 """Skips over contents (value or message) of a field.
1179
1180 Args:
1181 tokenizer: A tokenizer to parse the field name and values.
1182 field_name: The field name currently being parsed.
1183 immediate_message_type: The type of the message immediately containing
1184 the silent marker.
1185 """
1186 # Try to guess the type of this field.
1187 # If this field is not a message, there should be a ":" between the
1188 # field name and the field value and also the field value should not
1189 # start with "{" or "<" which indicates the beginning of a message body.
1190 # If there is no ":" or there is a "{" or "<" after ":", this field has
1191 # to be a message or the input is ill-formed.
1192 if tokenizer.TryConsume(
1193 ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
1194 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
1195 if tokenizer.LookingAt('['):
1196 self._SkipRepeatedFieldValue(tokenizer)
1197 else:
1198 self._SkipFieldValue(tokenizer)
1199 else:
1200 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
1201 self._SkipFieldMessage(tokenizer, immediate_message_type)
1202
1203 def _SkipField(self, tokenizer, immediate_message_type):
1204 """Skips over a complete field (name and value/message).
1205
1206 Args:
1207 tokenizer: A tokenizer to parse the field name and values.
1208 immediate_message_type: The type of the message immediately containing
1209 the silent marker.
1210 """
1211 field_name = ''
1212 if tokenizer.TryConsume('['):
1213 # Consume extension or google.protobuf.Any type URL
1214 field_name += '[' + tokenizer.ConsumeIdentifier()
1215 num_identifiers = 1
1216 while tokenizer.TryConsume('.'):
1217 field_name += '.' + tokenizer.ConsumeIdentifier()
1218 num_identifiers += 1
1219 # This is possibly a type URL for an Any message.
1220 if num_identifiers == 3 and tokenizer.TryConsume('/'):
1221 field_name += '/' + tokenizer.ConsumeIdentifier()
1222 while tokenizer.TryConsume('.'):
1223 field_name += '.' + tokenizer.ConsumeIdentifier()
1224 tokenizer.Consume(']')
1225 field_name += ']'
1226 else:
1227 field_name += tokenizer.ConsumeIdentifierOrNumber()
1228
1229 self._SkipFieldContents(tokenizer, field_name, immediate_message_type)
1230
1231 # For historical reasons, fields may optionally be separated by commas or
1232 # semicolons.
1233 if not tokenizer.TryConsume(','):
1234 tokenizer.TryConsume(';')
1235
1236 def _SkipFieldMessage(self, tokenizer, immediate_message_type):
1237 """Skips over a field message.
1238
1239 Args:
1240 tokenizer: A tokenizer to parse the field name and values.
1241 immediate_message_type: The type of the message immediately containing
1242 the silent marker
1243 """
1244 if tokenizer.TryConsume('<'):
1245 delimiter = '>'
1246 else:
1247 tokenizer.Consume('{')
1248 delimiter = '}'
1249
1250 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
1251 self._SkipField(tokenizer, immediate_message_type)
1252
1253 tokenizer.Consume(delimiter)
1254
1255 def _SkipFieldValue(self, tokenizer):
1256 """Skips over a field value.
1257
1258 Args:
1259 tokenizer: A tokenizer to parse the field name and values.
1260
1261 Raises:
1262 ParseError: In case an invalid field value is found.
1263 """
    if (not tokenizer.TryConsumeByteString() and
1265 not tokenizer.TryConsumeIdentifier() and
1266 not _TryConsumeInt64(tokenizer) and
1267 not _TryConsumeUint64(tokenizer) and
1268 not tokenizer.TryConsumeFloat()):
1269 raise ParseError('Invalid field value: ' + tokenizer.token)
1270
1271 def _SkipRepeatedFieldValue(self, tokenizer):
1272 """Skips over a repeated field value.
1273
1274 Args:
1275 tokenizer: A tokenizer to parse the field value.
1276 """
1277 tokenizer.Consume('[')
1278 if not tokenizer.LookingAt(']'):
1279 self._SkipFieldValue(tokenizer)
1280 while tokenizer.TryConsume(','):
1281 self._SkipFieldValue(tokenizer)
1282 tokenizer.Consume(']')
1283
1284
1285class Tokenizer(object):
1286 """Protocol buffer text representation tokenizer.
1287
1288 This class handles the lower level string parsing by splitting it into
1289 meaningful tokens.
1290
1291 It was directly ported from the Java protocol buffer API.
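
  A short sketch of direct use (normally this class is only used internally by
  the parsing helpers above; 'foo' is an arbitrary example field name)::

    tokenizer = Tokenizer(['foo: 1'])
    tokenizer.ConsumeIdentifier()   # returns 'foo'
    tokenizer.Consume(':')
    tokenizer.ConsumeInteger()      # returns 1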
1292 """
1293
1294 _WHITESPACE = re.compile(r'\s+')
1295 _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
1296 _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
1297 _TOKEN = re.compile('|'.join([
1298 r'[a-zA-Z_][0-9a-zA-Z_+-]*', # an identifier
1299 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*', # a number
1300 ] + [ # quoted str for each quote mark
1301 # Avoid backtracking! https://stackoverflow.com/a/844267
1302 r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
1303 for mark in _QUOTES
1304 ]))
1305
1306 _IDENTIFIER = re.compile(r'[^\d\W]\w*')
1307 _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
1308
1309 def __init__(self, lines, skip_comments=True):
1310 self._position = 0
1311 self._line = -1
1312 self._column = 0
1313 self._token_start = None
1314 self.token = ''
1315 self._lines = iter(lines)
1316 self._current_line = ''
1317 self._previous_line = 0
1318 self._previous_column = 0
1319 self._more_lines = True
1320 self._skip_comments = skip_comments
1321 self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
1322 or self._WHITESPACE)
1323 self.contains_silent_marker_before_current_token = False
1324
1325 self._SkipWhitespace()
1326 self.NextToken()
1327
1328 def LookingAt(self, token):
1329 return self.token == token
1330
1331 def AtEnd(self):
1332 """Checks the end of the text was reached.
1333
1334 Returns:
1335 True iff the end was reached.
1336 """
1337 return not self.token
1338
1339 def _PopLine(self):
1340 while len(self._current_line) <= self._column:
1341 try:
1342 self._current_line = next(self._lines)
1343 except StopIteration:
1344 self._current_line = ''
1345 self._more_lines = False
1346 return
1347 else:
1348 self._line += 1
1349 self._column = 0
1350
1351 def _SkipWhitespace(self):
1352 while True:
1353 self._PopLine()
1354 match = self._whitespace_pattern.match(self._current_line, self._column)
1355 if not match:
1356 break
1357 self.contains_silent_marker_before_current_token = match.group(0) == (
1358 ' ' + _DEBUG_STRING_SILENT_MARKER)
1359 length = len(match.group(0))
1360 self._column += length
1361
1362 def TryConsume(self, token):
1363 """Tries to consume a given piece of text.
1364
1365 Args:
1366 token: Text to consume.
1367
1368 Returns:
1369 True iff the text was consumed.
1370 """
1371 if self.token == token:
1372 self.NextToken()
1373 return True
1374 return False
1375
1376 def Consume(self, token):
1377 """Consumes a piece of text.
1378
1379 Args:
1380 token: Text to consume.
1381
1382 Raises:
1383 ParseError: If the text couldn't be consumed.
1384 """
1385 if not self.TryConsume(token):
1386 raise self.ParseError('Expected "%s".' % token)
1387
1388 def ConsumeComment(self):
1389 result = self.token
1390 if not self._COMMENT.match(result):
1391 raise self.ParseError('Expected comment.')
1392 self.NextToken()
1393 return result
1394
1395 def ConsumeCommentOrTrailingComment(self):
1396 """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""
1397
1398 # Tokenizer initializes _previous_line and _previous_column to 0. As the
1399 # tokenizer starts, it looks like there is a previous token on the line.
1400 just_started = self._line == 0 and self._column == 0
1401
1402 before_parsing = self._previous_line
1403 comment = self.ConsumeComment()
1404
    # A trailing comment is a comment on the same line as the previous token.
1406 trailing = (self._previous_line == before_parsing
1407 and not just_started)
1408
1409 return trailing, comment
1410
1411 def TryConsumeIdentifier(self):
1412 try:
1413 self.ConsumeIdentifier()
1414 return True
1415 except ParseError:
1416 return False
1417
1418 def ConsumeIdentifier(self):
1419 """Consumes protocol message field identifier.
1420
1421 Returns:
1422 Identifier string.
1423
1424 Raises:
1425 ParseError: If an identifier couldn't be consumed.
1426 """
1427 result = self.token
1428 if not self._IDENTIFIER.match(result):
1429 raise self.ParseError('Expected identifier.')
1430 self.NextToken()
1431 return result
1432
1433 def TryConsumeIdentifierOrNumber(self):
1434 try:
1435 self.ConsumeIdentifierOrNumber()
1436 return True
1437 except ParseError:
1438 return False
1439
1440 def ConsumeIdentifierOrNumber(self):
1441 """Consumes protocol message field identifier.
1442
1443 Returns:
1444 Identifier string.
1445
1446 Raises:
1447 ParseError: If an identifier couldn't be consumed.
1448 """
1449 result = self.token
1450 if not self._IDENTIFIER_OR_NUMBER.match(result):
1451 raise self.ParseError('Expected identifier or number, got %s.' % result)
1452 self.NextToken()
1453 return result
1454
1455 def TryConsumeInteger(self):
1456 try:
1457 self.ConsumeInteger()
1458 return True
1459 except ParseError:
1460 return False
1461
1462 def ConsumeInteger(self):
1463 """Consumes an integer number.
1464
1465 Returns:
1466 The integer parsed.
1467
1468 Raises:
1469 ParseError: If an integer couldn't be consumed.
1470 """
1471 try:
1472 result = _ParseAbstractInteger(self.token)
1473 except ValueError as e:
1474 raise self.ParseError(str(e))
1475 self.NextToken()
1476 return result
1477
1478 def TryConsumeFloat(self):
1479 try:
1480 self.ConsumeFloat()
1481 return True
1482 except ParseError:
1483 return False
1484
1485 def ConsumeFloat(self):
1486 """Consumes an floating point number.
1487
1488 Returns:
1489 The number parsed.
1490
1491 Raises:
1492 ParseError: If a floating point number couldn't be consumed.
1493 """
1494 try:
1495 result = ParseFloat(self.token)
1496 except ValueError as e:
1497 raise self.ParseError(str(e))
1498 self.NextToken()
1499 return result
1500
1501 def ConsumeBool(self):
1502 """Consumes a boolean value.
1503
1504 Returns:
1505 The bool parsed.
1506
1507 Raises:
1508 ParseError: If a boolean value couldn't be consumed.
1509 """
1510 try:
1511 result = ParseBool(self.token)
1512 except ValueError as e:
1513 raise self.ParseError(str(e))
1514 self.NextToken()
1515 return result
1516
1517 def TryConsumeByteString(self):
1518 try:
1519 self.ConsumeByteString()
1520 return True
1521 except ParseError:
1522 return False
1523
1524 def ConsumeString(self):
1525 """Consumes a string value.
1526
1527 Returns:
1528 The string parsed.
1529
1530 Raises:
1531 ParseError: If a string value couldn't be consumed.
1532 """
1533 the_bytes = self.ConsumeByteString()
1534 try:
1535 return str(the_bytes, 'utf-8')
1536 except UnicodeDecodeError as e:
1537 raise self._StringParseError(e)
1538
1539 def ConsumeByteString(self):
1540 """Consumes a byte array value.
1541
1542 Returns:
      The byte array parsed (as a bytes object).
1544
1545 Raises:
1546 ParseError: If a byte array value couldn't be consumed.
1547 """
1548 the_list = [self._ConsumeSingleByteString()]
1549 while self.token and self.token[0] in _QUOTES:
1550 the_list.append(self._ConsumeSingleByteString())
1551 return b''.join(the_list)
1552
1553 def _ConsumeSingleByteString(self):
1554 """Consume one token of a string literal.
1555
1556 String literals (whether bytes or text) can come in multiple adjacent
1557 tokens which are automatically concatenated, like in C or Python. This
1558 method only consumes one token.
1559
1560 Returns:
1561 The token parsed.
1562 Raises:
1563 ParseError: When the wrong format data is found.
1564 """
1565 text = self.token
1566 if len(text) < 1 or text[0] not in _QUOTES:
1567 raise self.ParseError('Expected string but found: %r' % (text,))
1568
1569 if len(text) < 2 or text[-1] != text[0]:
1570 raise self.ParseError('String missing ending quote: %r' % (text,))
1571
1572 try:
1573 result = text_encoding.CUnescape(text[1:-1])
1574 except ValueError as e:
1575 raise self.ParseError(str(e))
1576 self.NextToken()
1577 return result
1578
1579 def ConsumeEnum(self, field):
1580 try:
1581 result = ParseEnum(field, self.token)
1582 except ValueError as e:
1583 raise self.ParseError(str(e))
1584 self.NextToken()
1585 return result
1586
1587 def ParseErrorPreviousToken(self, message):
1588 """Creates and *returns* a ParseError for the previously read token.
1589
1590 Args:
1591 message: A message to set for the exception.
1592
1593 Returns:
1594 A ParseError instance.
1595 """
1596 return ParseError(message, self._previous_line + 1,
1597 self._previous_column + 1)
1598
1599 def ParseError(self, message):
1600 """Creates and *returns* a ParseError for the current token."""
1601 return ParseError('\'' + self._current_line + '\': ' + message,
1602 self._line + 1, self._column + 1)
1603
1604 def _StringParseError(self, e):
1605 return self.ParseError('Couldn\'t parse string: ' + str(e))
1606
1607 def NextToken(self):
1608 """Reads the next meaningful token."""
1609 self._previous_line = self._line
1610 self._previous_column = self._column
1611 self.contains_silent_marker_before_current_token = False
1612
1613 self._column += len(self.token)
1614 self._SkipWhitespace()
1615
1616 if not self._more_lines:
1617 self.token = ''
1618 return
1619
1620 match = self._TOKEN.match(self._current_line, self._column)
1621 if not match and not self._skip_comments:
1622 match = self._COMMENT.match(self._current_line, self._column)
1623 if match:
1624 token = match.group(0)
1625 self.token = token
1626 else:
1627 self.token = self._current_line[self._column]
1628
1629# Aliased so it can still be accessed by current visibility violators.
1630# TODO: Migrate violators to textformat_tokenizer.
1631_Tokenizer = Tokenizer # pylint: disable=invalid-name
1632
1633
1634def _ConsumeInt32(tokenizer):
1635 """Consumes a signed 32bit integer number from tokenizer.
1636
1637 Args:
1638 tokenizer: A tokenizer used to parse the number.
1639
1640 Returns:
1641 The integer parsed.
1642
1643 Raises:
1644 ParseError: If a signed 32bit integer couldn't be consumed.
1645 """
1646 return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)
1647
1648
1649def _ConsumeUint32(tokenizer):
1650 """Consumes an unsigned 32bit integer number from tokenizer.
1651
1652 Args:
1653 tokenizer: A tokenizer used to parse the number.
1654
1655 Returns:
1656 The integer parsed.
1657
1658 Raises:
1659 ParseError: If an unsigned 32bit integer couldn't be consumed.
1660 """
1661 return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)
1662
1663
1664def _TryConsumeInt64(tokenizer):
1665 try:
1666 _ConsumeInt64(tokenizer)
1667 return True
1668 except ParseError:
1669 return False
1670
1671
1672def _ConsumeInt64(tokenizer):
1673 """Consumes a signed 32bit integer number from tokenizer.
1674
1675 Args:
1676 tokenizer: A tokenizer used to parse the number.
1677
1678 Returns:
1679 The integer parsed.
1680
1681 Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
1683 """
1684 return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)
1685
1686
1687def _TryConsumeUint64(tokenizer):
1688 try:
1689 _ConsumeUint64(tokenizer)
1690 return True
1691 except ParseError:
1692 return False
1693
1694
1695def _ConsumeUint64(tokenizer):
1696 """Consumes an unsigned 64bit integer number from tokenizer.
1697
1698 Args:
1699 tokenizer: A tokenizer used to parse the number.
1700
1701 Returns:
1702 The integer parsed.
1703
1704 Raises:
1705 ParseError: If an unsigned 64bit integer couldn't be consumed.
1706 """
1707 return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)
1708
1709
1710def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
1711 """Consumes an integer number from tokenizer.
1712
1713 Args:
1714 tokenizer: A tokenizer used to parse the number.
1715 is_signed: True if a signed integer must be parsed.
1716 is_long: True if a long integer must be parsed.
1717
1718 Returns:
1719 The integer parsed.
1720
1721 Raises:
1722 ParseError: If an integer with given characteristics couldn't be consumed.
1723 """
1724 try:
1725 result = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
1726 except ValueError as e:
1727 raise tokenizer.ParseError(str(e))
1728 tokenizer.NextToken()
1729 return result
1730
1731
1732def ParseInteger(text, is_signed=False, is_long=False):
1733 """Parses an integer.
1734
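  Hexadecimal and C-style octal literals are accepted in addition to decimal,
  for example::

    ParseInteger('0x1f')                # 31
    ParseInteger('010')                 # 8
    ParseInteger('-1', is_signed=True)  # -1
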
1735 Args:
1736 text: The text to parse.
1737 is_signed: True if a signed integer must be parsed.
1738 is_long: True if a long integer must be parsed.
1739
1740 Returns:
1741 The integer value.
1742
1743 Raises:
    ValueError: If the text is not a valid integer.
1745 """
1746 # Do the actual parsing. Exception handling is propagated to caller.
1747 result = _ParseAbstractInteger(text)
1748
1749 # Check if the integer is sane. Exceptions handled by callers.
1750 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1751 checker.CheckValue(result)
1752 return result
1753
1754
1755def _ParseAbstractInteger(text):
1756 """Parses an integer without checking size/signedness.
1757
1758 Args:
1759 text: The text to parse.
1760
1761 Returns:
1762 The integer value.
1763
1764 Raises:
    ValueError: If the text is not a valid integer.
1766 """
1767 # Do the actual parsing. Exception handling is propagated to caller.
1768 orig_text = text
1769 c_octal_match = re.match(r'(-?)0(\d+)$', text)
1770 if c_octal_match:
1771 # Python 3 no longer supports 0755 octal syntax without the 'o', so
1772 # we always use the '0o' prefix for multi-digit numbers starting with 0.
1773 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2)
1774 try:
1775 return int(text, 0)
1776 except ValueError:
1777 raise ValueError('Couldn\'t parse integer: %s' % orig_text)
1778
1779
1780def ParseFloat(text):
1781 """Parse a floating point number.
1782
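  Besides Python-style literals, the C-style spellings used by the text format
  are accepted, for example::

    ParseFloat('1.5f')   # 1.5
    ParseFloat('-inf')   # float('-inf')
    ParseFloat('nan')    # float('nan')
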
1783 Args:
1784 text: Text to parse.
1785
1786 Returns:
1787 The number parsed.
1788
1789 Raises:
1790 ValueError: If a floating point number couldn't be parsed.
1791 """
1792 try:
1793 # Assume Python compatible syntax.
1794 return float(text)
1795 except ValueError:
1796 # Check alternative spellings.
1797 if _FLOAT_INFINITY.match(text):
1798 if text[0] == '-':
1799 return float('-inf')
1800 else:
1801 return float('inf')
1802 elif _FLOAT_NAN.match(text):
1803 return float('nan')
1804 else:
1805 # assume '1.0f' format
1806 try:
1807 return float(text.rstrip('f'))
1808 except ValueError:
1809 raise ValueError('Couldn\'t parse float: %s' % text)
1810
1811
1812def ParseBool(text):
1813 """Parse a boolean value.
1814
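  Accepted spellings include, for example::

    ParseBool('true')   # True
    ParseBool('1')      # True
    ParseBool('False')  # False
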
1815 Args:
1816 text: Text to parse.
1817
1818 Returns:
    The boolean value parsed.
1820
1821 Raises:
1822 ValueError: If text is not a valid boolean.
1823 """
1824 if text in ('true', 't', '1', 'True'):
1825 return True
1826 elif text in ('false', 'f', '0', 'False'):
1827 return False
1828 else:
1829 raise ValueError('Expected "true" or "false".')
1830
1831
1832def ParseEnum(field, value):
1833 """Parse an enum value.
1834
1835 The value can be specified by a number (the enum value), or by
1836 a string literal (the enum name).
1837
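  For example (a sketch; ``color`` is a hypothetical enum field of ``MyProto``)::

    field = MyProto.DESCRIPTOR.fields_by_name['color']
    ParseEnum(field, 'RED')  # the number of the enum value named RED
    ParseEnum(field, '2')    # 2, if that number is valid for the enum type
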
1838 Args:
1839 field: Enum field descriptor.
1840 value: String value.
1841
1842 Returns:
1843 Enum value number.
1844
1845 Raises:
1846 ValueError: If the enum value could not be parsed.
1847 """
1848 enum_descriptor = field.enum_type
1849 try:
1850 number = int(value, 0)
1851 except ValueError:
1852 # Identifier.
1853 enum_value = enum_descriptor.values_by_name.get(value, None)
1854 if enum_value is None:
1855 raise ValueError('Enum type "%s" has no value named %s.' %
1856 (enum_descriptor.full_name, value))
1857 else:
1858 if not field.enum_type.is_closed:
1859 return number
1860 enum_value = enum_descriptor.values_by_number.get(number, None)
1861 if enum_value is None:
1862 raise ValueError('Enum type "%s" has no value with number %d.' %
1863 (enum_descriptor.full_name, number))
1864 return enum_value.number