Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/protobuf/text_format.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

800 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

20__author__ = 'kenton@google.com (Kenton Varda)' 

21 

22# TODO Import thread contention leads to test failures. 

23import encodings.raw_unicode_escape # pylint: disable=unused-import 

24import encodings.unicode_escape # pylint: disable=unused-import 

25import io 

26import math 

27import re 

28import warnings 

29 

30from google.protobuf.internal import decoder 

31from google.protobuf.internal import type_checkers 

32from google.protobuf import descriptor 

33from google.protobuf import text_encoding 

34from google.protobuf import unknown_fields 

35 

36# pylint: disable=g-import-not-at-top 

# Names exported by `from google.protobuf.text_format import *`.
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'MessageToBytes']

# One checker instance per integer width/signedness combination.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Optional '-', then 'inf' or 'infinity', then an optional trailing 'f';
# case-insensitive.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
# 'nan' with an optional trailing 'f'; case-insensitive.
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
# Optional '-', then a '0' followed by at least one more digit.
_FLOAT_OCTAL_PREFIX = re.compile('-?0[0-9]+')
# Exactly '%' followed by two hexadecimal digits.
_PERCENT_ENCODING = re.compile(r'^%[\da-fA-F][\da-fA-F]$')
# One or more identifiers (not starting with a digit) joined by dots.
_TYPE_NAME = re.compile(r'^[^\d\W]\w*(\.[^\d\W]\w*)*$')
# The single-quote and double-quote characters.
_QUOTES = frozenset(("'", '"'))
# Compared against DESCRIPTOR.full_name to recognise Any messages.
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
# A tab followed by a space.
# NOTE(review): presumably the marker looked for by the silent-marker
# detection in the parser -- confirm against the tokenizer.
_DEBUG_STRING_SILENT_MARKER = '\t '

# Default for the `as_utf8` parameter of the printing entry points.
_as_utf8_default = True

54 

55 

class Error(Exception):
  """Base class of the exceptions defined by the text_format module."""

58 

59 

class ParseError(Error):
  """Raised when text-format input cannot be tokenized or parsed."""

  def __init__(self, message=None, line=None, column=None):
    """Builds the error, prefixing the message with its location if known.

    Args:
      message: Optional description of the problem.
      line: Optional line reference; when given together with a message the
        message becomes '<line> : <message>'.
      column: Optional column reference; when given together with a line the
        prefix becomes '<line>:<column>'.
    """
    if message is not None and line is not None:
      position = str(line)
      if column is not None:
        position = position + ':{0}'.format(column)
      message = '{0} : {1}'.format(position, message)
    # With no message the exception is created without arguments.
    init_args = () if message is None else (message,)
    super(ParseError, self).__init__(*init_args)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the `line` value given at construction (may be None)."""
    return self._line

  def GetColumn(self):
    """Returns the `column` value given at construction (may be None)."""
    return self._column

81 

82 

class TextWriter(object):
  """Minimal text sink that forwards to an in-memory io.StringIO buffer."""

  def __init__(self, as_utf8):
    # `as_utf8` is part of the constructor signature but is not consulted
    # here; the buffer always stores str.
    self._writer = io.StringIO()

  def write(self, val):
    """Appends `val` to the buffer; returns the buffer's write() result."""
    buffer = self._writer
    return buffer.write(val)

  def close(self):
    """Closes the underlying buffer."""
    buffer = self._writer
    return buffer.close()

  def getvalue(self):
    """Returns everything written to the buffer so far."""
    buffer = self._writer
    return buffer.getvalue()

96 

97 

def MessageToString(
    message,
    as_utf8=_as_utf8_default,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Renders a protobuf message as a text-format string.

  Args:
    message: The protocol buffers message to print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields. Trailing whitespace
      is stripped from the result in this mode.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number;
      extensions will be printed at the end of the message and their relative
      order is determined by the extension number. By default, use the field
      number order.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  _Printer(
      out=writer,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # Each field is followed by a separator, so the one-line form ends with a
  # dangling space that is trimmed here.
  return text.rstrip() if as_one_line else text

160 

161 

def MessageToBytes(message, **kwargs) -> bytes:
  """Convert protobuf message to encoded text format.

  Args:
    message: The protocol buffers message.
    **kwargs: Keyword arguments forwarded unchanged to MessageToString.

  Returns:
    bytes: The text format output, encoded as UTF-8 when `as_utf8` is in
    effect and as ASCII otherwise.
  """
  text = MessageToString(message, **kwargs)
  if isinstance(text, bytes):
    return text
  # When the caller omits `as_utf8`, MessageToString falls back to
  # _as_utf8_default, so the codec must use the same fallback. Otherwise
  # unescaped non-ASCII output would be encoded as ASCII and raise
  # UnicodeEncodeError.
  as_utf8 = kwargs.get('as_utf8', _as_utf8_default)
  return text.encode('utf-8' if as_utf8 else 'ascii')

169 

170 

def _IsMapEntry(field):
  """Tells whether `field` is a message field whose type is a map entry.

  Args:
    field: The field descriptor.

  Returns:
    A truthy value when the field is of TYPE_MESSAGE and its message type
    carries options with `map_entry` set; a falsy value otherwise.
  """
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

175 

176 

def _IsGroupLike(field):
  """Determines if a field is consistent with a proto2 group.

  Args:
    field: The field descriptor.

  Returns:
    True if this field is group-like, false otherwise.
  """
  # Groups are always tag-delimited.
  if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
    return False

  group_type = field.message_type
  # A group field is always named after the lowercased type name, and the
  # type must live in the same file as the field.
  if field.name != group_type.name.lower() or group_type.file != field.file:
    return False

  # Group messages are always defined in the same scope as the field. File
  # level extensions will compare NULL == NULL here, which is why the file
  # comparison above is necessary to ensure both come from the same file.
  if field.is_extension:
    expected_scope = field.extension_scope
  else:
    expected_scope = field.containing_type
  return group_type.containing_type == expected_scope

205 

206 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=_as_utf8_default,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Writes the text-format rendering of `message` to the `out` stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintMessage(message)

255 

256 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=_as_utf8_default,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Writes one field as a 'name value' pair to `out`.

  Args:
    field: The descriptor of the field to be printed.
    value: The value of the field.
    out: The stream to write to.
    indent, as_utf8, as_one_line, use_short_repeated_primitives,
    pointy_brackets, use_index_order, message_formatter,
    print_unknown_fields, force_colon: Formatting options, passed through to
      the printer unchanged.
  """
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintField(field, value)

283 

284 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=_as_utf8_default,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Writes only the value of one field (without its name) to `out`.

  Args:
    field: The descriptor of the field to be printed.
    value: The value of the field.
    out: The stream to write to.
    indent, as_utf8, as_one_line, use_short_repeated_primitives,
    pointy_brackets, use_index_order, message_formatter,
    print_unknown_fields, force_colon: Formatting options, passed through to
      the printer unchanged.
  """
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintFieldValue(field, value)

311 

312 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Creates an empty message instance for a fully-qualified type name.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance, or None to use the default
      pool.

  Returns:
    A new Message instance of the type named by type_name, or None if the
    pool raised KeyError while looking the name up.
  """
  # pylint: disable=g-import-not-at-top
  from google.protobuf import message_factory

  pool = descriptor_pool
  if pool is None:
    from google.protobuf import descriptor_pool as pool_mod

    pool = pool_mod.Default()

  try:
    message_descriptor = pool.FindMessageTypeByName(type_name)
  except KeyError:
    return None
  return message_factory.GetMessageClass(message_descriptor)()

337 

338 

# These values must match WireType enum in //google/protobuf/wire_format.h.
# They are compared with the `wire_type` of unknown fields when printing.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

342 

343 

class _Printer(object):
  """Text format printer for protocol message.

  Holds the output stream, the current indent and the formatting options, and
  writes messages, single fields and unknown fields to the stream.
  """

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=_as_utf8_default,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False,
  ):
    """Initialize the Printer.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Prints an Any message in expanded '[type_url] { ... }' form if possible.

    Args:
      message: A google.protobuf.Any message.

    Returns:
      True if the expanded form was written. False, with nothing written, if
      type_url contains no '/' or the packed type could not be built from the
      descriptor pool.
    """
    if '/' not in message.type_url:
      return False
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message is None:
      return False
    packed_message.MergeFromString(message.value)
    colon = ':' if self.force_colon else ''
    self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
    self._PrintMessageFieldValue(packed_message)
    self.out.write(' ' if self.as_one_line else '\n')
    return True

  def _TryCustomFormatMessage(self, message):
    """Prints `message` through message_formatter if it returns a result.

    Returns:
      True if the formatter returned text (which is then written at the
      current indent), False if it returned None.
    """
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree.  Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO: refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.is_repeated:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintNestedUnknownFields(self, nested_fields):
    """Writes `nested_fields` wrapped in braces after a field number."""
    out = self.out
    if self.as_one_line:
      out.write(' { ')
      self._PrintUnknownFields(nested_fields)
      out.write('} ')
    else:
      out.write(' {\n')
      self.indent += 2
      self._PrintUnknownFields(nested_fields)
      self.indent -= 2
      out.write(' ' * self.indent + '}\n')

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        self._PrintNestedUnknownFields(field.data)
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        embedded_unknown_message = None
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
          is_embedded_message = pos == len(field.data)
        except Exception:  # pylint: disable=broad-except
          # A failed decode must never be treated as a message, even when the
          # payload is empty; otherwise an unbound or stale result from an
          # earlier iteration could be printed.
          is_embedded_message = False

        if is_embedded_message:
          self._PrintNestedUnknownFields(embedded_unknown_message)
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            not field.is_required and
            not field.is_repeated):
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif _IsGroupLike(field):
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints a repeated primitive field as 'name: [v1, v2, ...]'."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for position, element in enumerate(value):
      if position:
        self.out.write(', ')
      self.PrintFieldValue(field, element)
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a message value wrapped in '{ }' or, if configured, '< >'."""
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Numbers without a declared enum value are printed as-is.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      out.write('true' if value else 'false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if math.isnan(value):
        out.write(str(value))
      else:
        out.write(str(type_checkers.ToShortestFloat(value)))
    else:
      out.write(str(value))

629 

630 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False,
          max_recursion_depth=None):
  """Parses text-format input and merges the result into `message`.

  NOTE: for historical reasons the target message is not cleared first, which
  differs from the binary msg.ParseFrom(...). When the text names a field that
  is already set in message, a repeated field gets the value appended; for any
  other field an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation. bytes input is also split on
      newlines and handed on line by line.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)
    max_recursion_depth: Optional maximum recursion depth of a text proto
      message to be deserialized. Text proto messages over this depth will
      fail to parse. ``None`` keeps the historical unbounded behavior.

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # The separator has to be of the same type (bytes or str) as the input.
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(
      text.split(newline),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field,
      max_recursion_depth=max_recursion_depth)

692 

693 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False,
          max_recursion_depth=None):
  """Parses text-format input and merges the result into `message`.

  Works like Parse(), except that a non-repeated field may be given more than
  once, in which case the last value wins. As a consequence, non-repeated
  top-level fields present in text replace those in the message.

  Args:
    text (str): Message text representation. bytes input is also split on
      newlines and handed on line by line.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)
    max_recursion_depth: Optional maximum recursion depth of a text proto
      message to be deserialized. Text proto messages over this depth will
      fail to parse. ``None`` keeps the historical unbounded behavior.

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # The separator has to be of the same type (bytes or str) as the input.
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(
      text.split(newline),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field,
      max_recursion_depth=max_recursion_depth)

735 

736 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False,
               max_recursion_depth=None):
  """Parses a message's text representation, given as lines, into `message`.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)
    max_recursion_depth: Optional maximum recursion depth of a text proto
      message to be deserialized. Text proto messages over this depth will
      fail to parse. ``None`` keeps the historical unbounded behavior.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # A fresh parser is built for every call.
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field,
      max_recursion_depth=max_recursion_depth,
  ).ParseLines(lines, message)

774 

775 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False,
               max_recursion_depth=None):
  """Merges a message's text representation, given as lines, into `message`.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)
    max_recursion_depth: Optional maximum recursion depth of a text proto
      message to be deserialized. Text proto messages over this depth will
      fail to parse. ``None`` keeps the historical unbounded behavior.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # A fresh parser is built for every call.
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field,
      max_recursion_depth=max_recursion_depth,
  ).MergeLines(lines, message)

813 

814 

815class _Parser(object): 

816 """Text format parser for protocol message.""" 

817 

818 def __init__(self, 

819 allow_unknown_extension=False, 

820 allow_field_number=False, 

821 descriptor_pool=None, 

822 allow_unknown_field=False, 

823 max_recursion_depth=None): 

824 self.allow_unknown_extension = allow_unknown_extension 

825 self.allow_field_number = allow_field_number 

826 self.descriptor_pool = descriptor_pool 

827 self.allow_unknown_field = allow_unknown_field 

828 self.max_recursion_depth = max_recursion_depth 

829 self.recursion_depth = 0 

830 

def ParseLines(self, lines, message):
  """Parses `lines` into `message` and returns that same message.

  Clears the _allow_multiple_scalars flag before parsing.
  NOTE(review): the flag is presumably what makes a repeated occurrence of a
  non-repeated field an error -- confirm against the field-merging code.
  """
  self._allow_multiple_scalars = False
  self._ParseOrMerge(lines, message)
  return message

836 

def MergeLines(self, lines, message):
  """Merges `lines` into `message` and returns that same message.

  Sets the _allow_multiple_scalars flag before parsing.
  NOTE(review): the flag is presumably what lets a non-repeated field appear
  more than once -- confirm against the field-merging code.
  """
  self._allow_multiple_scalars = True
  self._ParseOrMerge(lines, message)
  return message

842 

def _ParseOrMerge(self, lines, message):
  """Converts a text representation of a protocol message into a message.

  Args:
    lines: Lines of a message's text representation (str or UTF-8 bytes).
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On text parsing problems, including lines that are not valid
      UTF-8 and input nested deeper than max_recursion_depth.
  """

  def _DecodedLines():
    # Lines are decoded lazily, as they are pulled from this generator. The
    # conversion to ParseError therefore has to happen here: a decode failure
    # on a line that is only consumed later, during parsing, would otherwise
    # escape as a raw UnicodeDecodeError.
    for line in lines:
      if isinstance(line, str):
        yield line
        continue
      try:
        decoded = line.decode('utf-8')
      except UnicodeDecodeError as e:
        raise ParseError from e
      yield decoded

  # Tokenize expects native str lines.
  try:
    tokenizer = Tokenizer(_DecodedLines())
  except UnicodeDecodeError as e:
    raise ParseError from e
  # Direct comparison with None is used instead of implicit bool conversion,
  # because some message types (e.g. google.protobuf.ListValue) are falsy
  # while empty.
  if message is not None:
    self.root_type = message.DESCRIPTOR.full_name
  self.recursion_depth += 1
  try:
    if (
        self.max_recursion_depth is not None
        and self.recursion_depth > self.max_recursion_depth
    ):
      raise ParseError(
          'Message too deep. Max recursion depth is {0}'.format(
              self.max_recursion_depth
          )
      )
    while not tokenizer.AtEnd():
      self._MergeField(tokenizer, message)
  finally:
    # Restore the counter on failure too, so that the parser's depth
    # bookkeeping stays correct after a ParseError.
    self.recursion_depth -= 1

876 

def _MergeMessage(self, tokenizer, message, end_token):
  """Merges fields into `message` until `end_token` closes the message body.

  Args:
    tokenizer: A tokenizer to parse the fields; the opening delimiter is
      consumed by the caller before this method runs.
    message: The protocol message that receives the parsed fields.
    end_token: The closing delimiter that ends this message body.

  Raises:
    ParseError: If nesting exceeds `max_recursion_depth`, if the input ends
      before `end_token` is found, or on any other text parsing problem.
  """
  self.recursion_depth += 1
  try:
    if (
        self.max_recursion_depth is not None
        and self.recursion_depth > self.max_recursion_depth
    ):
      raise ParseError(
          'Message too deep. Max recursion depth is {0}'.format(
              self.max_recursion_depth
          )
      )
    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken(
            'Expected "%s".' % (end_token,)
        )
      self._MergeField(tokenizer, message)
  finally:
    # Always undo the increment, including on error, so the depth counter
    # matches the real nesting level for any later use of this parser.
    self.recursion_depth -= 1

895 

def _MergeField(self, tokenizer, message):
  """Merges a single protocol message field into a message.

  Three syntaxes are recognised, in this order:
    1. The expanded form of google.protobuf.Any: `[prefix/type.name] { ... }`.
    2. A bracketed extension name: `[pkg.ext_name]: value`.
    3. A regular field, named by identifier, or by field number when
       `allow_field_number` is set.

  Unknown extensions/fields are skipped when `allow_unknown_extension` /
  `allow_unknown_field` permit it; otherwise they are errors.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.

  Raises:
    ParseError: In case of text parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
      tokenizer.TryConsume('[')):
    type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
    tokenizer.TryConsume(':')
    self._DetectSilentMarker(
        tokenizer,
        message_descriptor.full_name,
        type_url_prefix + '/' + packed_type_name,
    )
    if tokenizer.TryConsume('<'):
      expanded_any_end_token = '>'
    else:
      tokenizer.Consume('{')
      expanded_any_end_token = '}'
    expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
                                                         self.descriptor_pool)
    # Direct comparison with None is used instead of implicit bool conversion
    # to avoid false positives with falsy initial values, e.g. for
    # google.protobuf.ListValue.
    if expanded_any_sub_message is None:
      raise ParseError('Type %s not found in descriptor pool' %
                       packed_type_name)
    self._MergeMessage(
        tokenizer, expanded_any_sub_message, expanded_any_end_token
    )

    message.Pack(
        expanded_any_sub_message,
        type_url_prefix=type_url_prefix + '/',
        deterministic=False,
    )
    # Note: this path returns before the optional trailing ',' / ';' handling
    # at the end of the method.
    return

  if tokenizer.TryConsume('['):
    # Extension: a dotted name enclosed in brackets.
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    # pylint: disable=protected-access
    field = message.Extensions._FindExtensionByName(name)
    # pylint: enable=protected-access
    if not field:
      if self.allow_unknown_extension:
        field = None
      else:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" not registered. '
            'Did you import the _pb2 module which defines it? '
            'If you are trying to place the extension in the MessageSet '
            'field of another message that is in an Any or MessageSet field, '
            'that message\'s _pb2 module must be imported as well' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' %
          (name, message_descriptor.full_name))

    tokenizer.Consume(']')

  else:
    name = tokenizer.ConsumeIdentifierOrNumber()
    if self.allow_field_number and name.isdigit():
      try:
        number = ParseInteger(name, True, True)
      except ValueError as e:
        # ParseInteger signals bad or out-of-range numbers with ValueError;
        # report it as a ParseError like every other problem in this method.
        raise tokenizer.ParseErrorPreviousToken(str(e)) from e
      field = message_descriptor.fields_by_number.get(number, None)
      if not field and message_descriptor.is_extendable:
        # pylint: disable=protected-access
        field = message.Extensions._FindExtensionByNumber(number)
        # pylint: enable=protected-access
    else:
      field = message_descriptor.fields_by_name.get(name, None)

      # Group names are expected to be capitalized as they appear in the
      # .proto file, which actually matches their type names, not their field
      # names.
      if not field:
        field = message_descriptor.fields_by_name.get(name.lower(), None)
        if field and not _IsGroupLike(field):
          field = None
        if field and field.message_type.name != name:
          field = None

    if not field and not self.allow_unknown_field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' %
          (message_descriptor.full_name, name))

  if field:
    if not self._allow_multiple_scalars and field.containing_oneof:
      # Check if there's a different field set in this oneof.
      # Note that we ignore the case if the same field was set before, and we
      # apply _allow_multiple_scalars to non-scalar fields as well.
      which_oneof = message.WhichOneof(field.containing_oneof.name)
      if which_oneof is not None and which_oneof != field.name:
        raise tokenizer.ParseErrorPreviousToken(
            'Field "%s" is specified along with field "%s", another member '
            'of oneof "%s" for message type "%s".' %
            (field.name, which_oneof, field.containing_oneof.name,
             message_descriptor.full_name))

    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      # The ':' separator is optional before a message value...
      tokenizer.TryConsume(':')
      self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                               field.full_name)
      merger = self._MergeMessageField
    else:
      # ...but mandatory before a scalar value.
      tokenizer.Consume(':')
      self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                               field.full_name)
      merger = self._MergeScalarField

    if (field.is_repeated and
        tokenizer.TryConsume('[')):
      # Short repeated format, e.g. "foo: [1, 2, 3]"
      if not tokenizer.TryConsume(']'):
        while True:
          merger(tokenizer, message, field)
          if tokenizer.TryConsume(']'):
            break
          tokenizer.Consume(',')

    else:
      merger(tokenizer, message, field)

  else:  # Proto field is unknown.
    assert (self.allow_unknown_extension or self.allow_unknown_field)
    self._SkipFieldContents(tokenizer, name, message_descriptor.full_name)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  if not tokenizer.TryConsume(','):
    tokenizer.TryConsume(';')

1041 

1042 def _LogSilentMarker(self, immediate_message_type, field_name): 

1043 pass 

1044 

1045 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

1046 if tokenizer.contains_silent_marker_before_current_token: 

1047 self._LogSilentMarker(immediate_message_type, field_name) 

1048 

def _ConsumeAnyTypeUrl(self, tokenizer):
  """Consumes a google.protobuf.Any type URL.

  The caller must already have consumed the opening '['. This method reads
  everything up to and including the closing ']'.

  Args:
    tokenizer: A tokenizer to parse the type URL.

  Returns:
    A `(prefix, type_name)` tuple, split at the last '/' of the URL; the
    prefix carries no trailing slash.

  Raises:
    ParseError: If the URL has no '/', has an empty prefix or one starting
      with '/', contains a malformed percent escape in the prefix, or does
      not end in a valid type name.
  """
  # Collect URL-character tokens and '/' separators until the closing ']'.
  # Whitespace and comments between tokens are skipped by the Tokenizer.
  pieces = []
  final_slash_index = -1
  closed = False
  while not closed:
    try:
      chunk = tokenizer.ConsumeUrlChars()
    except ParseError:
      chunk = None
    if chunk is not None:
      pieces.append(chunk)
    elif tokenizer.TryConsume('/'):
      final_slash_index = len(pieces)
      pieces.append('/')
    else:
      tokenizer.Consume(']')
      closed = True

  if final_slash_index == -1:
    raise tokenizer.ParseError('Type URL does not contain "/".')

  prefix = ''.join(pieces[:final_slash_index])
  name = ''.join(pieces[final_slash_index + 1:])

  if not prefix:
    raise tokenizer.ParseError('Type URL prefix is empty.')
  if prefix.startswith('/'):
    raise tokenizer.ParseError('Type URL prefix starts with "/".')

  # Every '%' in the prefix must begin a valid escape: '%' followed by exactly
  # two hexadecimal digits.
  percent_at = prefix.find('%')
  while percent_at != -1:
    escape = prefix[percent_at:percent_at + 3]
    if not _PERCENT_ENCODING.match(escape):
      raise tokenizer.ParseError(
          f'Invalid percent escape, got "{escape}".'
      )
    percent_at = prefix.find('%', percent_at + 1)

  # The part after the last slash must be a real type name, not merely a run
  # of URL characters.
  if not _TYPE_NAME.match(name):
    raise tokenizer.ParseError('Expected type name, got "%s".' % name)

  return prefix, name

1103 

def _MergeMessageField(self, tokenizer, message, field):
  """Merges a single message-typed field value into a message.

  The value is a message body delimited by '<' ... '>' or '{' ... '}'. Map
  entries are parsed into a temporary entry message and then copied into the
  map under the parsed key.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: The message of which field is a member.
    field: The descriptor of the field to be merged.

  Raises:
    ParseError: In case of text parsing problems.
  """
  is_map_entry = _IsMapEntry(field)

  # Pick the closing delimiter that matches the opening one.
  end_token = '>' if tokenizer.TryConsume('<') else None
  if end_token is None:
    tokenizer.Consume('{')
    end_token = '}'

  if not field.is_repeated:
    if field.is_extension:
      if (not self._allow_multiple_scalars and
          message.HasExtension(field)):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" extensions.' %
            (message.DESCRIPTOR.full_name, field.full_name))
      sub_message = message.Extensions[field]
    else:
      # Also apply _allow_multiple_scalars to message field.
      # TODO: Change to _allow_singular_overwrites.
      if (not self._allow_multiple_scalars and
          message.HasField(field.name)):
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" should not have multiple "%s" fields.' %
            (message.DESCRIPTOR.full_name, field.name))
      sub_message = getattr(message, field.name)
    # Mark the singular sub-message as present even if its body is empty.
    sub_message.SetInParent()
  elif field.is_extension:
    sub_message = message.Extensions[field].add()
  elif is_map_entry:
    # Stand-alone entry message; copied into the map after parsing.
    sub_message = getattr(message, field.name).GetEntryClass()()
  else:
    sub_message = getattr(message, field.name).add()

  self._MergeMessage(tokenizer, sub_message, end_token)

  if not is_map_entry:
    return

  value_cpptype = field.message_type.fields_by_name['value'].cpp_type
  map_container = getattr(message, field.name)
  if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Message-valued maps: fetch the slot for the key and copy into it.
    map_container[sub_message.key].CopyFrom(sub_message.value)
  else:
    map_container[sub_message.key] = sub_message.value

1158 

def _MergeScalarField(self, tokenizer, message, field):
  """Reads one scalar value from the tokenizer and stores it in a message.

  Repeated fields get the value appended. Singular fields get it assigned,
  unless duplicates are disallowed and the field already holds a value.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.

  Raises:
    ParseError: In case of text parsing problems.
    RuntimeError: On runtime errors.
  """
  _ = self.allow_unknown_extension
  fd = descriptor.FieldDescriptor
  kind = field.type

  # Choose the consumer matching the declared field type.
  if kind in (fd.TYPE_INT32, fd.TYPE_SINT32, fd.TYPE_SFIXED32):
    value = _ConsumeInt32(tokenizer)
  elif kind in (fd.TYPE_INT64, fd.TYPE_SINT64, fd.TYPE_SFIXED64):
    value = _ConsumeInt64(tokenizer)
  elif kind in (fd.TYPE_UINT32, fd.TYPE_FIXED32):
    value = _ConsumeUint32(tokenizer)
  elif kind in (fd.TYPE_UINT64, fd.TYPE_FIXED64):
    value = _ConsumeUint64(tokenizer)
  elif kind in (fd.TYPE_FLOAT, fd.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif kind == fd.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif kind == fd.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif kind == fd.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif kind == fd.TYPE_ENUM:
    value = tokenizer.ConsumeEnum(field)
  else:
    raise RuntimeError('Unknown field type %d' % kind)

  if field.is_repeated:
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
    return

  if field.is_extension:
    if (not self._allow_multiple_scalars and
        field.has_presence and
        message.HasExtension(field)):
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" extensions.' %
          (message.DESCRIPTOR.full_name, field.full_name))
    message.Extensions[field] = value
    return

  if not self._allow_multiple_scalars:
    if field.has_presence:
      already_set = message.HasField(field.name)
    else:
      # Without presence tracking, the best available duplicate check is
      # whether the current value differs from the default.
      already_set = not decoder.IsDefaultScalarValue(
          getattr(message, field.name)
      )
    if already_set:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" should not have multiple "%s" fields.' %
          (message.DESCRIPTOR.full_name, field.name))
  setattr(message, field.name, value)

1235 

def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type):
  """Skips over contents (value or message) of a field.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    field_name: The field name currently being parsed.
    immediate_message_type: The type of the message immediately containing
      the silent marker.
  """
  # The field's type is unknown, so guess it from the syntax. A scalar (or a
  # bracketed list) needs a ':' that is not followed by '{' or '<'. With no
  # ':', or with a message opener after the ':', the contents must be a
  # message body, otherwise the input is ill-formed.
  saw_colon = tokenizer.TryConsume(':')
  is_message_body = (
      not saw_colon or tokenizer.LookingAt('{') or tokenizer.LookingAt('<'))

  self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)

  if is_message_body:
    self._SkipFieldMessage(tokenizer, immediate_message_type)
  elif tokenizer.LookingAt('['):
    self._SkipRepeatedFieldValue(tokenizer, immediate_message_type)
  else:
    self._SkipFieldValue(tokenizer)

1261 

def _SkipField(self, tokenizer, immediate_message_type):
  """Skips over a complete field (name and value/message).

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    immediate_message_type: The type of the message immediately containing
      the silent marker.
  """
  name_parts = []
  if tokenizer.TryConsume('['):
    # Bracketed name: an extension or a google.protobuf.Any type URL.
    name_parts.append('[' + tokenizer.ConsumeIdentifier())
    identifier_count = 1
    while tokenizer.TryConsume('.'):
      name_parts.append('.' + tokenizer.ConsumeIdentifier())
      identifier_count += 1
    # Three dotted identifiers followed by '/' is possibly a type URL for an
    # Any message; consume the type name after the slash as well.
    if identifier_count == 3 and tokenizer.TryConsume('/'):
      name_parts.append('/' + tokenizer.ConsumeIdentifier())
      while tokenizer.TryConsume('.'):
        name_parts.append('.' + tokenizer.ConsumeIdentifier())
    tokenizer.Consume(']')
    name_parts.append(']')
  else:
    name_parts.append(tokenizer.ConsumeIdentifierOrNumber())

  self._SkipFieldContents(
      tokenizer, ''.join(name_parts), immediate_message_type)

  # For historical reasons, fields may optionally be separated by commas or
  # semicolons.
  tokenizer.TryConsume(',') or tokenizer.TryConsume(';')

1294 

def _SkipFieldMessage(self, tokenizer, immediate_message_type):
  """Skips over a field message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    immediate_message_type: The type of the message immediately containing
      the silent marker.
  """
  # Remember which closing delimiter pairs with the opening one.
  closing = '>' if tokenizer.TryConsume('<') else None
  if closing is None:
    tokenizer.Consume('{')
    closing = '}'

  # Skip fields until either closing delimiter shows up. The Consume() below
  # then verifies it is the one that matches the opener.
  while not (tokenizer.LookingAt('>') or tokenizer.LookingAt('}')):
    self._SkipField(tokenizer, immediate_message_type)

  tokenizer.Consume(closing)

1313 

def _SkipFieldValue(self, tokenizer):
  """Skips over a field value.

  The value is tried, in order, as a string literal, an identifier, a signed
  64bit integer, an unsigned 64bit integer and a float. The first form that
  matches is consumed.

  Args:
    tokenizer: A tokenizer to parse the field name and values.

  Raises:
    ParseError: In case an invalid field value is found.
  """
  if tokenizer.TryConsumeByteString():
    return
  if tokenizer.TryConsumeIdentifier():
    return
  if _TryConsumeInt64(tokenizer):
    return
  if _TryConsumeUint64(tokenizer):
    return
  if tokenizer.TryConsumeFloat():
    return
  raise ParseError('Invalid field value: ' + tokenizer.token)

1329 

def _SkipRepeatedFieldValue(self, tokenizer, immediate_message_type):
  """Skips over a repeated field value in short form, e.g. `[1, 2, 3]`.

  Args:
    tokenizer: A tokenizer to parse the field value.
    immediate_message_type: The type of the message immediately containing
      the silent marker.
  """
  tokenizer.Consume('[')
  if tokenizer.TryConsume(']'):
    # Empty list.
    return
  while True:
    # Each element is either a message body or a scalar value.
    element_is_message = tokenizer.LookingAt('<') or tokenizer.LookingAt('{')
    if element_is_message:
      self._SkipFieldMessage(tokenizer, immediate_message_type)
    else:
      self._SkipFieldValue(tokenizer)
    if tokenizer.TryConsume(']'):
      return
    tokenizer.Consume(',')

1346 

1347 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.

  The tokenizer keeps exactly one look-ahead token in `self.token`. An empty
  string there means the input is exhausted (see `AtEnd`). The `Consume*`
  methods validate the current token, advance with `NextToken`, and return
  the parsed value. The `TryConsume*` variants return a bool instead of
  raising ParseError.
  """

  # Plain whitespace; used between tokens when comments are NOT skipped.
  _WHITESPACE = re.compile(r'\s+')
  # A '#' comment running to end of line, with optional leading whitespace.
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  # Whitespace and/or comments; used between tokens when comments are skipped.
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # Multi-character tokens. Anything this pattern does not match is emitted
  # by NextToken as a single-character token.
  _TOKEN = re.compile(
      '|'.join(
          [
              r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
              r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
          ]
          + [  # quoted str for each quote mark
              # Avoid backtracking! https://stackoverflow.com/a/844267
              r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(
                  qt=mark
              )
              for mark in _QUOTES
          ]
      )
  )

  # NOTE: the two patterns below are applied with .match(), so they only
  # constrain the START of the token, not the whole token.
  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')
  # Accepted URL characters (excluding "/")
  _URL_CHARS = re.compile(r'^[0-9a-zA-Z-.~_ !$&()*+,;=%]+$')

  def __init__(self, lines, skip_comments=True):
    """Creates a tokenizer and positions it on the first token.

    Args:
      lines: An iterable of str lines; it is consumed lazily, one line at a
        time.
      skip_comments: If True, '#' comments are treated like whitespace. If
        False, comments are returned as tokens (see ConsumeComment).
    """
    self._position = 0
    # Index of the current line; -1 until the first line has been fetched.
    self._line = -1
    # Offset of the current token within _current_line.
    self._column = 0
    self._token_start = None
    # The look-ahead token; '' once the input is exhausted.
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    # Line/column at the time of the most recent NextToken() call; used by
    # ParseErrorPreviousToken to locate the previously read token.
    self._previous_line = 0
    self._previous_column = 0
    # Becomes False once the line iterator is exhausted.
    self._more_lines = True
    self._skip_comments = skip_comments
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    # True only when the text skipped right before the current token was
    # exactly one space followed by the debug-string silent marker.
    self.contains_silent_marker_before_current_token = False

    # Prime the look-ahead token.
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Returns True iff the current token equals `token` (nothing consumed)."""
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Fetches lines until the column points inside the current line.

    When the iterator runs out, clears the current line and records that no
    more lines are available.
    """
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Advances past whitespace (and comments, if skipped) across lines."""
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      # Overwritten for every skipped run, so only the last run before the
      # next token decides the flag.
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    """Consumes a comment token and returns its text.

    Comment tokens are only produced when the tokenizer was created with
    skip_comments=False.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line as the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    """Like ConsumeIdentifier, but returns a bool instead of raising."""
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    """Like ConsumeIdentifierOrNumber, but returns a bool instead of raising."""
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes a protocol message field identifier or field number.

    Returns:
      The token text (identifier or number) as a string.

    Raises:
      ParseError: If an identifier or number couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    """Like ConsumeInteger, but returns a bool instead of raising."""
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    No size or signedness check is applied here.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    """Like ConsumeFloat, but returns a bool instead of raising."""
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    """Like ConsumeByteString, but returns a bool instead of raising.

    Note that ConsumeByteString may already have consumed one or more
    literals when a later adjacent literal fails, so a False result does not
    guarantee that the token position is unchanged.
    """
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Adjacent string literals are concatenated into one value.

    Returns:
      The concatenated bytes of all adjacent literals.

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    # The closing quote must be the same character as the opening quote.
    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      # Strip the surrounding quotes and resolve escape sequences.
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value for `field`.

    Args:
      field: The descriptor of the enum field being parsed.

    Returns:
      The result of ParseEnum for the current token.

    Raises:
      ParseError: If the token is not a valid value for the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeUrlChars(self):
    """Consumes a token containing valid URL characters.

    Excludes '/' so that it can be treated specially as a delimiter.

    Returns:
      The next token containing one or more URL characters.

    Raises:
      ParseError: If the next token contains unaccepted URL characters.
    """
    if not self._URL_CHARS.match(self.token):
      raise self.ParseError('Expected URL character(s), got "%s"' % self.token)

    result = self.token
    self.NextToken()
    return result

  def TryConsumeUrlChars(self):
    """Like ConsumeUrlChars, but returns a bool instead of raising."""
    try:
      self.ConsumeUrlChars()
      return True
    except ParseError:
      return False

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    # Stored positions are 0-based; reported positions are 1-based.
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    # The message is prefixed with the quoted text of the current line.
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    """Creates and *returns* a ParseError for a string decoding failure."""
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    # Remember where the token being left behind started, for error reports.
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    # Step over the token just consumed, then over whitespace/comments.
    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      # End of input is represented by an empty token.
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      # Comments are tokens in their own right when they are not skipped.
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Anything else (punctuation etc.) becomes a single-character token.
      self.token = self._current_line[self._column]

1725 

# Alias kept so that callers which still reference the underscore-prefixed
# name (current visibility violators) keep working.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1729 

1730 

def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  value = _ConsumeInteger(tokenizer, is_long=False, is_signed=True)
  return value

1744 

1745 

def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  value = _ConsumeInteger(tokenizer, is_long=False, is_signed=False)
  return value

1759 

1760 

def _TryConsumeInt64(tokenizer):
  """Tries to consume a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    True if the integer was consumed, False if that raised ParseError.
  """
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1767 

1768 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  value = _ConsumeInteger(tokenizer, is_long=True, is_signed=True)
  return value

1782 

1783 

def _TryConsumeUint64(tokenizer):
  """Tries to consume an unsigned 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    True if the integer was consumed, False if that raised ParseError.
  """
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1790 

1791 

def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  value = _ConsumeInteger(tokenizer, is_long=True, is_signed=False)
  return value

1805 

1806 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  The tokenizer only advances when the current token parses successfully.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    parsed = ParseInteger(
        tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise as a ParseError created by the tokenizer for its current token.
    raise tokenizer.ParseError(str(e))
  else:
    tokenizer.NextToken()
    return parsed

1827 

1828 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and checks it fits the requested width/signedness.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parsing errors propagate to the caller unchanged.
  value = _ParseAbstractInteger(text)

  # _INTEGER_CHECKERS is indexed by (is_long, is_signed):
  # 0 = uint32, 1 = int32, 2 = uint64, 3 = int64.
  checker_index = int(is_signed) + 2 * int(is_long)
  _INTEGER_CHECKERS[checker_index].CheckValue(value)
  return value

1850 

1851 

1852def _ParseAbstractInteger(text): 

1853 """Parses an integer without checking size/signedness. 

1854 

1855 Args: 

1856 text: The text to parse. 

1857 

1858 Returns: 

1859 The integer value. 

1860 

1861 Raises: 

1862 ValueError: Thrown Iff the text is not a valid integer. 

1863 """ 

1864 # Do the actual parsing. Exception handling is propagated to caller. 

1865 orig_text = text 

1866 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1867 if c_octal_match: 

1868 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1869 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1870 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1871 try: 

1872 return int(text, 0) 

1873 except ValueError: 

1874 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1875 

1876 

def ParseFloat(text):
  """Parse a floating point number.

  Python's own float syntax is tried first. If that fails, the alternative
  infinity and NaN spellings matched by _FLOAT_INFINITY and _FLOAT_NAN are
  tried, then a number with a trailing 'f'/'F' suffix.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  if _FLOAT_OCTAL_PREFIX.match(text):
    raise ValueError('Invalid octal float: %s' % text)
  try:
    # Fast path: Python compatible syntax.
    return float(text)
  except ValueError:
    # Alternative spellings of infinity; the sign comes from the text.
    if _FLOAT_INFINITY.match(text):
      return float('-inf') if text.startswith('-') else float('inf')
    if _FLOAT_NAN.match(text):
      return float('nan')
    # Last resort: assume the '1.0f' format and drop the suffix.
    try:
      return float(text.rstrip('fF'))
    except ValueError:
      raise ValueError("Couldn't parse float: %s" % text)

1909 

1910 

def ParseBool(text):
  """Parse a boolean value.

  Accepted spellings are 'true', 't', '1', 'True' for True and 'false', 'f',
  '0', 'False' for False. Matching is exact and case-sensitive.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  if text in ('true', 't', '1', 'True'):
    return True
  if text in ('false', 'f', '0', 'False'):
    return False
  raise ValueError('Expected "true" or "false".')

1929 

1930 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name). A number given for an enum that is not
  closed is returned without checking that it names a declared value.

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric, so treat the text as an identifier.
    named = enum_type.values_by_name.get(value)
    if named is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_type.full_name, value))
    return named.number

  # Numeric literal. Enums that are not closed accept any number as-is.
  if not enum_type.is_closed:
    return number
  numbered = enum_type.values_by_number.get(number)
  if numbered is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_type.full_name, number))
  return numbered.number