Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/protobuf/text_format.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

758 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

20__author__ = 'kenton@google.com (Kenton Varda)' 

21 

22# TODO Import thread contention leads to test failures. 

23import encodings.raw_unicode_escape # pylint: disable=unused-import 

24import encodings.unicode_escape # pylint: disable=unused-import 

25import io 

26import math 

27import re 

28 

29from google.protobuf.internal import decoder 

30from google.protobuf.internal import type_checkers 

31from google.protobuf import descriptor 

32from google.protobuf import text_encoding 

33from google.protobuf import unknown_fields 

34 

35# pylint: disable=g-import-not-at-top 

# Names exported by a star import of this module.
__all__ = [
    'MessageToString',
    'Parse',
    'PrintMessage',
    'PrintField',
    'PrintFieldValue',
    'Merge',
    'MessageToBytes',
]

# One checker instance per integer kind, in the order uint32, int32, uint64,
# int64.
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)
# Case-insensitive: optional '-', then 'inf' or 'infinity', optional 'f'.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', flags=re.IGNORECASE)
# Case-insensitive: 'nan' with an optional trailing 'f'.
_FLOAT_NAN = re.compile('nanf?$', flags=re.IGNORECASE)
# The single-quote and double-quote characters.
_QUOTES = frozenset(("'", '"'))
# Fully-qualified name used to recognise google.protobuf.Any messages.
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
# A tab character followed by a space.
_DEBUG_STRING_SILENT_MARKER = '\t '

# Default value of the as_utf8 argument of the printing entry points below.
_as_utf8_default = True

50 

51 

class Error(Exception):
  """Base class for the exceptions defined by the text_format module."""


class ParseError(Error):
  """Raised when text-format input cannot be tokenized or parsed.

  The line and column given at construction time stay available through
  GetLine() and GetColumn().
  """

  def __init__(self, message=None, line=None, column=None):
    """Builds the error, prefixing the message with its location if known.

    Args:
      message: Description of the problem, or None for an error without
        arguments.
      line: Line associated with the error, or None.
      column: Column associated with the error, or None. It only appears in
        the message when line is given as well.
    """
    if message is None:
      super().__init__()
    else:
      if line is not None:
        # Location prefix is "<line>" or "<line>:<column>".
        location = str(line)
        if column is not None:
          location = location + ':{0}'.format(column)
        message = '{0} : {1}'.format(location, message)
      super().__init__(message)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the line passed to the constructor (may be None)."""
    return self._line

  def GetColumn(self):
    """Returns the column passed to the constructor (may be None)."""
    return self._column

77 

78 

class TextWriter(object):
  """Small in-memory text sink that delegates to an io.StringIO buffer."""

  def __init__(self, as_utf8):
    """Creates an empty buffer.

    Args:
      as_utf8: Accepted for interface compatibility; this class does not read
        it.
    """
    self._writer = io.StringIO()

  def write(self, val):
    """Appends val to the buffer and returns what the buffer's write returns."""
    written = self._writer.write(val)
    return written

  def close(self):
    """Closes the underlying buffer."""
    return self._writer.close()

  def getvalue(self):
    """Returns everything written to the buffer so far."""
    contents = self._writer.getvalue()
    return contents

92 

93 

def MessageToString(
    message,
    as_utf8=_as_utf8_default,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Render a protobuf message as a text-format string.

  Doubles can be printed compactly with double_format='.15g' (15 digits is
  the most an IEEE 754 "double" can guarantee). Use double_format='.17g' when
  the text must convert back to an identical value.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields are printed in the order defined in
      source code instead of field-number order; extensions are printed at
      the end of the message, ordered by extension number. By default, use
      the field number order.
    float_format (str): If set, format spec (per the "Format Specification
      Mini-Language") for float fields; otherwise the shortest float that has
      the same value in wire is printed. Also applies to double fields when
      double_format is not set.
    double_format (str): If set, format spec for double fields; if unset,
      float_format is used when set, otherwise ``str()``.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  buffer = TextWriter(as_utf8)
  _Printer(
      out=buffer,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintMessage(message)
  text = buffer.getvalue()
  buffer.close()
  # One-line output ends with a separator space after the last field.
  return text.rstrip() if as_one_line else text

171 

172 

def MessageToBytes(message, **kwargs) -> bytes:
  """Convert protobuf message to encoded text format. See MessageToString.

  Args:
    message: The protocol buffers message.
    **kwargs: Forwarded unchanged to MessageToString().

  Returns:
    bytes: The text-format output, encoded as UTF-8 when as_utf8 is in effect
    (passed explicitly or through the module default) and as ASCII otherwise.
  """
  text = MessageToString(message, **kwargs)
  if isinstance(text, bytes):
    return text
  # When the caller omits as_utf8, MessageToString() falls back to
  # _as_utf8_default, so the codec must follow the same default. Otherwise
  # unescaped non-ASCII output would be encoded with 'ascii' and raise
  # UnicodeEncodeError.
  as_utf8 = kwargs.get('as_utf8', _as_utf8_default)
  codec = 'utf-8' if as_utf8 else 'ascii'
  return text.encode(codec)

180 

181 

def _IsMapEntry(field):
  """Tells whether field is a message field whose type is a map entry.

  Args:
    field: The field descriptor.

  Returns:
    False for non-message fields; otherwise the result of checking that the
    message type has options and that those options set map_entry.
  """
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

186 

187 

def _IsGroupLike(field):
  """Checks whether a field looks like a proto2 group.

  Args:
    field: The field descriptor.

  Returns:
    True if this field is group-like, false otherwise.
  """
  # Groups are always tag-delimited.
  if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
    return False

  group_type = field.message_type

  # A group field is always named after its type, in lowercase.
  if field.name != group_type.name.lower():
    return False

  if group_type.file != field.file:
    return False

  # Group messages are always defined in the same scope as the field. File
  # level extensions will compare NULL == NULL here, which is why the file
  # comparison above is necessary to ensure both come from the same file.
  return group_type.containing_type == (
      field.extension_scope if field.is_extension else field.containing_type
  )

216 

217 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=_as_utf8_default,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Write the text-format rendering of message to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, format spec (per the "Format Specification
      Mini-Language") for float fields; otherwise the shortest float that has
      the same value in wire is printed. Also applies to double fields when
      double_format is not set.
    double_format: If set, format spec for double fields; if unset,
      float_format is used when set, otherwise str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  # The leading arguments follow _Printer.__init__'s positional order.
  text_printer = _Printer(
      out,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  )
  text_printer.PrintMessage(message)

277 

278 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=_as_utf8_default,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Print a single field name/value pair.

  Args:
    field: The descriptor of the field to be printed.
    value: The value of the field.
    out: Stream the text is written to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields in source-definition order.
    float_format: Format spec for float fields, if set.
    double_format: Format spec for double fields, if set.
    message_formatter: Optional custom formatter for sub-messages.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon is added after the field name even if the
      field is a proto message.
  """
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintField(field, value)

301 

302 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=_as_utf8_default,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Print a single field value (not including name).

  Args:
    field: The descriptor of the field to be printed.
    value: The value of the field.
    out: Stream the text is written to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields in source-definition order.
    float_format: Format spec for float fields, if set.
    double_format: Format spec for double fields, if set.
    message_formatter: Optional custom formatter for sub-messages.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon is added after the field name even if the
      field is a proto message.
  """
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon,
  ).PrintFieldValue(field, value)

325 

326 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Creates an empty message instance for a fully-qualified type name.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance, or None to use the default pool.

  Returns:
    A new Message instance of the type matching type_name, or None if the
    pool lookup raised KeyError for type_name.
  """
  # pylint: disable=g-import-not-at-top
  pool = descriptor_pool
  if pool is None:
    from google.protobuf import descriptor_pool as pool_mod
    pool = pool_mod.Default()
  from google.protobuf import message_factory
  try:
    found_descriptor = pool.FindMessageTypeByName(type_name)
  except KeyError:
    return None
  else:
    message_class = message_factory.GetMessageClass(found_descriptor)
    return message_class()

349 

350 

# Wire-type numbers compared against unknown fields' wire_type when printing.
# These values must match WireType enum in //google/protobuf/wire_format.h.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

354 

355 

class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=_as_utf8_default,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (which is the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'. To ensure that converting to text and back to a proto
    will result in an identical value, double_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: If set, use this to specify float field formatting
        (per the "Format Specification Mini-Language"); otherwise, shortest
        float that has same value in wire will be printed. Also affect double
        field if double_format is not set but float_format is set.
      double_format: If set, use this to specify double field formatting
        (per the "Format Specification Mini-Language"); if it is not set but
        float_format is set, use float_format. Otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    # Doubles fall back to the float format when no double format is given.
    if double_format is not None:
      self.double_format = double_format
    else:
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field.

    Args:
      message: A google.protobuf.Any message.

    Returns:
      True if the packed message was written in expanded "[type_url] { ... }"
      form. False, with nothing written, if type_url contains no '/' or the
      packed type cannot be built from the descriptor pool.
    """
    if '/' not in message.type_url:
      return False
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    # Compare with None instead of relying on truthiness: a freshly built
    # message can be falsy (e.g. google.protobuf.ListValue) and must still be
    # printed in expanded form.
    if packed_message is None:
      return False
    packed_message.MergeFromString(message.value)
    colon = ':' if self.force_colon else ''
    self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
    self._PrintMessageFieldValue(packed_message)
    self.out.write(' ' if self.as_one_line else '\n')
    return True

  def _TryCustomFormatMessage(self, message):
    """Prints message through message_formatter if it produces text.

    Args:
      message: The message to format.

    Returns:
      True if the formatter returned text (which is then written, indented and
      followed by a separator); False if it returned None.
    """
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree.  Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO: refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFieldGroup(self, unknown_field_set):
    """Prints unknown_field_set wrapped in braces, indenting nested fields."""
    out = self.out
    if self.as_one_line:
      out.write(' { ')
      self._PrintUnknownFields(unknown_field_set)
      out.write('} ')
    else:
      out.write(' {\n')
      self.indent += 2
      self._PrintUnknownFields(unknown_field_set)
      self.indent -= 2
      out.write(' ' * self.indent + '}\n')

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields.

    Each field is written as its field number followed by a braced block
    (groups and length-delimited data that decodes as a message), a quoted
    escaped string (other length-delimited data), or the raw value.

    Args:
      unknown_field_set: Iterable of unknown fields exposing field_number,
        wire_type and data.
    """
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        self._PrintUnknownFieldGroup(field.data)
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        # Reset per field so a failed decode can never reuse the result of a
        # previous iteration (or hit an unbound local).
        embedded_unknown_message = None
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:    # pylint: disable=broad-except
          pos = 0
          embedded_unknown_message = None

        if embedded_unknown_message is not None and pos == len(field.data):
          self._PrintUnknownFieldGroup(embedded_unknown_message)
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name.

    Writes the indent, then the field number (when use_field_number is set) or
    the field's name, then a colon for non-message fields or when force_colon
    is set.

    Args:
      field: The descriptor of the field whose name is printed.
    """
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif _IsGroupLike(field):
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints short repeated primitives value.

    Args:
      field: The descriptor of the repeated field.
      value: The elements to print as "[v1, v2, ...]".
    """
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for position, element in enumerate(value):
      if position:
        self.out.write(', ')
      self.PrintFieldValue(field, element)
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a sub-message enclosed in braces or angle brackets.

    Args:
      value: The message to print between the brackets.
    """
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Numbers without a matching enum value are printed as-is.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if self.float_format is not None:
        out.write('{1:{0}}'.format(self.float_format, value))
      else:
        if math.isnan(value):
          out.write(str(value))
        else:
          out.write(str(type_checkers.ToShortestFloat(value)))
    elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
          self.double_format is not None):
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      out.write(str(value))

664 

665 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message. This is different from what the binary msg.ParseFrom(...) does.
  If text contains a field already set in message, the value is appended if the
  field is repeated. Otherwise, an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on a newline of the same type (bytes or str) as the input.
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  lines = text.split(newline)
  return ParseLines(lines,
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

722 

723 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one. This means any non-repeated, top-level fields specified in text
  replace those in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on a newline of the same type (bytes or str) as the input.
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  lines = text.split(newline)
  return MergeLines(lines,
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

760 

761 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses lines of a text-format protocol message into a message.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field,
  ).ParseLines(lines, message)

794 

795 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Merges lines of a text-format protocol message into a message.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid using this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field,
  ).MergeLines(lines, message)

828 

829 

830class _Parser(object): 

831 """Text format parser for protocol message.""" 

832 

833 def __init__(self, 

834 allow_unknown_extension=False, 

835 allow_field_number=False, 

836 descriptor_pool=None, 

837 allow_unknown_field=False): 

838 self.allow_unknown_extension = allow_unknown_extension 

839 self.allow_field_number = allow_field_number 

840 self.descriptor_pool = descriptor_pool 

841 self.allow_unknown_field = allow_unknown_field 

842 

def ParseLines(self, lines, message):
  """Parses a text representation of a protocol message into a message.

  Clears self._allow_multiple_scalars before delegating to _ParseOrMerge().

  Args:
    lines: Lines of a message's text representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.
  """
  self._allow_multiple_scalars = False
  self._ParseOrMerge(lines, message)
  return message

848 

def MergeLines(self, lines, message):
  """Merges a text representation of a protocol message into a message.

  Sets self._allow_multiple_scalars before delegating to _ParseOrMerge().

  Args:
    lines: Lines of a message's text representation.
    message: A protocol buffer message to merge into.

  Returns:
    The same message passed as argument.
  """
  self._allow_multiple_scalars = True
  self._ParseOrMerge(lines, message)
  return message

854 

def _ParseOrMerge(self, lines, message):
  """Converts a text representation of a protocol message into a message.

  Fields are merged one at a time until the tokenizer reports the end of the
  input. self.root_type is recorded only when message is truthy.

  Args:
    lines: Lines of a message's text representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On text parsing problems. A UnicodeDecodeError raised while
      the Tokenizer is being constructed is re-raised as ParseError.
  """

  def _AsStr(raw_line):
    # Tokenize expects native str lines; anything else is decoded as UTF-8.
    if isinstance(raw_line, str):
      return raw_line
    return raw_line.decode('utf-8')

  try:
    # The generator is lazy: each line is converted only when it is consumed.
    tokenizer = Tokenizer(_AsStr(raw_line) for raw_line in lines)
  except UnicodeDecodeError as decode_error:
    raise ParseError from decode_error
  if message:
    self.root_type = message.DESCRIPTOR.full_name
  while not tokenizer.AtEnd():
    self._MergeField(tokenizer, message)

877 

878 def _MergeField(self, tokenizer, message): 

879 """Merges a single protocol message field into a message. 

880 

881 Args: 

882 tokenizer: A tokenizer to parse the field name and values. 

883 message: A protocol message to record the data. 

884 

885 Raises: 

886 ParseError: In case of text parsing problems. 

887 """ 

888 message_descriptor = message.DESCRIPTOR 

889 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and 

890 tokenizer.TryConsume('[')): 

891 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) 

892 tokenizer.Consume(']') 

893 tokenizer.TryConsume(':') 

894 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

895 type_url_prefix + '/' + packed_type_name) 

896 if tokenizer.TryConsume('<'): 

897 expanded_any_end_token = '>' 

898 else: 

899 tokenizer.Consume('{') 

900 expanded_any_end_token = '}' 

901 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, 

902 self.descriptor_pool) 

903 # Direct comparison with None is used instead of implicit bool conversion 

904 # to avoid false positives with falsy initial values, e.g. for 

905 # google.protobuf.ListValue. 

906 if expanded_any_sub_message is None: 

907 raise ParseError('Type %s not found in descriptor pool' % 

908 packed_type_name) 

909 while not tokenizer.TryConsume(expanded_any_end_token): 

910 if tokenizer.AtEnd(): 

911 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % 

912 (expanded_any_end_token,)) 

913 self._MergeField(tokenizer, expanded_any_sub_message) 

914 deterministic = False 

915 

916 message.Pack(expanded_any_sub_message, 

917 type_url_prefix=type_url_prefix, 

918 deterministic=deterministic) 

919 return 

920 

921 if tokenizer.TryConsume('['): 

922 name = [tokenizer.ConsumeIdentifier()] 

923 while tokenizer.TryConsume('.'): 

924 name.append(tokenizer.ConsumeIdentifier()) 

925 name = '.'.join(name) 

926 

927 if not message_descriptor.is_extendable: 

928 raise tokenizer.ParseErrorPreviousToken( 

929 'Message type "%s" does not have extensions.' % 

930 message_descriptor.full_name) 

931 # pylint: disable=protected-access 

932 field = message.Extensions._FindExtensionByName(name) 

933 # pylint: enable=protected-access 

934 if not field: 

935 if self.allow_unknown_extension: 

936 field = None 

937 else: 

938 raise tokenizer.ParseErrorPreviousToken( 

939 'Extension "%s" not registered. ' 

940 'Did you import the _pb2 module which defines it? ' 

941 'If you are trying to place the extension in the MessageSet ' 

942 'field of another message that is in an Any or MessageSet field, ' 

943 'that message\'s _pb2 module must be imported as well' % name) 

944 elif message_descriptor != field.containing_type: 

945 raise tokenizer.ParseErrorPreviousToken( 

946 'Extension "%s" does not extend message type "%s".' % 

947 (name, message_descriptor.full_name)) 

948 

949 tokenizer.Consume(']') 

950 

951 else: 

952 name = tokenizer.ConsumeIdentifierOrNumber() 

953 if self.allow_field_number and name.isdigit(): 

954 number = ParseInteger(name, True, True) 

955 field = message_descriptor.fields_by_number.get(number, None) 

956 if not field and message_descriptor.is_extendable: 

957 field = message.Extensions._FindExtensionByNumber(number) 

958 else: 

959 field = message_descriptor.fields_by_name.get(name, None) 

960 

961 # Group names are expected to be capitalized as they appear in the 

962 # .proto file, which actually matches their type names, not their field 

963 # names. 

964 if not field: 

965 field = message_descriptor.fields_by_name.get(name.lower(), None) 

966 if field and not _IsGroupLike(field): 

967 field = None 

968 if field and field.message_type.name != name: 

969 field = None 

970 

971 if not field and not self.allow_unknown_field: 

972 raise tokenizer.ParseErrorPreviousToken( 

973 'Message type "%s" has no field named "%s".' % 

974 (message_descriptor.full_name, name)) 

975 

976 if field: 

977 if not self._allow_multiple_scalars and field.containing_oneof: 

978 # Check if there's a different field set in this oneof. 

979 # Note that we ignore the case if the same field was set before, and we 

980 # apply _allow_multiple_scalars to non-scalar fields as well. 

981 which_oneof = message.WhichOneof(field.containing_oneof.name) 

982 if which_oneof is not None and which_oneof != field.name: 

983 raise tokenizer.ParseErrorPreviousToken( 

984 'Field "%s" is specified along with field "%s", another member ' 

985 'of oneof "%s" for message type "%s".' % 

986 (field.name, which_oneof, field.containing_oneof.name, 

987 message_descriptor.full_name)) 

988 

989 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

990 tokenizer.TryConsume(':') 

991 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

992 field.full_name) 

993 merger = self._MergeMessageField 

994 else: 

995 tokenizer.Consume(':') 

996 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

997 field.full_name) 

998 merger = self._MergeScalarField 

999 

1000 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and 

1001 tokenizer.TryConsume('[')): 

1002 # Short repeated format, e.g. "foo: [1, 2, 3]" 

1003 if not tokenizer.TryConsume(']'): 

1004 while True: 

1005 merger(tokenizer, message, field) 

1006 if tokenizer.TryConsume(']'): 

1007 break 

1008 tokenizer.Consume(',') 

1009 

1010 else: 

1011 merger(tokenizer, message, field) 

1012 

1013 else: # Proto field is unknown. 

1014 assert (self.allow_unknown_extension or self.allow_unknown_field) 

1015 self._SkipFieldContents(tokenizer, name, message_descriptor.full_name) 

1016 

1017 # For historical reasons, fields may optionally be separated by commas or 

1018 # semicolons. 

1019 if not tokenizer.TryConsume(','): 

1020 tokenizer.TryConsume(';') 

1021 

1022 def _LogSilentMarker(self, immediate_message_type, field_name): 

1023 pass 

1024 

1025 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

1026 if tokenizer.contains_silent_marker_before_current_token: 

1027 self._LogSilentMarker(immediate_message_type, field_name) 

1028 

1029 def _ConsumeAnyTypeUrl(self, tokenizer): 

1030 """Consumes a google.protobuf.Any type URL and returns the type name.""" 

1031 # Consume "type.googleapis.com/". 

1032 prefix = [tokenizer.ConsumeIdentifier()] 

1033 tokenizer.Consume('.') 

1034 prefix.append(tokenizer.ConsumeIdentifier()) 

1035 tokenizer.Consume('.') 

1036 prefix.append(tokenizer.ConsumeIdentifier()) 

1037 tokenizer.Consume('/') 

1038 # Consume the fully-qualified type name. 

1039 name = [tokenizer.ConsumeIdentifier()] 

1040 while tokenizer.TryConsume('.'): 

1041 name.append(tokenizer.ConsumeIdentifier()) 

1042 return '.'.join(prefix), '.'.join(name) 

1043 

1044 def _MergeMessageField(self, tokenizer, message, field): 

1045 """Merges a single scalar field into a message. 

1046 

1047 Args: 

1048 tokenizer: A tokenizer to parse the field value. 

1049 message: The message of which field is a member. 

1050 field: The descriptor of the field to be merged. 

1051 

1052 Raises: 

1053 ParseError: In case of text parsing problems. 

1054 """ 

1055 is_map_entry = _IsMapEntry(field) 

1056 

1057 if tokenizer.TryConsume('<'): 

1058 end_token = '>' 

1059 else: 

1060 tokenizer.Consume('{') 

1061 end_token = '}' 

1062 

1063 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 

1064 if field.is_extension: 

1065 sub_message = message.Extensions[field].add() 

1066 elif is_map_entry: 

1067 sub_message = getattr(message, field.name).GetEntryClass()() 

1068 else: 

1069 sub_message = getattr(message, field.name).add() 

1070 else: 

1071 if field.is_extension: 

1072 if (not self._allow_multiple_scalars and 

1073 message.HasExtension(field)): 

1074 raise tokenizer.ParseErrorPreviousToken( 

1075 'Message type "%s" should not have multiple "%s" extensions.' % 

1076 (message.DESCRIPTOR.full_name, field.full_name)) 

1077 sub_message = message.Extensions[field] 

1078 else: 

1079 # Also apply _allow_multiple_scalars to message field. 

1080 # TODO: Change to _allow_singular_overwrites. 

1081 if (not self._allow_multiple_scalars and 

1082 message.HasField(field.name)): 

1083 raise tokenizer.ParseErrorPreviousToken( 

1084 'Message type "%s" should not have multiple "%s" fields.' % 

1085 (message.DESCRIPTOR.full_name, field.name)) 

1086 sub_message = getattr(message, field.name) 

1087 sub_message.SetInParent() 

1088 

1089 while not tokenizer.TryConsume(end_token): 

1090 if tokenizer.AtEnd(): 

1091 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,)) 

1092 self._MergeField(tokenizer, sub_message) 

1093 

1094 if is_map_entry: 

1095 value_cpptype = field.message_type.fields_by_name['value'].cpp_type 

1096 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

1097 value = getattr(message, field.name)[sub_message.key] 

1098 value.CopyFrom(sub_message.value) 

1099 else: 

1100 getattr(message, field.name)[sub_message.key] = sub_message.value 

1101 

1102 def _MergeScalarField(self, tokenizer, message, field): 

1103 """Merges a single scalar field into a message. 

1104 

1105 Args: 

1106 tokenizer: A tokenizer to parse the field value. 

1107 message: A protocol message to record the data. 

1108 field: The descriptor of the field to be merged. 

1109 

1110 Raises: 

1111 ParseError: In case of text parsing problems. 

1112 RuntimeError: On runtime errors. 

1113 """ 

1114 _ = self.allow_unknown_extension 

1115 value = None 

1116 

1117 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 

1118 descriptor.FieldDescriptor.TYPE_SINT32, 

1119 descriptor.FieldDescriptor.TYPE_SFIXED32): 

1120 value = _ConsumeInt32(tokenizer) 

1121 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 

1122 descriptor.FieldDescriptor.TYPE_SINT64, 

1123 descriptor.FieldDescriptor.TYPE_SFIXED64): 

1124 value = _ConsumeInt64(tokenizer) 

1125 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 

1126 descriptor.FieldDescriptor.TYPE_FIXED32): 

1127 value = _ConsumeUint32(tokenizer) 

1128 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 

1129 descriptor.FieldDescriptor.TYPE_FIXED64): 

1130 value = _ConsumeUint64(tokenizer) 

1131 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 

1132 descriptor.FieldDescriptor.TYPE_DOUBLE): 

1133 value = tokenizer.ConsumeFloat() 

1134 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 

1135 value = tokenizer.ConsumeBool() 

1136 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 

1137 value = tokenizer.ConsumeString() 

1138 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 

1139 value = tokenizer.ConsumeByteString() 

1140 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 

1141 value = tokenizer.ConsumeEnum(field) 

1142 else: 

1143 raise RuntimeError('Unknown field type %d' % field.type) 

1144 

1145 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 

1146 if field.is_extension: 

1147 message.Extensions[field].append(value) 

1148 else: 

1149 getattr(message, field.name).append(value) 

1150 else: 

1151 if field.is_extension: 

1152 if (not self._allow_multiple_scalars and 

1153 field.has_presence and 

1154 message.HasExtension(field)): 

1155 raise tokenizer.ParseErrorPreviousToken( 

1156 'Message type "%s" should not have multiple "%s" extensions.' % 

1157 (message.DESCRIPTOR.full_name, field.full_name)) 

1158 else: 

1159 message.Extensions[field] = value 

1160 else: 

1161 duplicate_error = False 

1162 if not self._allow_multiple_scalars: 

1163 if field.has_presence: 

1164 duplicate_error = message.HasField(field.name) 

1165 else: 

1166 # For field that doesn't represent presence, try best effort to 

1167 # check multiple scalars by compare to default values. 

1168 duplicate_error = bool(getattr(message, field.name)) 

1169 

1170 if duplicate_error: 

1171 raise tokenizer.ParseErrorPreviousToken( 

1172 'Message type "%s" should not have multiple "%s" fields.' % 

1173 (message.DESCRIPTOR.full_name, field.name)) 

1174 else: 

1175 setattr(message, field.name, value) 

1176 

1177 def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type): 

1178 """Skips over contents (value or message) of a field. 

1179 

1180 Args: 

1181 tokenizer: A tokenizer to parse the field name and values. 

1182 field_name: The field name currently being parsed. 

1183 immediate_message_type: The type of the message immediately containing 

1184 the silent marker. 

1185 """ 

1186 # Try to guess the type of this field. 

1187 # If this field is not a message, there should be a ":" between the 

1188 # field name and the field value and also the field value should not 

1189 # start with "{" or "<" which indicates the beginning of a message body. 

1190 # If there is no ":" or there is a "{" or "<" after ":", this field has 

1191 # to be a message or the input is ill-formed. 

1192 if tokenizer.TryConsume( 

1193 ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'): 

1194 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1195 if tokenizer.LookingAt('['): 

1196 self._SkipRepeatedFieldValue(tokenizer) 

1197 else: 

1198 self._SkipFieldValue(tokenizer) 

1199 else: 

1200 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1201 self._SkipFieldMessage(tokenizer, immediate_message_type) 

1202 

1203 def _SkipField(self, tokenizer, immediate_message_type): 

1204 """Skips over a complete field (name and value/message). 

1205 

1206 Args: 

1207 tokenizer: A tokenizer to parse the field name and values. 

1208 immediate_message_type: The type of the message immediately containing 

1209 the silent marker. 

1210 """ 

1211 field_name = '' 

1212 if tokenizer.TryConsume('['): 

1213 # Consume extension or google.protobuf.Any type URL 

1214 field_name += '[' + tokenizer.ConsumeIdentifier() 

1215 num_identifiers = 1 

1216 while tokenizer.TryConsume('.'): 

1217 field_name += '.' + tokenizer.ConsumeIdentifier() 

1218 num_identifiers += 1 

1219 # This is possibly a type URL for an Any message. 

1220 if num_identifiers == 3 and tokenizer.TryConsume('/'): 

1221 field_name += '/' + tokenizer.ConsumeIdentifier() 

1222 while tokenizer.TryConsume('.'): 

1223 field_name += '.' + tokenizer.ConsumeIdentifier() 

1224 tokenizer.Consume(']') 

1225 field_name += ']' 

1226 else: 

1227 field_name += tokenizer.ConsumeIdentifierOrNumber() 

1228 

1229 self._SkipFieldContents(tokenizer, field_name, immediate_message_type) 

1230 

1231 # For historical reasons, fields may optionally be separated by commas or 

1232 # semicolons. 

1233 if not tokenizer.TryConsume(','): 

1234 tokenizer.TryConsume(';') 

1235 

1236 def _SkipFieldMessage(self, tokenizer, immediate_message_type): 

1237 """Skips over a field message. 

1238 

1239 Args: 

1240 tokenizer: A tokenizer to parse the field name and values. 

1241 immediate_message_type: The type of the message immediately containing 

1242 the silent marker 

1243 """ 

1244 if tokenizer.TryConsume('<'): 

1245 delimiter = '>' 

1246 else: 

1247 tokenizer.Consume('{') 

1248 delimiter = '}' 

1249 

1250 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 

1251 self._SkipField(tokenizer, immediate_message_type) 

1252 

1253 tokenizer.Consume(delimiter) 

1254 

1255 def _SkipFieldValue(self, tokenizer): 

1256 """Skips over a field value. 

1257 

1258 Args: 

1259 tokenizer: A tokenizer to parse the field name and values. 

1260 

1261 Raises: 

1262 ParseError: In case an invalid field value is found. 

1263 """ 

1264 if (not tokenizer.TryConsumeByteString()and 

1265 not tokenizer.TryConsumeIdentifier() and 

1266 not _TryConsumeInt64(tokenizer) and 

1267 not _TryConsumeUint64(tokenizer) and 

1268 not tokenizer.TryConsumeFloat()): 

1269 raise ParseError('Invalid field value: ' + tokenizer.token) 

1270 

1271 def _SkipRepeatedFieldValue(self, tokenizer): 

1272 """Skips over a repeated field value. 

1273 

1274 Args: 

1275 tokenizer: A tokenizer to parse the field value. 

1276 """ 

1277 tokenizer.Consume('[') 

1278 if not tokenizer.LookingAt(']'): 

1279 self._SkipFieldValue(tokenizer) 

1280 while tokenizer.TryConsume(','): 

1281 self._SkipFieldValue(tokenizer) 

1282 tokenizer.Consume(']') 

1283 

1284 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.

  The current token is exposed as the `token` attribute; it is the empty
  string once the input is exhausted.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    """Starts tokenizing and positions the tokenizer on the first token.

    Args:
      lines: An iterable of str lines.
      skip_comments: if True, comments are treated like whitespace; otherwise
        they are returned as tokens.
    """
    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    self._whitespace_pattern = (
        self._WHITESPACE_OR_COMMENT if skip_comments else self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Returns True iff the current token equals `token` (nothing consumed)."""
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Fetches lines until the column points inside the current line.

    Sets _more_lines to False and leaves an empty current line once the
    underlying iterator is exhausted.
    """
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Advances past whitespace (and comments, when they are skipped)."""
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      # The flag reflects only the most recently skipped run.
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    """Consumes a comment token and returns its text.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    """Like ConsumeIdentifier(), but returns False instead of raising."""
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    """Like ConsumeIdentifierOrNumber(), but returns False instead of raising."""
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes a field identifier or a field number.

    Returns:
      The token text (identifier or digits) as a string.

    Raises:
      ParseError: If neither an identifier nor a number could be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    """Like ConsumeInteger(), but returns False instead of raising."""
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e)) from e
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    """Like ConsumeFloat(), but returns False instead of raising."""
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e)) from e
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e)) from e
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    """Like ConsumeByteString(), but returns False instead of raising."""
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e) from e

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Adjacent string literals are concatenated.

    Returns:
      The unescaped value as bytes.

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python.  This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e)) from e
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value for `field` and returns its number.

    Raises:
      ParseError: If the current token is not a valid value for the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e)) from e
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    """Creates and *returns* a ParseError describing a string decode failure."""
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    # Step over the token that was current until now.
    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Anything else (punctuation etc.) is a single-character token.
      self.token = self._current_line[self._column]

1628 

# Aliased so it can still be accessed by current visibility violators, i.e.
# code that still refers to the tokenizer by its former private name.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1632 

1633 

def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger with is_signed=True, is_long=False.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)

1647 

1648 

def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger with is_signed=False, is_long=False.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)

1662 

1663 

def _TryConsumeInt64(tokenizer):
  """Attempts to consume a signed 64bit integer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    True if the integer was consumed, False if _ConsumeInt64 raised
    ParseError.
  """
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1670 

1671 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger with is_signed=True, is_long=True.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)

1685 

1686 

def _TryConsumeUint64(tokenizer):
  """Attempts to consume an unsigned 64bit integer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    True if the integer was consumed, False if _ConsumeUint64 raised
    ParseError.
  """
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1693 

1694 

def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger with is_signed=False, is_long=True.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)

1708 

1709 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Reads the tokenizer's current token as an integer and advances.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    parsed = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    raise tokenizer.ParseError(str(e))
  else:
    # Only step past the token once it is known to be valid.
    tokenizer.NextToken()
    return parsed

1730 

1731 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and validates it against the requested width/sign.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parsing errors are deliberately left for the caller to handle.
  value = _ParseAbstractInteger(text)

  # _INTEGER_CHECKERS is ordered (uint32, int32, uint64, int64); range errors
  # are likewise left for the caller.
  checker_index = 2 * int(is_long) + int(is_signed)
  range_checker = _INTEGER_CHECKERS[checker_index]
  range_checker.CheckValue(value)
  return value

1753 

1754 

1755def _ParseAbstractInteger(text): 

1756 """Parses an integer without checking size/signedness. 

1757 

1758 Args: 

1759 text: The text to parse. 

1760 

1761 Returns: 

1762 The integer value. 

1763 

1764 Raises: 

1765 ValueError: Thrown Iff the text is not a valid integer. 

1766 """ 

1767 # Do the actual parsing. Exception handling is propagated to caller. 

1768 orig_text = text 

1769 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1770 if c_octal_match: 

1771 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1772 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1773 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1774 try: 

1775 return int(text, 0) 

1776 except ValueError: 

1777 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1778 

1779 

def ParseFloat(text):
  """Parse a floating point number.

  Python float syntax is tried first; on failure the infinity/NaN spellings
  and a trailing 'f' suffix are tried as fallbacks.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # Assume Python compatible syntax.
    return float(text)
  except ValueError:
    # Check alternative spellings.
    if _FLOAT_INFINITY.match(text):
      return float('-inf') if text[0] == '-' else float('inf')
    if _FLOAT_NAN.match(text):
      return float('nan')
    # assume '1.0f' format
    try:
      return float(text.rstrip('f'))
    except ValueError:
      raise ValueError('Couldn\'t parse float: %s' % text)

1810 

1811 

def ParseBool(text):
  """Parse a boolean value.

  Recognised spellings are 'true', 't', '1' and 'True' for True, and
  'false', 'f', '0' and 'False' for False. Matching is exact and
  case-sensitive.

  Args:
    text: Text to parse.

  Returns:
    The boolean value parsed.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  true_spellings = ('true', 't', '1', 'True')
  false_spellings = ('false', 'f', '0', 'False')

  if text in true_spellings:
    return True
  if text in false_spellings:
    return False
  raise ValueError('Expected "true" or "false".')

1830 

1831 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be given either as an integer literal (the enum number, in
  any base int(..., 0) understands) or as an identifier (the enum value
  name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    numeric = int(value, 0)
  except ValueError:
    # Not an integer literal, so treat the token as an enum value name.
    named_value = enum_type.values_by_name.get(value, None)
    if named_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_type.full_name, value))
    return named_value.number

  # Enums that are not closed accept any number, declared or not.
  if not enum_type.is_closed:
    return numeric

  # Closed enums only accept numbers that have a declared value.
  numbered_value = enum_type.values_by_number.get(numeric, None)
  if numbered_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_type.full_name, numeric))
  return numbered_value.number