Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/protobuf/text_format.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

767 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

20__author__ = 'kenton@google.com (Kenton Varda)' 

21 

22# TODO Import thread contention leads to test failures. 

23import encodings.raw_unicode_escape # pylint: disable=unused-import 

24import encodings.unicode_escape # pylint: disable=unused-import 

25import io 

26import math 

27import re 

28 

29from google.protobuf.internal import decoder 

30from google.protobuf.internal import type_checkers 

31from google.protobuf import descriptor 

32from google.protobuf import text_encoding 

33from google.protobuf import unknown_fields 

34 

35# pylint: disable=g-import-not-at-top 

# Public API of this module.
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'MessageToBytes']

# Value checkers used while parsing integer scalars, one per protobuf
# integer kind (unsigned/signed x 32/64-bit).
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())

43_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE) 

44_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE) 

45_FLOAT_OCTAL_PREFIX = re.compile('-?0[0-9]+') 

46_QUOTES = frozenset(("'", '"')) 

47_ANY_FULL_TYPE_NAME = 'google.protobuf.Any' 

48_DEBUG_STRING_SILENT_MARKER = '\t ' 

49 

50_as_utf8_default = True 

51 

52 

class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    # When location info is available, prefix the message with
    # "line:column : message" (or "line : message" without a column).
    if message is not None and line is not None:
      location = str(line)
      if column is not None:
        location += ':{0}'.format(column)
      message = '{0} : {1}'.format(location, message)
    if message is None:
      super(ParseError, self).__init__()
    else:
      super(ParseError, self).__init__(message)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the 1-based line number of the error, or None."""
    return self._line

  def GetColumn(self):
    """Returns the column of the error, or None."""
    return self._column

78 

79 

class TextWriter(object):
  """Minimal text sink backed by io.StringIO.

  The as_utf8 argument is accepted for interface compatibility but is
  unused here: output is always accumulated as text.
  """

  def __init__(self, as_utf8):
    self._writer = io.StringIO()

  def write(self, val):
    return self._writer.write(val)

  def close(self):
    return self._writer.close()

  def getvalue(self):
    return self._writer.getvalue()

93 

94 

def MessageToString(
    message,
    as_utf8=_as_utf8_default,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Convert protobuf message to text format.

  Double values can be formatted compactly with 15 digits of precision
  (which is the most that IEEE 754 "double" can guarantee) using
  double_format='.15g'.  To ensure that converting to text and back to a
  proto results in an identical value, use double_format='.17g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields are printed in source-code declaration
      order instead of field number order; extensions are printed at the end
      and ordered by extension number.
    float_format (str): If set, specifies float field formatting (per the
      "Format Specification Mini-Language"); otherwise the shortest float
      that has the same wire value is printed.  Also applies to double
      fields when double_format is not set.
    double_format (str): If set, specifies double field formatting; falls
      back to float_format, then to ``str()``.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type).  Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon is added after the field name even when the
      field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  printer = _Printer(
      writer,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # Single-line output ends with a trailing separator space; drop it.
  return text.rstrip() if as_one_line else text

172 

173 

def MessageToBytes(message, **kwargs) -> bytes:
  """Convert protobuf message to encoded text format.  See MessageToString."""
  text = MessageToString(message, **kwargs)
  if isinstance(text, bytes):
    # Already bytes; pass through unchanged.
    return text
  # Non-ASCII output is only possible when as_utf8 was requested.
  encoding = 'utf-8' if kwargs.get('as_utf8') else 'ascii'
  return text.encode(encoding)

181 

182 

def _IsMapEntry(field):
  """Returns True if the field is a synthesized map-entry message field."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

187 

188 

def _IsGroupLike(field):
  """Determines if a field is consistent with a proto2 group.

  Args:
    field: The field descriptor.

  Returns:
    True if this field is group-like, false otherwise.
  """
  # Groups are always tag-delimited.
  if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
    return False

  group_type = field.message_type

  # Group field names are always the lowercased message type name.
  if field.name != group_type.name.lower():
    return False

  # Both must come from the same file; this also guards the scope
  # comparison below, where file-level extensions would otherwise compare
  # NULL == NULL.
  if group_type.file != field.file:
    return False

  # Group messages are always defined in the same scope as the field.
  scope = field.extension_scope if field.is_extension else field.containing_type
  return group_type.containing_type == scope

217 

218 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=_as_utf8_default,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Convert the message to text format and write it to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields in source-code declaration order
      instead of field number order.
    float_format: If set, float field format spec (per the "Format
      Specification Mini-Language"); otherwise shortest round-tripping
      float.  Also used for doubles when double_format is not set.
    double_format: If set, double field format spec; falls back to
      float_format, then str().
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon is added after the field name even when the
      field is a proto message.
  """
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintMessage(message)

278 

279 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=_as_utf8_default,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Print a single field name/value pair."""
  printer = _Printer(
      out,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintField(field, value)

302 

303 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=_as_utf8_default,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Print a single field value (not including name)."""
  printer = _Printer(
      out,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintFieldValue(field, value)

326 

327 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a protobuf message instance.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance; the default pool is used when
      None.

  Returns:
    A Message instance of the type matching type_name, or None if no
    Descriptor matching type_name was found.
  """
  # Imports are deferred to avoid import cycles at module load time.
  # pylint: disable=g-import-not-at-top
  if descriptor_pool is None:
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  from google.protobuf import message_factory
  try:
    message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    return None
  return message_factory.GetMessageClass(message_descriptor)()

350 

351 

# These values must match the WireType enum in //google/protobuf/wire_format.h.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

355 

356 

class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=_as_utf8_default,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'.  For an exact round trip, use double_format='.17g'.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces
        for nesting.
      use_index_order: If True, print fields in source-code declaration order
        instead of field number order.
      float_format: If set, float field format spec (per the "Format
        Specification Mini-Language"); otherwise shortest round-tripping
        float.  Also used for doubles when double_format is not set.
      double_format: If set, double field format spec; falls back to
        float_format, then str().
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line):
        unicode|None to custom format selected sub-messages.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon is added after the field name even when
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    # Doubles fall back to the float format when no explicit double format.
    if double_format is not None:
      self.double_format = double_format
    else:
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field."""
    if '/' not in message.type_url:
      return False
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message is None:
      return False
    packed_message.MergeFromString(message.value)
    colon = ':' if self.force_colon else ''
    self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
    self._PrintMessageFieldValue(packed_message)
    self.out.write(' ' if self.as_one_line else '\n')
    return True

  def _TryCustomFormatMessage(self, message):
    """Applies message_formatter; returns True when it produced output."""
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      # Extensions sort by number, regular fields by declaration index.
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This copies the whole entry (slow for submessage-valued maps),
          # but avoids a significant refactoring of this printer.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        if self.as_one_line:
          out.write(' { ')
        else:
          out.write(' {\n')
          self.indent += 2

        self._PrintUnknownFields(field.data)

        if self.as_one_line:
          out.write('} ')
        else:
          self.indent -= 2
          out.write(' ' * self.indent + '}\n')
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        try:
          # If this payload parses as an unknown-field set, it is probably
          # an embedded message; print it recursively.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:  # pylint: disable=broad-except
          pos = 0

        if pos == len(field.data):
          if self.as_one_line:
            out.write(' { ')
          else:
            out.write(' {\n')
            self.indent += 2

          self._PrintUnknownFields(embedded_unknown_message)

          if self.as_one_line:
            out.write('} ')
          else:
            self.indent -= 2
            out.write(' ' * self.indent + '}\n')
        else:
          # A string or bytes field.  self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    elif field.is_extension:
      out.write('[')
      if (field.containing_type.GetOptions().message_set_wire_format and
          field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
          field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
        # MessageSet items are keyed by the message type's full name.
        out.write(field.message_type.full_name)
      else:
        out.write(field.full_name)
      out.write(']')
    elif _IsGroupLike(field):
      # For groups, use the capitalized message type name.
      out.write(field.message_type.name)
    else:
      out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional before a message value; cross-language golden
      # files omit it, so it is only emitted here when force_colon is set or
      # the value is a scalar.
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints a repeated primitive field in short [v1, v2, ...] form."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for i, element in enumerate(value):
      if i:
        self.out.write(', ')
      self.PrintFieldValue(field, element)
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a nested message surrounded by braces or angle brackets."""
    if self.pointy_brackets:
      openb, closeb = '<', '>'
    else:
      openb, closeb = '{', '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    fd = descriptor.FieldDescriptor
    if field.cpp_type == fd.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == fd.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Unknown enum number: print the raw integer.
        out.write(str(value))
    elif field.cpp_type == fd.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      # Binary data in TYPE_BYTES fields is always fully escaped.
      out_as_utf8 = self.as_utf8 and field.type != fd.TYPE_BYTES
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == fd.CPPTYPE_BOOL:
      out.write('true' if value else 'false')
    elif field.cpp_type == fd.CPPTYPE_FLOAT:
      if self.float_format is not None:
        out.write('{1:{0}}'.format(self.float_format, value))
      elif math.isnan(value):
        out.write(str(value))
      else:
        out.write(str(type_checkers.ToShortestFloat(value)))
    elif field.cpp_type == fd.CPPTYPE_DOUBLE and self.double_format is not None:
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      out.write(str(value))

665 

666 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message.  This is different from what the binary msg.ParseFrom(...) does.
  If text contains a field already set in message, the value is appended if
  the field is repeated.  Otherwise, an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible; it may hide errors such as a
      misspelled field name.

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(text.split(newline),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

723 

724 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one.  This means any non-repeated, top-level fields specified in
  text replace those in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible; it may hide errors such as a
      misspelled field name.

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(text.split(newline),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

761 

762 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible; it may hide errors such as a
      misspelled field name.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  parser = _Parser(allow_unknown_extension,
                   allow_field_number,
                   descriptor_pool=descriptor_pool,
                   allow_unknown_field=allow_unknown_field)
  return parser.ParseLines(lines, message)

795 

796 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid using this option if possible; it may hide errors such as a
      misspelled field name.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  parser = _Parser(allow_unknown_extension,
                   allow_field_number,
                   descriptor_pool=descriptor_pool,
                   allow_unknown_field=allow_unknown_field)
  return parser.MergeLines(lines, message)

829 

830 

class _Parser(object):
  """Text format parser for protocol message."""

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
    # If True, unregistered extensions are skipped instead of raising.
    self.allow_unknown_extension = allow_unknown_extension
    # If True, fields may be referenced by number as well as by name.
    self.allow_field_number = allow_field_number
    # DescriptorPool used to resolve packed google.protobuf.Any types.
    self.descriptor_pool = descriptor_pool
    # If True, unknown fields are skipped instead of raising.
    self.allow_unknown_field = allow_unknown_field

  def ParseLines(self, lines, message):
    """Parses a text representation of a protocol message into a message.

    Args:
      lines: An iterable of lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Returns:
      The same message passed as argument.
    """
    # Parse (as opposed to Merge) forbids repeated scalars on singular fields.
    self._allow_multiple_scalars = False
    self._ParseOrMerge(lines, message)
    return message

  def MergeLines(self, lines, message):
    """Merges a text representation of a protocol message into a message.

    Args:
      lines: An iterable of lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Returns:
      The same message passed as argument.
    """
    # Merge allows a later value to overwrite an earlier one.
    self._allow_multiple_scalars = True
    self._ParseOrMerge(lines, message)
    return message

  def _ParseOrMerge(self, lines, message):
    """Converts a text representation of a protocol message into a message.

    Args:
      lines: Lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Raises:
      ParseError: On text parsing problems.
    """
    # Tokenizer expects native str lines; decode bytes lazily as UTF-8.
    try:
      str_lines = (
          line if isinstance(line, str) else line.decode('utf-8')
          for line in lines)
      tokenizer = Tokenizer(str_lines)
    except UnicodeDecodeError as e:
      raise ParseError from e
    if message:
      self.root_type = message.DESCRIPTOR.full_name
    while not tokenizer.AtEnd():
      self._MergeField(tokenizer, message)

  def _MergeField(self, tokenizer, message):
    """Merges a single protocol message field into a message.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      message: A protocol message to record the data.

    Raises:
      ParseError: In case of text parsing problems.
    """
    message_descriptor = message.DESCRIPTOR
    # Expanded Any syntax: [type.googleapis.com/full.TypeName] { ... }
    if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
        tokenizer.TryConsume('[')):
      type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
      tokenizer.Consume(']')
      tokenizer.TryConsume(':')
      self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                               type_url_prefix + '/' + packed_type_name)
      if tokenizer.TryConsume('<'):
        expanded_any_end_token = '>'
      else:
        tokenizer.Consume('{')
        expanded_any_end_token = '}'
      expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
                                                          self.descriptor_pool)
      # Direct comparison with None is used instead of implicit bool conversion
      # to avoid false positives with falsy initial values, e.g. for
      # google.protobuf.ListValue.
      if expanded_any_sub_message is None:
        raise ParseError('Type %s not found in descriptor pool' %
                         packed_type_name)
      while not tokenizer.TryConsume(expanded_any_end_token):
        if tokenizer.AtEnd():
          raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
                                                  (expanded_any_end_token,))
        self._MergeField(tokenizer, expanded_any_sub_message)
      deterministic = False

      message.Pack(expanded_any_sub_message,
                   type_url_prefix=type_url_prefix,
                   deterministic=deterministic)
      return

    if tokenizer.TryConsume('['):
      # Extension field: [package.ExtensionName]
      name = [tokenizer.ConsumeIdentifier()]
      while tokenizer.TryConsume('.'):
        name.append(tokenizer.ConsumeIdentifier())
      name = '.'.join(name)

      if not message_descriptor.is_extendable:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" does not have extensions.' %
            message_descriptor.full_name)
      # pylint: disable=protected-access
      field = message.Extensions._FindExtensionByName(name)
      # pylint: enable=protected-access
      if not field:
        if self.allow_unknown_extension:
          field = None
        else:
          raise tokenizer.ParseErrorPreviousToken(
              'Extension "%s" not registered. '
              'Did you import the _pb2 module which defines it? '
              'If you are trying to place the extension in the MessageSet '
              'field of another message that is in an Any or MessageSet field, '
              'that message\'s _pb2 module must be imported as well' % name)
      elif message_descriptor != field.containing_type:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" does not extend message type "%s".' %
            (name, message_descriptor.full_name))

      tokenizer.Consume(']')

    else:
      name = tokenizer.ConsumeIdentifierOrNumber()
      if self.allow_field_number and name.isdigit():
        number = ParseInteger(name, True, True)
        field = message_descriptor.fields_by_number.get(number, None)
        if not field and message_descriptor.is_extendable:
          field = message.Extensions._FindExtensionByNumber(number)
      else:
        field = message_descriptor.fields_by_name.get(name, None)

        # Group names are expected to be capitalized as they appear in the
        # .proto file, which actually matches their type names, not their field
        # names.
        if not field:
          field = message_descriptor.fields_by_name.get(name.lower(), None)
          if field and not _IsGroupLike(field):
            field = None
          if field and field.message_type.name != name:
            field = None

      if not field and not self.allow_unknown_field:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" has no field named "%s".' %
            (message_descriptor.full_name, name))

    if field:
      if not self._allow_multiple_scalars and field.containing_oneof:
        # Check if there's a different field set in this oneof.
        # Note that we ignore the case if the same field was set before, and we
        # apply _allow_multiple_scalars to non-scalar fields as well.
        which_oneof = message.WhichOneof(field.containing_oneof.name)
        if which_oneof is not None and which_oneof != field.name:
          raise tokenizer.ParseErrorPreviousToken(
              'Field "%s" is specified along with field "%s", another member '
              'of oneof "%s" for message type "%s".' %
              (field.name, which_oneof, field.containing_oneof.name,
               message_descriptor.full_name))

      # Message fields may omit the ':'; scalar fields require it.
      if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        tokenizer.TryConsume(':')
        self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                                 field.full_name)
        merger = self._MergeMessageField
      else:
        tokenizer.Consume(':')
        self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                                 field.full_name)
        merger = self._MergeScalarField

      if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
          tokenizer.TryConsume('[')):
        # Short repeated format, e.g. "foo: [1, 2, 3]"
        if not tokenizer.TryConsume(']'):
          while True:
            merger(tokenizer, message, field)
            if tokenizer.TryConsume(']'):
              break
            tokenizer.Consume(',')

      else:
        merger(tokenizer, message, field)

    else:  # Proto field is unknown.
      assert (self.allow_unknown_extension or self.allow_unknown_field)
      self._SkipFieldContents(tokenizer, name, message_descriptor.full_name)

    # For historical reasons, fields may optionally be separated by commas or
    # semicolons.
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')

  def _LogSilentMarker(self, immediate_message_type, field_name):
    # No-op hook; subclasses may override to record silent-marker sightings.
    pass

  def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name):
    # Reports (via _LogSilentMarker) a debug-string silent marker seen in the
    # whitespace immediately before the current token.
    if tokenizer.contains_silent_marker_before_current_token:
      self._LogSilentMarker(immediate_message_type, field_name)

  def _ConsumeAnyTypeUrl(self, tokenizer):
    """Consumes a google.protobuf.Any type URL and returns the type name."""
    # Consume "type.googleapis.com/".
    prefix = [tokenizer.ConsumeIdentifier()]
    tokenizer.Consume('.')
    prefix.append(tokenizer.ConsumeIdentifier())
    tokenizer.Consume('.')
    prefix.append(tokenizer.ConsumeIdentifier())
    tokenizer.Consume('/')
    # Consume the fully-qualified type name.
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    return '.'.join(prefix), '.'.join(name)

  def _MergeMessageField(self, tokenizer, message, field):
    """Merges a single message field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: The message of which field is a member.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
    """
    is_map_entry = _IsMapEntry(field)

    # Message bodies may be delimited by either <...> or {...}.
    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      elif is_map_entry:
        # Map entries are merged into a standalone entry message first, then
        # copied into the map below.
        sub_message = getattr(message, field.name).GetEntryClass()()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        sub_message = message.Extensions[field]
      else:
        # Also apply _allow_multiple_scalars to message field.
        # TODO: Change to _allow_singular_overwrites.
        if (not self._allow_multiple_scalars and
            message.HasField(field.name)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        sub_message = getattr(message, field.name)
      # Mark the field as present even if the body turns out to be empty.
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
      self._MergeField(tokenizer, sub_message)

    if is_map_entry:
      value_cpptype = field.message_type.fields_by_name['value'].cpp_type
      if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        value = getattr(message, field.name)[sub_message.key]
        value.CopyFrom(sub_message.value)
      else:
        getattr(message, field.name)[sub_message.key] = sub_message.value

  def _MergeScalarField(self, tokenizer, message, field):
    """Merges a single scalar field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: A protocol message to record the data.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
      RuntimeError: On runtime errors.
    """
    _ = self.allow_unknown_extension
    value = None

    # Dispatch on the wire type to the appropriate token consumer.
    if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                      descriptor.FieldDescriptor.TYPE_SINT32,
                      descriptor.FieldDescriptor.TYPE_SFIXED32):
      value = _ConsumeInt32(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                        descriptor.FieldDescriptor.TYPE_SINT64,
                        descriptor.FieldDescriptor.TYPE_SFIXED64):
      value = _ConsumeInt64(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                        descriptor.FieldDescriptor.TYPE_FIXED32):
      value = _ConsumeUint32(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                        descriptor.FieldDescriptor.TYPE_FIXED64):
      value = _ConsumeUint64(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                        descriptor.FieldDescriptor.TYPE_DOUBLE):
      value = tokenizer.ConsumeFloat()
    elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
      value = tokenizer.ConsumeBool()
    elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
      value = tokenizer.ConsumeString()
    elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      value = tokenizer.ConsumeByteString()
    elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
      value = tokenizer.ConsumeEnum(field)
    else:
      raise RuntimeError('Unknown field type %d' % field.type)

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        message.Extensions[field].append(value)
      else:
        getattr(message, field.name).append(value)
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            field.has_presence and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        else:
          message.Extensions[field] = value
      else:
        duplicate_error = False
        if not self._allow_multiple_scalars:
          if field.has_presence:
            duplicate_error = message.HasField(field.name)
          else:
            # For field that doesn't represent presence, try best effort to
            # check multiple scalars by compare to default values.
            duplicate_error = not decoder.IsDefaultScalarValue(
                getattr(message, field.name)
            )

        if duplicate_error:
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        else:
          setattr(message, field.name, value)

  def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type):
    """Skips over contents (value or message) of a field.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      field_name: The field name currently being parsed.
      immediate_message_type: The type of the message immediately containing
        the silent marker.
    """
    # Try to guess the type of this field.
    # If this field is not a message, there should be a ":" between the
    # field name and the field value and also the field value should not
    # start with "{" or "<" which indicates the beginning of a message body.
    # If there is no ":" or there is a "{" or "<" after ":", this field has
    # to be a message or the input is ill-formed.
    if tokenizer.TryConsume(
        ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
      self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
      if tokenizer.LookingAt('['):
        self._SkipRepeatedFieldValue(tokenizer, immediate_message_type)
      else:
        self._SkipFieldValue(tokenizer)
    else:
      self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
      self._SkipFieldMessage(tokenizer, immediate_message_type)

  def _SkipField(self, tokenizer, immediate_message_type):
    """Skips over a complete field (name and value/message).

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      immediate_message_type: The type of the message immediately containing
        the silent marker.
    """
    field_name = ''
    if tokenizer.TryConsume('['):
      # Consume extension or google.protobuf.Any type URL
      field_name += '[' + tokenizer.ConsumeIdentifier()
      num_identifiers = 1
      while tokenizer.TryConsume('.'):
        field_name += '.' + tokenizer.ConsumeIdentifier()
        num_identifiers += 1
      # This is possibly a type URL for an Any message.
      if num_identifiers == 3 and tokenizer.TryConsume('/'):
        field_name += '/' + tokenizer.ConsumeIdentifier()
        while tokenizer.TryConsume('.'):
          field_name += '.' + tokenizer.ConsumeIdentifier()
      tokenizer.Consume(']')
      field_name += ']'
    else:
      field_name += tokenizer.ConsumeIdentifierOrNumber()

    self._SkipFieldContents(tokenizer, field_name, immediate_message_type)

    # For historical reasons, fields may optionally be separated by commas or
    # semicolons.
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')

  def _SkipFieldMessage(self, tokenizer, immediate_message_type):
    """Skips over a field message.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      immediate_message_type: The type of the message immediately containing
        the silent marker
    """
    if tokenizer.TryConsume('<'):
      delimiter = '>'
    else:
      tokenizer.Consume('{')
      delimiter = '}'

    while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'):
      self._SkipField(tokenizer, immediate_message_type)

    tokenizer.Consume(delimiter)

  def _SkipFieldValue(self, tokenizer):
    """Skips over a field value.

    Args:
      tokenizer: A tokenizer to parse the field name and values.

    Raises:
      ParseError: In case an invalid field value is found.
    """
    # Try each scalar token shape in turn; any one of them consumes the value.
    if (not tokenizer.TryConsumeByteString() and
        not tokenizer.TryConsumeIdentifier() and
        not _TryConsumeInt64(tokenizer) and
        not _TryConsumeUint64(tokenizer) and
        not tokenizer.TryConsumeFloat()):
      raise ParseError('Invalid field value: ' + tokenizer.token)

  def _SkipRepeatedFieldValue(self, tokenizer, immediate_message_type):
    """Skips over a repeated field value (short form, e.g. "[1, 2, 3]").

    Args:
      tokenizer: A tokenizer to parse the field value.
      immediate_message_type: The type of the message immediately containing
        the silent marker.
    """
    tokenizer.Consume('[')
    if not tokenizer.TryConsume(']'):
      while True:
        if tokenizer.LookingAt('<') or tokenizer.LookingAt('{'):
          self._SkipFieldMessage(tokenizer, immediate_message_type)
        else:
          self._SkipFieldValue(tokenizer)
        if tokenizer.TryConsume(']'):
          break
        tokenizer.Consume(',')

1290 

1291 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    self._position = 0
    self._line = -1          # current line index (0-based after first _PopLine)
    self._column = 0         # current column within _current_line
    self._token_start = None
    self.token = ''          # the current token text ('' means end of input)
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # Comments count as whitespace unless the caller wants them as tokens.
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    # Prime the tokenizer with the first token.
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    # True iff the current token equals the given text, without consuming it.
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advances to the next input line whenever the column runs past the
    # current line's end; clears _more_lines when the input is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    # Skips whitespace (and comments, when configured) possibly spanning
    # multiple lines, remembering whether a debug-string silent marker was
    # seen in the skipped run.
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    """Consumes a comment token and returns its text.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    # Non-raising variant of ConsumeIdentifier; returns success as a bool.
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    # Non-raising variant of ConsumeIdentifierOrNumber.
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    # Non-raising variant of ConsumeInteger.
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    # Non-raising variant of ConsumeFloat.
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes a floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    # Non-raising variant of ConsumeByteString.
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent quoted literals are concatenated, like in C or Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    # Consumes an enum value (by name or number) for the given enum field.
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No token pattern matched: fall back to a single-character token
      # (e.g. punctuation such as ':', '{', '[').
      self.token = self._current_line[self._column]

1635 

# Aliased so it can still be accessed by current visibility violators.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1639 

1640 

def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger configured for the int32 range.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=True)

1654 

1655 

def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger configured for the uint32 range.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=False)

1669 

1670 

def _TryConsumeInt64(tokenizer):
  """Returns True iff a signed 64bit integer was consumed from tokenizer."""
  try:
    _ConsumeInt64(tokenizer)
    return True
  except ParseError:
    return False

1677 

1678 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)

1692 

1693 

def _TryConsumeUint64(tokenizer):
  """Returns True iff an unsigned 64bit integer was consumed from tokenizer."""
  try:
    _ConsumeUint64(tokenizer)
    return True
  except ParseError:
    return False

1700 

1701 

def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger configured for the uint64 range.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=False)

1715 

1716 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  # Translate a ValueError from the range/syntax check into a positioned
  # ParseError; advance past the token only on success.
  try:
    parsed = ParseInteger(tokenizer.token, is_signed=is_signed,
                          is_long=is_long)
  except ValueError as e:
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return parsed

1737 

1738 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Do the actual parsing. Exception handling is propagated to caller.
  result = _ParseAbstractInteger(text)

  # _INTEGER_CHECKERS is laid out as (u32, i32, u64, i64); pick the one
  # matching the requested width and signedness, and let it raise on
  # out-of-range values.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(result)
  return result

1760 

1761 

1762def _ParseAbstractInteger(text): 

1763 """Parses an integer without checking size/signedness. 

1764 

1765 Args: 

1766 text: The text to parse. 

1767 

1768 Returns: 

1769 The integer value. 

1770 

1771 Raises: 

1772 ValueError: Thrown Iff the text is not a valid integer. 

1773 """ 

1774 # Do the actual parsing. Exception handling is propagated to caller. 

1775 orig_text = text 

1776 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1777 if c_octal_match: 

1778 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1779 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1780 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1781 try: 

1782 return int(text, 0) 

1783 except ValueError: 

1784 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1785 

1786 

def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # Octal-prefixed floats are rejected outright.
  if _FLOAT_OCTAL_PREFIX.match(text):
    raise ValueError('Invalid octal float: %s' % text)
  # Fast path: Python-compatible syntax.
  try:
    return float(text)
  except ValueError:
    pass
  # Alternative protobuf spellings: infinity and NaN.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  # Last resort: strip a C-style 'f' suffix, e.g. '1.0f'.
  try:
    return float(text.rstrip('fF'))
  except ValueError:
    raise ValueError("Couldn't parse float: %s" % text)

1819 

1820 

def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # Only these exact (case-sensitive) spellings are accepted.
  truthy = {'true', 't', '1', 'True'}
  falsy = {'false', 'f', '0', 'False'}
  if text in truthy:
    return True
  if text in falsy:
    return False
  raise ValueError('Expected "true" or "false".')

1839 

1840 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_type = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not a number: treat the value as an enum identifier.
    named = enum_type.values_by_name.get(value, None)
    if named is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_type.full_name, value))
    return named.number
  # Numeric value: open enums accept any number, closed enums only
  # numbers that correspond to a declared value.
  if not field.enum_type.is_closed:
    return number
  numbered = enum_type.values_by_number.get(number, None)
  if numbered is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_type.full_name, number))
  return numbered.number