Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/protobuf/text_format.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

767 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

20__author__ = 'kenton@google.com (Kenton Varda)' 

21 

22# TODO Import thread contention leads to test failures. 

23import encodings.raw_unicode_escape # pylint: disable=unused-import 

24import encodings.unicode_escape # pylint: disable=unused-import 

25import io 

26import math 

27import re 

28 

29from google.protobuf.internal import decoder 

30from google.protobuf.internal import type_checkers 

31from google.protobuf import descriptor 

32from google.protobuf import text_encoding 

33from google.protobuf import unknown_fields 

34 

35# pylint: disable=g-import-not-at-top 

# Public API of this module.
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'MessageToBytes']

# Value checkers for each protobuf integer width/signedness, used when
# validating parsed integer literals.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive text-format float literals for +/- infinity
# (e.g. 'inf', '-Infinity', 'inff') and NaN ('nan', 'nanf').
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
# Integer literal with a leading zero (octal-style prefix).
_FLOAT_OCTAL_PREFIX = re.compile('-?0[0-9]+')
_QUOTES = frozenset(("'", '"'))
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
_DEBUG_STRING_SILENT_MARKER = '\t '

# Default for the as_utf8 parameter used throughout this module.
_as_utf8_default = True

51 

52 

class Error(Exception):
  """Top-level module error for text_format.

  All exceptions raised by this module derive from this class, so callers
  can catch any text_format failure with a single except clause.
  """


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    # When location information is available, prefix the message with
    # "line[:column] : " so the error pinpoints where parsing failed.
    if message is not None and line is not None:
      location = str(line)
      if column is not None:
        location += ':{0}'.format(column)
      message = '{0} : {1}'.format(location, message)
    if message is None:
      super(ParseError, self).__init__()
    else:
      super(ParseError, self).__init__(message)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the line number where the error occurred, or None."""
    return self._line

  def GetColumn(self):
    """Returns the column where the error occurred, or None."""
    return self._column

78 

79 

class TextWriter(object):
  """Minimal text sink backed by an in-memory unicode buffer."""

  def __init__(self, as_utf8):
    # The as_utf8 flag is accepted for call-site compatibility only; the
    # underlying buffer is always a text-mode StringIO.
    self._buffer = io.StringIO()

  def write(self, val):
    return self._buffer.write(val)

  def close(self):
    return self._buffer.close()

  def getvalue(self):
    return self._buffer.getvalue()

93 

94 

def MessageToString(
    message,
    as_utf8=_as_utf8_default,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Convert protobuf message to text format.

  Double values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using double_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, double_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number, extensions
      will be printed at the end of the message and their relative order is
      determined by the extension number. By default, use the field number
      order.
    float_format (str): Deprecated. If set, use this to specify float field
      formatting (per the "Format Specification Mini-Language"); otherwise,
      shortest float that has same value in wire will be printed. Also affect
      double field if double_format is not set but float_format is set.
    double_format (str): Deprecated. If set, use this to specify double field
      formatting (per the "Format Specification Mini-Language"); if it is not
      set but float_format is set, use float_format. Otherwise, use ``str()``
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if the
      field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  _Printer(
      writer,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # One-line output ends with a trailing separator space; strip it.
  return text.rstrip() if as_one_line else text

172 

173 

def MessageToBytes(message, **kwargs) -> bytes:
  """Converts a protobuf message to text format and encodes it to bytes.

  See MessageToString for the supported keyword arguments.
  """
  text = MessageToString(message, **kwargs)
  if isinstance(text, bytes):
    # Already encoded; nothing to do.
    return text
  if kwargs.get('as_utf8'):
    return text.encode('utf-8')
  return text.encode('ascii')

181 

182 

def _IsMapEntry(field):
  """Returns True if field describes an auto-generated map entry message."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

187 

188 

def _IsGroupLike(field):
  """Determines if a field is consistent with a proto2 group.

  Args:
    field: The field descriptor.

  Returns:
    True if this field is group-like, false otherwise.
  """
  # Groups are always tag-delimited.
  if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
    return False

  # Group field names are always the lowercased message type name.
  if field.name != field.message_type.name.lower():
    return False

  # Both must come from the same file; without this check, file-level
  # extensions would compare None == None in the scope comparison below.
  if field.message_type.file != field.file:
    return False

  # Group messages are always defined in the same scope as the field.
  if field.is_extension:
    return field.message_type.containing_type == field.extension_scope
  return field.message_type.containing_type == field.containing_type

217 

218 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=_as_utf8_default,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Convert the message to text format and write it to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, shortest
      float that has same value in wire will be printed. Also affect double
      field if double_format is not set but float_format is set.
    double_format: If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  # Forward every option to a _Printer instance and delegate.
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)

278 

279 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=_as_utf8_default,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Print a single field name/value pair."""
  # Build a printer with the requested options and delegate to it.
  _Printer(out, indent, as_utf8, as_one_line,
           use_short_repeated_primitives, pointy_brackets,
           use_index_order, float_format, double_format,
           message_formatter=message_formatter,
           print_unknown_fields=print_unknown_fields,
           force_colon=force_colon).PrintField(field, value)

302 

303 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=_as_utf8_default,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Print a single field value (not including name)."""
  # Build a printer with the requested options and delegate to it.
  _Printer(out, indent, as_utf8, as_one_line,
           use_short_repeated_primitives, pointy_brackets,
           use_index_order, float_format, double_format,
           message_formatter=message_formatter,
           print_unknown_fields=print_unknown_fields,
           force_colon=force_colon).PrintFieldValue(field, value)

326 

327 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a protobuf message instance.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    matching type_name was found.
  """
  # pylint: disable=g-import-not-at-top
  from google.protobuf import message_factory
  if descriptor_pool is None:
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  try:
    message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    # Unknown type name: signal "not found" rather than raising.
    return None
  return message_factory.GetMessageClass(message_descriptor)()

350 

351 

# These values must match WireType enum in //google/protobuf/wire_format.h.
# Used by _Printer._PrintUnknownFields to decide how an unknown field's
# payload should be rendered.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

355 

356 

class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=_as_utf8_default,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (which is the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'. To ensure that converting to text and back to a proto
    will result in an identical value, double_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: Deprecated. If set, use this to specify float field
        formatting (per the "Format Specification Mini-Language"); otherwise,
        shortest float that has same value in wire will be printed. Also affect
        double field if double_format is not set but float_format is set.
      double_format: Deprecated. If set, use this to specify double field
        formatting (per the "Format Specification Mini-Language"); if it is not
        set but float_format is set, use float_format. Otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    # double_format falls back to float_format when not explicitly given.
    if double_format is not None:
      self.double_format = double_format
    else:
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field.

    Returns True if the Any payload could be resolved and was printed in
    expanded "[type_url] { ... }" form; False otherwise.
    """
    if '/' not in message.type_url:
      return False
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message is not None:
      packed_message.MergeFromString(message.value)
      colon = ':' if self.force_colon else ''
      self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
      self._PrintMessageFieldValue(packed_message)
      self.out.write(' ' if self.as_one_line else '\n')
      return True
    else:
      # Type not found in the pool; caller falls back to regular printing.
      return False

  def _TryCustomFormatMessage(self, message):
    """Returns True if the custom message_formatter handled this message."""
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      # Formatter declined; fall back to default printing.
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    # Custom formatter and expanded-Any printing take precedence over the
    # regular field-by-field output.
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        # Map entries are printed in sorted key order for determinism.
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree. Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO: refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.is_repeated:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        if self.as_one_line:
          out.write(' { ')
        else:
          out.write(' {\n')
          # NOTE: self.indent is mutated for the recursive call and restored
          # in the matching multi-line branch below.
          self.indent += 2

        self._PrintUnknownFields(field.data)

        if self.as_one_line:
          out.write('} ')
        else:
          self.indent -= 2
          out.write(' ' * self.indent + '}\n')
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:  # pylint: disable=broad-except
          # Not parseable as a message; pos = 0 forces the string/bytes path.
          pos = 0

        if pos == len(field.data):
          if self.as_one_line:
            out.write(' { ')
          else:
            out.write(' {\n')
            self.indent += 2

          self._PrintUnknownFields(embedded_unknown_message)

          if self.as_one_line:
            out.write('} ')
          else:
            self.indent -= 2
            out.write(' ' * self.indent + '}\n')
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            not field.is_required and
            not field.is_repeated):
          # MessageSet extensions print the message type name instead of the
          # extension field's full name.
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif _IsGroupLike(field):
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints short repeated primitives value as "name: [v1, v2, ...]"."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for i in range(len(value) - 1):
      self.PrintFieldValue(field, value[i])
      self.out.write(', ')
    self.PrintFieldValue(field, value[-1])
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    # Choose the nesting delimiters based on the pointy_brackets option.
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Unknown enum number: print the raw integer.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if self.float_format is not None:
        out.write('{1:{0}}'.format(self.float_format, value))
      else:
        if math.isnan(value):
          out.write(str(value))
        else:
          out.write(str(type_checkers.ToShortestFloat(value)))
    elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
          self.double_format is not None):
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      # Integers, and doubles without an explicit format.
      out.write(str(value))

666 

667 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message. This is different from what the binary msg.ParseFrom(...) does.
  If text contains a field already set in message, the value is appended if the
  field is repeated. Otherwise, an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the separator matching the input's type (bytes or str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(text.split(newline),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

724 

725 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one. This means any non-repeated, top-level fields specified in text
  replace those in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the separator matching the input's type (bytes or str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(text.split(newline),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

762 

763 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(
      allow_unknown_extension=allow_unknown_extension,
      allow_field_number=allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).ParseLines(lines, message)

796 

797 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(
      allow_unknown_extension=allow_unknown_extension,
      allow_field_number=allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).MergeLines(lines, message)

830 

831 

832class _Parser(object): 

833 """Text format parser for protocol message.""" 

834 

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
    # See ParseLines()/MergeLines() module functions for the meaning of
    # these options; they are stored unchanged for use during parsing.
    self.allow_unknown_extension = allow_unknown_extension
    self.allow_field_number = allow_field_number
    self.descriptor_pool = descriptor_pool
    self.allow_unknown_field = allow_unknown_field

844 

  def ParseLines(self, lines, message):
    """Parses a text representation of a protocol message into a message."""
    # Parse semantics: repeated scalar values for a non-repeated field are an
    # error (contrast with MergeLines, which keeps the last value).
    self._allow_multiple_scalars = False
    self._ParseOrMerge(lines, message)
    return message

850 

  def MergeLines(self, lines, message):
    """Merges a text representation of a protocol message into a message."""
    # Merge semantics: repeated scalar values for a non-repeated field are
    # allowed; the last value wins (contrast with ParseLines).
    self._allow_multiple_scalars = True
    self._ParseOrMerge(lines, message)
    return message

856 

  def _ParseOrMerge(self, lines, message):
    """Converts a text representation of a protocol message into a message.

    Args:
      lines: Lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Raises:
      ParseError: On text parsing problems.
    """
    # Tokenize expects native str lines.
    # NOTE(review): the generator decodes lazily, so the try block catches
    # UnicodeDecodeError only if Tokenizer consumes lines during construction
    # — confirm against Tokenizer's behavior.
    try:
      str_lines = (
          line if isinstance(line, str) else line.decode('utf-8')
          for line in lines)
      tokenizer = Tokenizer(str_lines)
    except UnicodeDecodeError as e:
      raise ParseError from e
    if message:
      self.root_type = message.DESCRIPTOR.full_name
    while not tokenizer.AtEnd():
      self._MergeField(tokenizer, message)

879 

880 def _MergeField(self, tokenizer, message): 

881 """Merges a single protocol message field into a message. 

882 

883 Args: 

884 tokenizer: A tokenizer to parse the field name and values. 

885 message: A protocol message to record the data. 

886 

887 Raises: 

888 ParseError: In case of text parsing problems. 

889 """ 

890 message_descriptor = message.DESCRIPTOR 

891 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and 

892 tokenizer.TryConsume('[')): 

893 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) 

894 tokenizer.Consume(']') 

895 tokenizer.TryConsume(':') 

896 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

897 type_url_prefix + '/' + packed_type_name) 

898 if tokenizer.TryConsume('<'): 

899 expanded_any_end_token = '>' 

900 else: 

901 tokenizer.Consume('{') 

902 expanded_any_end_token = '}' 

903 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, 

904 self.descriptor_pool) 

905 # Direct comparison with None is used instead of implicit bool conversion 

906 # to avoid false positives with falsy initial values, e.g. for 

907 # google.protobuf.ListValue. 

908 if expanded_any_sub_message is None: 

909 raise ParseError('Type %s not found in descriptor pool' % 

910 packed_type_name) 

911 while not tokenizer.TryConsume(expanded_any_end_token): 

912 if tokenizer.AtEnd(): 

913 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % 

914 (expanded_any_end_token,)) 

915 self._MergeField(tokenizer, expanded_any_sub_message) 

916 deterministic = False 

917 

918 message.Pack(expanded_any_sub_message, 

919 type_url_prefix=type_url_prefix, 

920 deterministic=deterministic) 

921 return 

922 

923 if tokenizer.TryConsume('['): 

924 name = [tokenizer.ConsumeIdentifier()] 

925 while tokenizer.TryConsume('.'): 

926 name.append(tokenizer.ConsumeIdentifier()) 

927 name = '.'.join(name) 

928 

929 if not message_descriptor.is_extendable: 

930 raise tokenizer.ParseErrorPreviousToken( 

931 'Message type "%s" does not have extensions.' % 

932 message_descriptor.full_name) 

933 # pylint: disable=protected-access 

934 field = message.Extensions._FindExtensionByName(name) 

935 # pylint: enable=protected-access 

936 if not field: 

937 if self.allow_unknown_extension: 

938 field = None 

939 else: 

940 raise tokenizer.ParseErrorPreviousToken( 

941 'Extension "%s" not registered. ' 

942 'Did you import the _pb2 module which defines it? ' 

943 'If you are trying to place the extension in the MessageSet ' 

944 'field of another message that is in an Any or MessageSet field, ' 

945 'that message\'s _pb2 module must be imported as well' % name) 

946 elif message_descriptor != field.containing_type: 

947 raise tokenizer.ParseErrorPreviousToken( 

948 'Extension "%s" does not extend message type "%s".' % 

949 (name, message_descriptor.full_name)) 

950 

951 tokenizer.Consume(']') 

952 

953 else: 

954 name = tokenizer.ConsumeIdentifierOrNumber() 

955 if self.allow_field_number and name.isdigit(): 

956 number = ParseInteger(name, True, True) 

957 field = message_descriptor.fields_by_number.get(number, None) 

958 if not field and message_descriptor.is_extendable: 

959 field = message.Extensions._FindExtensionByNumber(number) 

960 else: 

961 field = message_descriptor.fields_by_name.get(name, None) 

962 

963 # Group names are expected to be capitalized as they appear in the 

964 # .proto file, which actually matches their type names, not their field 

965 # names. 

966 if not field: 

967 field = message_descriptor.fields_by_name.get(name.lower(), None) 

968 if field and not _IsGroupLike(field): 

969 field = None 

970 if field and field.message_type.name != name: 

971 field = None 

972 

973 if not field and not self.allow_unknown_field: 

974 raise tokenizer.ParseErrorPreviousToken( 

975 'Message type "%s" has no field named "%s".' % 

976 (message_descriptor.full_name, name)) 

977 

978 if field: 

979 if not self._allow_multiple_scalars and field.containing_oneof: 

980 # Check if there's a different field set in this oneof. 

981 # Note that we ignore the case if the same field was set before, and we 

982 # apply _allow_multiple_scalars to non-scalar fields as well. 

983 which_oneof = message.WhichOneof(field.containing_oneof.name) 

984 if which_oneof is not None and which_oneof != field.name: 

985 raise tokenizer.ParseErrorPreviousToken( 

986 'Field "%s" is specified along with field "%s", another member ' 

987 'of oneof "%s" for message type "%s".' % 

988 (field.name, which_oneof, field.containing_oneof.name, 

989 message_descriptor.full_name)) 

990 

991 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

992 tokenizer.TryConsume(':') 

993 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

994 field.full_name) 

995 merger = self._MergeMessageField 

996 else: 

997 tokenizer.Consume(':') 

998 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

999 field.full_name) 

1000 merger = self._MergeScalarField 

1001 

1002 if (field.is_repeated and 

1003 tokenizer.TryConsume('[')): 

1004 # Short repeated format, e.g. "foo: [1, 2, 3]" 

1005 if not tokenizer.TryConsume(']'): 

1006 while True: 

1007 merger(tokenizer, message, field) 

1008 if tokenizer.TryConsume(']'): 

1009 break 

1010 tokenizer.Consume(',') 

1011 

1012 else: 

1013 merger(tokenizer, message, field) 

1014 

1015 else: # Proto field is unknown. 

1016 assert (self.allow_unknown_extension or self.allow_unknown_field) 

1017 self._SkipFieldContents(tokenizer, name, message_descriptor.full_name) 

1018 

1019 # For historical reasons, fields may optionally be separated by commas or 

1020 # semicolons. 

1021 if not tokenizer.TryConsume(','): 

1022 tokenizer.TryConsume(';') 

1023 

1024 def _LogSilentMarker(self, immediate_message_type, field_name): 

1025 pass 

1026 

1027 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

1028 if tokenizer.contains_silent_marker_before_current_token: 

1029 self._LogSilentMarker(immediate_message_type, field_name) 

1030 

1031 def _ConsumeAnyTypeUrl(self, tokenizer): 

1032 """Consumes a google.protobuf.Any type URL and returns the type name.""" 

1033 # Consume "type.googleapis.com/". 

1034 prefix = [tokenizer.ConsumeIdentifier()] 

1035 tokenizer.Consume('.') 

1036 prefix.append(tokenizer.ConsumeIdentifier()) 

1037 tokenizer.Consume('.') 

1038 prefix.append(tokenizer.ConsumeIdentifier()) 

1039 tokenizer.Consume('/') 

1040 # Consume the fully-qualified type name. 

1041 name = [tokenizer.ConsumeIdentifier()] 

1042 while tokenizer.TryConsume('.'): 

1043 name.append(tokenizer.ConsumeIdentifier()) 

1044 return '.'.join(prefix), '.'.join(name) 

1045 

1046 def _MergeMessageField(self, tokenizer, message, field): 

1047 """Merges a single scalar field into a message. 

1048 

1049 Args: 

1050 tokenizer: A tokenizer to parse the field value. 

1051 message: The message of which field is a member. 

1052 field: The descriptor of the field to be merged. 

1053 

1054 Raises: 

1055 ParseError: In case of text parsing problems. 

1056 """ 

1057 is_map_entry = _IsMapEntry(field) 

1058 

1059 if tokenizer.TryConsume('<'): 

1060 end_token = '>' 

1061 else: 

1062 tokenizer.Consume('{') 

1063 end_token = '}' 

1064 

1065 if field.is_repeated: 

1066 if field.is_extension: 

1067 sub_message = message.Extensions[field].add() 

1068 elif is_map_entry: 

1069 sub_message = getattr(message, field.name).GetEntryClass()() 

1070 else: 

1071 sub_message = getattr(message, field.name).add() 

1072 else: 

1073 if field.is_extension: 

1074 if (not self._allow_multiple_scalars and 

1075 message.HasExtension(field)): 

1076 raise tokenizer.ParseErrorPreviousToken( 

1077 'Message type "%s" should not have multiple "%s" extensions.' % 

1078 (message.DESCRIPTOR.full_name, field.full_name)) 

1079 sub_message = message.Extensions[field] 

1080 else: 

1081 # Also apply _allow_multiple_scalars to message field. 

1082 # TODO: Change to _allow_singular_overwrites. 

1083 if (not self._allow_multiple_scalars and 

1084 message.HasField(field.name)): 

1085 raise tokenizer.ParseErrorPreviousToken( 

1086 'Message type "%s" should not have multiple "%s" fields.' % 

1087 (message.DESCRIPTOR.full_name, field.name)) 

1088 sub_message = getattr(message, field.name) 

1089 sub_message.SetInParent() 

1090 

1091 while not tokenizer.TryConsume(end_token): 

1092 if tokenizer.AtEnd(): 

1093 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,)) 

1094 self._MergeField(tokenizer, sub_message) 

1095 

1096 if is_map_entry: 

1097 value_cpptype = field.message_type.fields_by_name['value'].cpp_type 

1098 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

1099 value = getattr(message, field.name)[sub_message.key] 

1100 value.CopyFrom(sub_message.value) 

1101 else: 

1102 getattr(message, field.name)[sub_message.key] = sub_message.value 

1103 

1104 def _MergeScalarField(self, tokenizer, message, field): 

1105 """Merges a single scalar field into a message. 

1106 

1107 Args: 

1108 tokenizer: A tokenizer to parse the field value. 

1109 message: A protocol message to record the data. 

1110 field: The descriptor of the field to be merged. 

1111 

1112 Raises: 

1113 ParseError: In case of text parsing problems. 

1114 RuntimeError: On runtime errors. 

1115 """ 

1116 _ = self.allow_unknown_extension 

1117 value = None 

1118 

1119 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 

1120 descriptor.FieldDescriptor.TYPE_SINT32, 

1121 descriptor.FieldDescriptor.TYPE_SFIXED32): 

1122 value = _ConsumeInt32(tokenizer) 

1123 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 

1124 descriptor.FieldDescriptor.TYPE_SINT64, 

1125 descriptor.FieldDescriptor.TYPE_SFIXED64): 

1126 value = _ConsumeInt64(tokenizer) 

1127 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 

1128 descriptor.FieldDescriptor.TYPE_FIXED32): 

1129 value = _ConsumeUint32(tokenizer) 

1130 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 

1131 descriptor.FieldDescriptor.TYPE_FIXED64): 

1132 value = _ConsumeUint64(tokenizer) 

1133 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 

1134 descriptor.FieldDescriptor.TYPE_DOUBLE): 

1135 value = tokenizer.ConsumeFloat() 

1136 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 

1137 value = tokenizer.ConsumeBool() 

1138 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 

1139 value = tokenizer.ConsumeString() 

1140 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 

1141 value = tokenizer.ConsumeByteString() 

1142 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 

1143 value = tokenizer.ConsumeEnum(field) 

1144 else: 

1145 raise RuntimeError('Unknown field type %d' % field.type) 

1146 

1147 if field.is_repeated: 

1148 if field.is_extension: 

1149 message.Extensions[field].append(value) 

1150 else: 

1151 getattr(message, field.name).append(value) 

1152 else: 

1153 if field.is_extension: 

1154 if (not self._allow_multiple_scalars and 

1155 field.has_presence and 

1156 message.HasExtension(field)): 

1157 raise tokenizer.ParseErrorPreviousToken( 

1158 'Message type "%s" should not have multiple "%s" extensions.' % 

1159 (message.DESCRIPTOR.full_name, field.full_name)) 

1160 else: 

1161 message.Extensions[field] = value 

1162 else: 

1163 duplicate_error = False 

1164 if not self._allow_multiple_scalars: 

1165 if field.has_presence: 

1166 duplicate_error = message.HasField(field.name) 

1167 else: 

1168 # For field that doesn't represent presence, try best effort to 

1169 # check multiple scalars by compare to default values. 

1170 duplicate_error = not decoder.IsDefaultScalarValue( 

1171 getattr(message, field.name) 

1172 ) 

1173 

1174 if duplicate_error: 

1175 raise tokenizer.ParseErrorPreviousToken( 

1176 'Message type "%s" should not have multiple "%s" fields.' % 

1177 (message.DESCRIPTOR.full_name, field.name)) 

1178 else: 

1179 setattr(message, field.name, value) 

1180 

1181 def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type): 

1182 """Skips over contents (value or message) of a field. 

1183 

1184 Args: 

1185 tokenizer: A tokenizer to parse the field name and values. 

1186 field_name: The field name currently being parsed. 

1187 immediate_message_type: The type of the message immediately containing 

1188 the silent marker. 

1189 """ 

1190 # Try to guess the type of this field. 

1191 # If this field is not a message, there should be a ":" between the 

1192 # field name and the field value and also the field value should not 

1193 # start with "{" or "<" which indicates the beginning of a message body. 

1194 # If there is no ":" or there is a "{" or "<" after ":", this field has 

1195 # to be a message or the input is ill-formed. 

1196 if tokenizer.TryConsume( 

1197 ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'): 

1198 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1199 if tokenizer.LookingAt('['): 

1200 self._SkipRepeatedFieldValue(tokenizer, immediate_message_type) 

1201 else: 

1202 self._SkipFieldValue(tokenizer) 

1203 else: 

1204 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1205 self._SkipFieldMessage(tokenizer, immediate_message_type) 

1206 

1207 def _SkipField(self, tokenizer, immediate_message_type): 

1208 """Skips over a complete field (name and value/message). 

1209 

1210 Args: 

1211 tokenizer: A tokenizer to parse the field name and values. 

1212 immediate_message_type: The type of the message immediately containing 

1213 the silent marker. 

1214 """ 

1215 field_name = '' 

1216 if tokenizer.TryConsume('['): 

1217 # Consume extension or google.protobuf.Any type URL 

1218 field_name += '[' + tokenizer.ConsumeIdentifier() 

1219 num_identifiers = 1 

1220 while tokenizer.TryConsume('.'): 

1221 field_name += '.' + tokenizer.ConsumeIdentifier() 

1222 num_identifiers += 1 

1223 # This is possibly a type URL for an Any message. 

1224 if num_identifiers == 3 and tokenizer.TryConsume('/'): 

1225 field_name += '/' + tokenizer.ConsumeIdentifier() 

1226 while tokenizer.TryConsume('.'): 

1227 field_name += '.' + tokenizer.ConsumeIdentifier() 

1228 tokenizer.Consume(']') 

1229 field_name += ']' 

1230 else: 

1231 field_name += tokenizer.ConsumeIdentifierOrNumber() 

1232 

1233 self._SkipFieldContents(tokenizer, field_name, immediate_message_type) 

1234 

1235 # For historical reasons, fields may optionally be separated by commas or 

1236 # semicolons. 

1237 if not tokenizer.TryConsume(','): 

1238 tokenizer.TryConsume(';') 

1239 

1240 def _SkipFieldMessage(self, tokenizer, immediate_message_type): 

1241 """Skips over a field message. 

1242 

1243 Args: 

1244 tokenizer: A tokenizer to parse the field name and values. 

1245 immediate_message_type: The type of the message immediately containing 

1246 the silent marker 

1247 """ 

1248 if tokenizer.TryConsume('<'): 

1249 delimiter = '>' 

1250 else: 

1251 tokenizer.Consume('{') 

1252 delimiter = '}' 

1253 

1254 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 

1255 self._SkipField(tokenizer, immediate_message_type) 

1256 

1257 tokenizer.Consume(delimiter) 

1258 

1259 def _SkipFieldValue(self, tokenizer): 

1260 """Skips over a field value. 

1261 

1262 Args: 

1263 tokenizer: A tokenizer to parse the field name and values. 

1264 

1265 Raises: 

1266 ParseError: In case an invalid field value is found. 

1267 """ 

1268 if (not tokenizer.TryConsumeByteString()and 

1269 not tokenizer.TryConsumeIdentifier() and 

1270 not _TryConsumeInt64(tokenizer) and 

1271 not _TryConsumeUint64(tokenizer) and 

1272 not tokenizer.TryConsumeFloat()): 

1273 raise ParseError('Invalid field value: ' + tokenizer.token) 

1274 

1275 def _SkipRepeatedFieldValue(self, tokenizer, immediate_message_type): 

1276 """Skips over a repeated field value. 

1277 

1278 Args: 

1279 tokenizer: A tokenizer to parse the field value. 

1280 """ 

1281 tokenizer.Consume('[') 

1282 if not tokenizer.TryConsume(']'): 

1283 while True: 

1284 if tokenizer.LookingAt('<') or tokenizer.LookingAt('{'): 

1285 self._SkipFieldMessage(tokenizer, immediate_message_type) 

1286 else: 

1287 self._SkipFieldValue(tokenizer) 

1288 if tokenizer.TryConsume(']'): 

1289 break 

1290 tokenizer.Consume(',') 

1291 

1292 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    self._position = 0
    self._line = -1
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # When skipping comments, whitespace and comments are consumed together.
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    # Prime the tokenizer so that self.token holds the first token.
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Returns True iff the current token equals *token* (does not consume)."""
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advance to the next line whenever the current one is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      # Record whether the skipped run is exactly the debug-string silent
      # marker; _Parser._DetectSilentMarker reads this flag.
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    """Consumes and returns the current comment token.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    """Returns True iff an identifier was consumed."""
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    """Returns True iff an identifier or number was consumed."""
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    """Returns True iff an integer was consumed."""
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    """Returns True iff a floating point number was consumed."""
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    """Returns True iff a byte string was consumed."""
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, like in C or Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (name or number) for the given field."""
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Fall back to a single character (punctuation like '{', ':', ...).
      self.token = self._current_line[self._column]

1636 

# Aliased so it can still be accessed by current visibility violators.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1640 

1641 

def _ConsumeInt32(tokenizer):
  """Reads a signed 32bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  # Thin wrapper: delegate with the int32 signedness/width combination.
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=True)

1655 

1656 

def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  # Thin wrapper: delegate with the uint32 signedness/width combination.
  return _ConsumeInteger(tokenizer, is_long=False, is_signed=False)

1670 

1671 

def _TryConsumeInt64(tokenizer):
  """Returns True iff a signed 64bit integer was consumed from tokenizer."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1678 

1679 

def _ConsumeInt64(tokenizer):
  """Reads a signed 64bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  # Thin wrapper: delegate with the int64 signedness/width combination.
  # (The upstream docstring said "32bit"; this consumer is 64-bit.)
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=True)

1693 

1694 

def _TryConsumeUint64(tokenizer):
  """Returns True iff an unsigned 64bit integer was consumed from tokenizer."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1701 

1702 

def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  # Thin wrapper: delegate with the uint64 signedness/width combination.
  return _ConsumeInteger(tokenizer, is_long=True, is_signed=False)

1716 

1717 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  token = tokenizer.token
  try:
    parsed = ParseInteger(token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Surface the range/format failure as a positioned ParseError.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return parsed

1738 

1739 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Parse first; a ValueError from bad syntax propagates to the caller.
  result = _ParseAbstractInteger(text)

  # Range-check with the matching checker; the index encodes the
  # (is_long, is_signed) combination.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(result)
  return result

1761 

1762 

1763def _ParseAbstractInteger(text): 

1764 """Parses an integer without checking size/signedness. 

1765 

1766 Args: 

1767 text: The text to parse. 

1768 

1769 Returns: 

1770 The integer value. 

1771 

1772 Raises: 

1773 ValueError: Thrown Iff the text is not a valid integer. 

1774 """ 

1775 # Do the actual parsing. Exception handling is propagated to caller. 

1776 orig_text = text 

1777 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1778 if c_octal_match: 

1779 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1780 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1781 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1782 try: 

1783 return int(text, 0) 

1784 except ValueError: 

1785 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1786 

1787 

def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # Octal-looking floats are rejected outright.
  if _FLOAT_OCTAL_PREFIX.match(text):
    raise ValueError('Invalid octal float: %s' % text)
  try:
    # Python-compatible syntax handles the common case directly.
    return float(text)
  except ValueError:
    # Fall through to the text-format alternative spellings.
    if _FLOAT_INFINITY.match(text):
      return float('-inf') if text[0] == '-' else float('inf')
    if _FLOAT_NAN.match(text):
      return float('nan')
    # Assume a C-style '1.0f' suffix and retry without it.
    try:
      return float(text.rstrip('fF'))
    except ValueError:
      raise ValueError("Couldn't parse float: %s" % text)

1820 

1821 

def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # Text-format accepts a handful of spellings for each truth value.
  truthy = ('true', 't', '1', 'True')
  falsy = ('false', 'f', '0', 'False')
  if text in truthy:
    return True
  if text in falsy:
    return False
  raise ValueError('Expected "true" or "false".')

1840 

1841 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Non-numeric text: resolve the value as an enum identifier.
    by_name = enum_descriptor.values_by_name.get(value, None)
    if by_name is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return by_name.number
  # Numeric text: open (non-closed) enums accept any number, known or not.
  if not field.enum_type.is_closed:
    return number
  # Closed enums require the number to name a declared value.
  by_number = enum_descriptor.values_by_number.get(number, None)
  if by_number is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return by_number.number