Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/protobuf/text_format.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

749 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

__author__ = 'kenton@google.com (Kenton Varda)'

# TODO Import thread contention leads to test failures.
import encodings.raw_unicode_escape  # pylint: disable=unused-import
import encodings.unicode_escape  # pylint: disable=unused-import
import io
import math
import re

from google.protobuf.internal import decoder
from google.protobuf.internal import type_checkers
from google.protobuf import descriptor
from google.protobuf import text_encoding
from google.protobuf import unknown_fields

# pylint: disable=g-import-not-at-top
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'MessageToBytes']

# Checkers used when validating parsed integer scalars, one per
# width/signedness combination.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Matches 'inf'/'infinity' with optional sign and optional trailing 'f',
# case-insensitively; used when parsing float tokens.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
# Matches 'nan' with an optional trailing 'f', case-insensitively.
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
_QUOTES = frozenset(("'", '"'))
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
# Marker emitted by DebugString to flag debug-format output.
_DEBUG_STRING_SILENT_MARKER = '\t '

48 

49 

class Error(Exception):
  """Top-level module error for text_format."""


class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    """Builds the error, prefixing *message* with 'line[:column] : ' if known.

    Args:
      message: Optional human-readable description of the problem.
      line: Optional 1-based line number where the problem occurred.
      column: Optional column number; only used when *line* is given.
    """
    if message is not None and line is not None:
      loc = str(line)
      if column is not None:
        loc += ':{0}'.format(column)
      message = '{0} : {1}'.format(loc, message)
    # Forward the (possibly location-prefixed) message to Exception, or
    # nothing at all when no message was supplied.
    args = () if message is None else (message,)
    super(ParseError, self).__init__(*args)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the 1-based line number of the error, or None if unknown."""
    return self._line

  def GetColumn(self):
    """Returns the column number of the error, or None if unknown."""
    return self._column

75 

76 

class TextWriter(object):
  """Thin adapter that accumulates printed text in an in-memory buffer.

  The ``as_utf8`` constructor argument is accepted for interface
  compatibility but ignored: output is always accumulated as ``str``.
  """

  def __init__(self, as_utf8):
    del as_utf8  # Unused; retained so the call signature stays stable.
    self._writer = io.StringIO()

  def write(self, val):
    """Appends *val* to the buffer; returns the number of characters written."""
    return self._writer.write(val)

  def close(self):
    """Closes the underlying buffer, releasing its storage."""
    return self._writer.close()

  def getvalue(self):
    """Returns everything written so far as a single string."""
    return self._writer.getvalue()

90 

91 

def MessageToString(
    message,
    as_utf8=False,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Convert protobuf message to text format.

  Double values can be formatted compactly with 15 digits of
  precision (which is the most that IEEE 754 "double" can guarantee)
  using double_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, double_format='.17g' should be used.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number, extensions
      will be printed at the end of the message and their relative order is
      determined by the extension number. By default, use the field number
      order.
    float_format (str): If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, shortest float
      that has same value in wire will be printed. Also affect double field
      if double_format is not set but float_format is set.
    double_format (str): If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, use ``str()``
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if the
      field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  # Build the printer and render directly; all formatting decisions live
  # inside _Printer.
  _Printer(
      writer,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # Single-line output carries a trailing separator space; strip it.
  return text.rstrip() if as_one_line else text

169 

170 

def MessageToBytes(message, **kwargs) -> bytes:
  """Convert protobuf message to encoded text format.  See MessageToString."""
  rendered = MessageToString(message, **kwargs)
  if isinstance(rendered, bytes):
    return rendered
  # UTF-8 when the caller asked for unescaped Unicode, ASCII otherwise.
  encoding = 'utf-8' if kwargs.get('as_utf8') else 'ascii'
  return rendered.encode(encoding)

178 

179 

def _IsMapEntry(field):
  """Returns true if *field* is the synthetic entry message of a map field."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

184 

185 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Convert the message to text format and write it to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number. By default, use the
      field number order.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, shortest
      float that has same value in wire will be printed. Also affect double
      field if double_format is not set but float_format is set.
    double_format: If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if it is not set but
      float_format is set, use float_format. Otherwise, str() is used.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  # All options are forwarded verbatim; _Printer owns the formatting logic.
  _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      use_field_number=use_field_number,
      descriptor_pool=descriptor_pool,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)

245 

246 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Print a single field name/value pair."""
  # Construct the printer with keyword arguments for readability and
  # delegate the actual formatting to it.
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintField(field, value)

269 

270 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Print a single field value (not including name)."""
  # Construct the printer with keyword arguments for readability and
  # delegate the actual formatting to it.
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintFieldValue(field, value)

293 

294 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a protobuf message instance.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    was found matching type_name.
  """
  # pylint: disable=g-import-not-at-top
  if descriptor_pool is None:
    # Fall back to the process-wide default pool.
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  from google.protobuf import message_factory
  try:
    found_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    return None
  # Resolve the concrete message class and return a fresh instance of it.
  return message_factory.GetMessageClass(found_descriptor)()

317 

318 

# These values must match WireType enum in //google/protobuf/wire_format.h.
# Used below to classify unknown fields when printing them.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

322 

323 

class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=False,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (which is the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'. To ensure that converting to text and back to a proto
    will result in an identical value, double_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: If set, use this to specify float field formatting
        (per the "Format Specification Mini-Language"); otherwise, shortest
        float that has same value in wire will be printed. Also affect double
        field if double_format is not set but float_format is set.
      double_format: If set, use this to specify double field formatting
        (per the "Format Specification Mini-Language"); if it is not set but
        float_format is set, use float_format. Otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    # Doubles fall back to the float format when no explicit double format
    # was provided (may still be None, meaning str() formatting).
    if double_format is not None:
      self.double_format = double_format
    else:
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field."""
    if '/' not in message.type_url:
      return False
    # Resolve the packed type and unpack it so we can print it expanded as
    # [type_url] { ... } instead of raw type_url/value fields.
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message:
      packed_message.MergeFromString(message.value)
      colon = ':' if self.force_colon else ''
      self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
      self._PrintMessageFieldValue(packed_message)
      self.out.write(' ' if self.as_one_line else '\n')
      return True
    else:
      # Type not found in the pool; caller falls back to default printing.
      return False

  def _TryCustomFormatMessage(self, message):
    """Returns True if message_formatter produced output for this message."""
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    # Custom formatter and Any-expansion take precedence over normal
    # field-by-field printing.
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      # Extensions sort by number after regular fields (which sort by
      # declaration index).
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree.  Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO: refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          # Compact [v1, v2, ...] form for repeated scalars.
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        if self.as_one_line:
          out.write(' { ')
        else:
          out.write(' {\n')
          self.indent += 2

        # Groups nest; recurse with the increased indent.
        self._PrintUnknownFields(field.data)

        if self.as_one_line:
          out.write('} ')
        else:
          self.indent -= 2
          out.write(' ' * self.indent + '}\n')
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:  # pylint: disable=broad-except
          pos = 0

        # Only treat it as a message if the decode consumed every byte.
        if pos == len(field.data):
          if self.as_one_line:
            out.write(' { ')
          else:
            out.write(' {\n')
            self.indent += 2

          self._PrintUnknownFields(embedded_unknown_message)

          if self.as_one_line:
            out.write('} ')
          else:
            self.indent -= 2
            out.write(' ' * self.indent + '}\n')
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
          # MessageSet extensions print the payload type name instead of
          # the extension field's own name.
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints short repeated primitives value."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for i in range(len(value) - 1):
      self.PrintFieldValue(field, value[i])
      self.out.write(', ')
    self.PrintFieldValue(field, value[-1])
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a nested message surrounded by {} or <> brackets."""
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Unknown enum number (open enums): print the raw integer.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if self.float_format is not None:
        out.write('{1:{0}}'.format(self.float_format, value))
      else:
        if math.isnan(value):
          out.write(str(value))
        else:
          # Shortest representation that round-trips through a 32-bit float.
          out.write(str(type_checkers.ToShortestFloat(value)))
    elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
          self.double_format is not None):
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      # Integers, and doubles without an explicit format.
      out.write(str(value))

633 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message. This is different from what the binary msg.ParseFrom(...) does.
  If text contains a field already set in message, the value is appended if the
  field is repeated. Otherwise, an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the separator matching the input's type (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(
      text.split(newline),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field)

690 

691 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one. This means any non-repeated, top-level fields specified in text
  replace those in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the separator matching the input's type (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(
      text.split(newline),
      message,
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field)

728 

729 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Parse mode: duplicate scalar values are an error (see _Parser.ParseLines).
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).ParseLines(lines, message)

762 

763 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep
      parsing. Avoid to use this option if possible. It may hide some
      errors (e.g. spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Merge mode: later scalar values replace earlier ones
  # (see _Parser.MergeLines).
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).MergeLines(lines, message)

796 

797 

class _Parser(object):
  """Text format parser for protocol message."""

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
    # Options are stored as-is; they are consulted during _MergeField.
    self.allow_unknown_extension = allow_unknown_extension
    self.allow_field_number = allow_field_number
    self.descriptor_pool = descriptor_pool
    self.allow_unknown_field = allow_unknown_field

  def ParseLines(self, lines, message):
    """Parses a text representation of a protocol message into a message."""
    # Parse semantics: a repeated scalar assignment to the same singular
    # field is an error.
    self._allow_multiple_scalars = False
    self._ParseOrMerge(lines, message)
    return message

  def MergeLines(self, lines, message):
    """Merges a text representation of a protocol message into a message."""
    # Merge semantics: later scalar assignments overwrite earlier ones.
    self._allow_multiple_scalars = True
    self._ParseOrMerge(lines, message)
    return message

  def _ParseOrMerge(self, lines, message):
    """Converts a text representation of a protocol message into a message.

    Args:
      lines: Lines of a message's text representation.
      message: A protocol buffer message to merge into.

    Raises:
      ParseError: On text parsing problems.
    """
    # Tokenize expects native str lines.
    try:
      # Lazily decode bytes lines as UTF-8; str lines pass through untouched.
      str_lines = (
          line if isinstance(line, str) else line.decode('utf-8')
          for line in lines)
      tokenizer = Tokenizer(str_lines)
    except UnicodeDecodeError as e:
      raise ParseError from e
    if message:
      # Remember the root message type for error reporting further down.
      self.root_type = message.DESCRIPTOR.full_name
    while not tokenizer.AtEnd():
      self._MergeField(tokenizer, message)

845 

  def _MergeField(self, tokenizer, message):
    """Merges a single protocol message field into a message.

    Handles three field-name shapes: an expanded google.protobuf.Any
    (``[type_url/full.type] { ... }``), an extension (``[full.name]``), and a
    plain identifier or (when allow_field_number is set) a field number.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      message: A protocol message to record the data.

    Raises:
      ParseError: In case of text parsing problems.
    """
    message_descriptor = message.DESCRIPTOR
    # Expanded Any: parse the type URL, then the packed message body, and
    # finally Pack() the sub-message back into the Any.
    if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and
        tokenizer.TryConsume('[')):
      type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer)
      tokenizer.Consume(']')
      # The ':' before the body is optional.
      tokenizer.TryConsume(':')
      self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                               type_url_prefix + '/' + packed_type_name)
      if tokenizer.TryConsume('<'):
        expanded_any_end_token = '>'
      else:
        tokenizer.Consume('{')
        expanded_any_end_token = '}'
      expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name,
                                                          self.descriptor_pool)
      # Direct comparison with None is used instead of implicit bool conversion
      # to avoid false positives with falsy initial values, e.g. for
      # google.protobuf.ListValue.
      if expanded_any_sub_message is None:
        raise ParseError('Type %s not found in descriptor pool' %
                         packed_type_name)
      while not tokenizer.TryConsume(expanded_any_end_token):
        if tokenizer.AtEnd():
          raise tokenizer.ParseErrorPreviousToken('Expected "%s".' %
                                                  (expanded_any_end_token,))
        self._MergeField(tokenizer, expanded_any_sub_message)
      deterministic = False

      message.Pack(expanded_any_sub_message,
                   type_url_prefix=type_url_prefix,
                   deterministic=deterministic)
      return

    if tokenizer.TryConsume('['):
      # Extension field: "[qualified.extension.name]".
      name = [tokenizer.ConsumeIdentifier()]
      while tokenizer.TryConsume('.'):
        name.append(tokenizer.ConsumeIdentifier())
      name = '.'.join(name)

      if not message_descriptor.is_extendable:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" does not have extensions.' %
            message_descriptor.full_name)
      # pylint: disable=protected-access
      field = message.Extensions._FindExtensionByName(name)
      # pylint: enable=protected-access
      if not field:
        if self.allow_unknown_extension:
          # Fall through to the unknown-field skipping path below.
          field = None
        else:
          raise tokenizer.ParseErrorPreviousToken(
              'Extension "%s" not registered. '
              'Did you import the _pb2 module which defines it? '
              'If you are trying to place the extension in the MessageSet '
              'field of another message that is in an Any or MessageSet field, '
              'that message\'s _pb2 module must be imported as well' % name)
      elif message_descriptor != field.containing_type:
        raise tokenizer.ParseErrorPreviousToken(
            'Extension "%s" does not extend message type "%s".' %
            (name, message_descriptor.full_name))

      tokenizer.Consume(']')

    else:
      # Regular field, addressed by name or (optionally) by number.
      name = tokenizer.ConsumeIdentifierOrNumber()
      if self.allow_field_number and name.isdigit():
        number = ParseInteger(name, True, True)
        field = message_descriptor.fields_by_number.get(number, None)
        if not field and message_descriptor.is_extendable:
          field = message.Extensions._FindExtensionByNumber(number)
      else:
        field = message_descriptor.fields_by_name.get(name, None)

        # Group names are expected to be capitalized as they appear in the
        # .proto file, which actually matches their type names, not their field
        # names.
        if not field:
          field = message_descriptor.fields_by_name.get(name.lower(), None)
          if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
            field = None

        if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
            field.message_type.name != name):
          field = None

      if not field and not self.allow_unknown_field:
        raise tokenizer.ParseErrorPreviousToken(
            'Message type "%s" has no field named "%s".' %
            (message_descriptor.full_name, name))

    if field:
      if not self._allow_multiple_scalars and field.containing_oneof:
        # Check if there's a different field set in this oneof.
        # Note that we ignore the case if the same field was set before, and we
        # apply _allow_multiple_scalars to non-scalar fields as well.
        which_oneof = message.WhichOneof(field.containing_oneof.name)
        if which_oneof is not None and which_oneof != field.name:
          raise tokenizer.ParseErrorPreviousToken(
              'Field "%s" is specified along with field "%s", another member '
              'of oneof "%s" for message type "%s".' %
              (field.name, which_oneof, field.containing_oneof.name,
               message_descriptor.full_name))

      # Message-typed fields may omit the ':'; scalar fields require it.
      if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        tokenizer.TryConsume(':')
        self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                                 field.full_name)
        merger = self._MergeMessageField
      else:
        tokenizer.Consume(':')
        self._DetectSilentMarker(tokenizer, message_descriptor.full_name,
                                 field.full_name)
        merger = self._MergeScalarField

      if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and
          tokenizer.TryConsume('[')):
        # Short repeated format, e.g. "foo: [1, 2, 3]"
        if not tokenizer.TryConsume(']'):
          while True:
            merger(tokenizer, message, field)
            if tokenizer.TryConsume(']'):
              break
            tokenizer.Consume(',')

      else:
        merger(tokenizer, message, field)

    else:  # Proto field is unknown.
      assert (self.allow_unknown_extension or self.allow_unknown_field)
      self._SkipFieldContents(tokenizer, name, message_descriptor.full_name)

    # For historical reasons, fields may optionally be separated by commas or
    # semicolons.
    if not tokenizer.TryConsume(','):
      tokenizer.TryConsume(';')

991 

  def _LogSilentMarker(self, immediate_message_type, field_name):
    """No-op hook invoked when a debug-string silent marker is detected.

    Intentionally does nothing here; presumably overridden elsewhere to log
    the occurrence — TODO confirm against subclasses/callers.
    """
    pass

994 

995 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

996 if tokenizer.contains_silent_marker_before_current_token: 

997 self._LogSilentMarker(immediate_message_type, field_name) 

998 

999 def _ConsumeAnyTypeUrl(self, tokenizer): 

1000 """Consumes a google.protobuf.Any type URL and returns the type name.""" 

1001 # Consume "type.googleapis.com/". 

1002 prefix = [tokenizer.ConsumeIdentifier()] 

1003 tokenizer.Consume('.') 

1004 prefix.append(tokenizer.ConsumeIdentifier()) 

1005 tokenizer.Consume('.') 

1006 prefix.append(tokenizer.ConsumeIdentifier()) 

1007 tokenizer.Consume('/') 

1008 # Consume the fully-qualified type name. 

1009 name = [tokenizer.ConsumeIdentifier()] 

1010 while tokenizer.TryConsume('.'): 

1011 name.append(tokenizer.ConsumeIdentifier()) 

1012 return '.'.join(prefix), '.'.join(name) 

1013 

  def _MergeMessageField(self, tokenizer, message, field):
    """Merges a single message-typed field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: The message of which field is a member.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
    """
    is_map_entry = _IsMapEntry(field)

    # Accept either '<...>' or '{...}' delimiters for the sub-message body.
    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      elif is_map_entry:
        # Map entries are merged into a synthetic entry message first, then
        # copied into the map container below.
        sub_message = getattr(message, field.name).GetEntryClass()()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        sub_message = message.Extensions[field]
      else:
        # Also apply _allow_multiple_scalars to message field.
        # TODO: Change to _allow_singular_overwrites.
        if (not self._allow_multiple_scalars and
            message.HasField(field.name)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message present even if its body is empty.
      sub_message.SetInParent()

    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,))
      self._MergeField(tokenizer, sub_message)

    if is_map_entry:
      value_cpptype = field.message_type.fields_by_name['value'].cpp_type
      if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
        value = getattr(message, field.name)[sub_message.key]
        value.CopyFrom(sub_message.value)
      else:
        getattr(message, field.name)[sub_message.key] = sub_message.value

1071 

  def _MergeScalarField(self, tokenizer, message, field):
    """Merges a single scalar field into a message.

    Args:
      tokenizer: A tokenizer to parse the field value.
      message: A protocol message to record the data.
      field: The descriptor of the field to be merged.

    Raises:
      ParseError: In case of text parsing problems.
      RuntimeError: On runtime errors.
    """
    _ = self.allow_unknown_extension
    value = None

    # Dispatch on the wire type of the field to pick the right consumer.
    if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                      descriptor.FieldDescriptor.TYPE_SINT32,
                      descriptor.FieldDescriptor.TYPE_SFIXED32):
      value = _ConsumeInt32(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                        descriptor.FieldDescriptor.TYPE_SINT64,
                        descriptor.FieldDescriptor.TYPE_SFIXED64):
      value = _ConsumeInt64(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                        descriptor.FieldDescriptor.TYPE_FIXED32):
      value = _ConsumeUint32(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                        descriptor.FieldDescriptor.TYPE_FIXED64):
      value = _ConsumeUint64(tokenizer)
    elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                        descriptor.FieldDescriptor.TYPE_DOUBLE):
      value = tokenizer.ConsumeFloat()
    elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
      value = tokenizer.ConsumeBool()
    elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
      value = tokenizer.ConsumeString()
    elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
      value = tokenizer.ConsumeByteString()
    elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
      value = tokenizer.ConsumeEnum(field)
    else:
      raise RuntimeError('Unknown field type %d' % field.type)

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      if field.is_extension:
        message.Extensions[field].append(value)
      else:
        getattr(message, field.name).append(value)
    else:
      if field.is_extension:
        if (not self._allow_multiple_scalars and
            field.has_presence and
            message.HasExtension(field)):
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" extensions.' %
              (message.DESCRIPTOR.full_name, field.full_name))
        else:
          message.Extensions[field] = value
      else:
        duplicate_error = False
        if not self._allow_multiple_scalars:
          if field.has_presence:
            duplicate_error = message.HasField(field.name)
          else:
            # For field that doesn't represent presence, try best effort to
            # check multiple scalars by compare to default values.
            duplicate_error = bool(getattr(message, field.name))

        if duplicate_error:
          raise tokenizer.ParseErrorPreviousToken(
              'Message type "%s" should not have multiple "%s" fields.' %
              (message.DESCRIPTOR.full_name, field.name))
        else:
          setattr(message, field.name, value)

1146 

  def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type):
    """Skips over contents (value or message) of a field.

    Args:
      tokenizer: A tokenizer to parse the field name and values.
      field_name: The field name currently being parsed.
      immediate_message_type: The type of the message immediately containing
        the silent marker.
    """
    # Try to guess the type of this field.
    # If this field is not a message, there should be a ":" between the
    # field name and the field value and also the field value should not
    # start with "{" or "<" which indicates the beginning of a message body.
    # If there is no ":" or there is a "{" or "<" after ":", this field has
    # to be a message or the input is ill-formed.
    # NOTE: TryConsume(':') has the side effect of consuming the colon when
    # present, so the else-branch below handles both "no colon" and
    # "colon followed by a message body".
    if tokenizer.TryConsume(
        ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'):
      self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
      if tokenizer.LookingAt('['):
        self._SkipRepeatedFieldValue(tokenizer)
      else:
        self._SkipFieldValue(tokenizer)
    else:
      self._DetectSilentMarker(tokenizer, immediate_message_type, field_name)
      self._SkipFieldMessage(tokenizer, immediate_message_type)

1172 

1173 def _SkipField(self, tokenizer, immediate_message_type): 

1174 """Skips over a complete field (name and value/message). 

1175 

1176 Args: 

1177 tokenizer: A tokenizer to parse the field name and values. 

1178 immediate_message_type: The type of the message immediately containing 

1179 the silent marker. 

1180 """ 

1181 field_name = '' 

1182 if tokenizer.TryConsume('['): 

1183 # Consume extension or google.protobuf.Any type URL 

1184 field_name += '[' + tokenizer.ConsumeIdentifier() 

1185 num_identifiers = 1 

1186 while tokenizer.TryConsume('.'): 

1187 field_name += '.' + tokenizer.ConsumeIdentifier() 

1188 num_identifiers += 1 

1189 # This is possibly a type URL for an Any message. 

1190 if num_identifiers == 3 and tokenizer.TryConsume('/'): 

1191 field_name += '/' + tokenizer.ConsumeIdentifier() 

1192 while tokenizer.TryConsume('.'): 

1193 field_name += '.' + tokenizer.ConsumeIdentifier() 

1194 tokenizer.Consume(']') 

1195 field_name += ']' 

1196 else: 

1197 field_name += tokenizer.ConsumeIdentifierOrNumber() 

1198 

1199 self._SkipFieldContents(tokenizer, field_name, immediate_message_type) 

1200 

1201 # For historical reasons, fields may optionally be separated by commas or 

1202 # semicolons. 

1203 if not tokenizer.TryConsume(','): 

1204 tokenizer.TryConsume(';') 

1205 

1206 def _SkipFieldMessage(self, tokenizer, immediate_message_type): 

1207 """Skips over a field message. 

1208 

1209 Args: 

1210 tokenizer: A tokenizer to parse the field name and values. 

1211 immediate_message_type: The type of the message immediately containing 

1212 the silent marker 

1213 """ 

1214 if tokenizer.TryConsume('<'): 

1215 delimiter = '>' 

1216 else: 

1217 tokenizer.Consume('{') 

1218 delimiter = '}' 

1219 

1220 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 

1221 self._SkipField(tokenizer, immediate_message_type) 

1222 

1223 tokenizer.Consume(delimiter) 

1224 

1225 def _SkipFieldValue(self, tokenizer): 

1226 """Skips over a field value. 

1227 

1228 Args: 

1229 tokenizer: A tokenizer to parse the field name and values. 

1230 

1231 Raises: 

1232 ParseError: In case an invalid field value is found. 

1233 """ 

1234 if (not tokenizer.TryConsumeByteString()and 

1235 not tokenizer.TryConsumeIdentifier() and 

1236 not _TryConsumeInt64(tokenizer) and 

1237 not _TryConsumeUint64(tokenizer) and 

1238 not tokenizer.TryConsumeFloat()): 

1239 raise ParseError('Invalid field value: ' + tokenizer.token) 

1240 

1241 def _SkipRepeatedFieldValue(self, tokenizer): 

1242 """Skips over a repeated field value. 

1243 

1244 Args: 

1245 tokenizer: A tokenizer to parse the field value. 

1246 """ 

1247 tokenizer.Consume('[') 

1248 if not tokenizer.LookingAt(']'): 

1249 self._SkipFieldValue(tokenizer) 

1250 while tokenizer.TryConsume(','): 

1251 self._SkipFieldValue(tokenizer) 

1252 tokenizer.Consume(']') 

1253 

1254 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A token is an identifier, a number, or a quoted string (one alternative
  # per quote mark in _QUOTES).
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    # NOTE(review): _position and _token_start are assigned here but not read
    # anywhere in this class as far as visible — confirm before removing.
    self._position = 0
    self._line = -1          # 0-based index of the current line.
    self._column = 0         # 0-based column of the current token start.
    self._token_start = None
    self.token = ''          # The current token text ('' at end of input).
    self._lines = iter(lines)
    self._current_line = ''
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    # Prime the tokenizer so self.token holds the first token.
    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    """Returns True iff the current token equals the given text."""
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    """Advances to the next input line once the current one is exhausted."""
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    """Skips whitespace (and comments, when enabled) before the next token."""
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      # Record whether the skipped run is exactly the debug-string silent
      # marker; consumers check this flag via _DetectSilentMarker.
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    """Consumes a comment token and returns its text.

    Raises:
      ParseError: If the current token is not a comment.
    """
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    """Returns True iff an identifier was consumed."""
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    """Returns True iff an identifier or number was consumed."""
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    """Returns True iff an integer was consumed."""
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    """Returns True iff a float was consumed."""
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    """Returns True iff a byte string was consumed."""
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, like in C or Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    """Consumes an enum value (by name or number) for the given field.

    Raises:
      ParseError: If the current token is not a valid value of the enum.
    """
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    """Wraps a decode error into a ParseError for the current position."""
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Fall back to a single character (punctuation like '{', ':', ...).
      self.token = self._current_line[self._column]

1598 

# Aliased so it can still be accessed by current visibility violators.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1602 

1603 

def _ConsumeInt32(tokenizer):
  """Reads a signed 32-bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer positioned at the number to read.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the token is not a valid signed 32-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)

1617 

1618 

def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32-bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer positioned at the number to read.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the token is not a valid unsigned 32-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)

1632 

1633 

def _TryConsumeInt64(tokenizer):
  """Returns True iff a signed 64-bit integer was consumed."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1640 

1641 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)

1655 

1656 

def _TryConsumeUint64(tokenizer):
  """Returns True iff an unsigned 64-bit integer was consumed."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1663 

1664 

def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64-bit integer from the tokenizer's current token.

  Args:
    tokenizer: A tokenizer positioned at the number to read.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the token is not a valid unsigned 64-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)

1678 

1679 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a 64-bit (rather than 32-bit) integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  try:
    value = ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise range/format errors as ParseError at the current position.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value

1700 

1701 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and validates its range.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a 64-bit (rather than 32-bit) integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown iff the text is not a valid integer (either in format
      or in the requested signed/width range).
  """
  # Parse first; a format error propagates to the caller as ValueError.
  value = _ParseAbstractInteger(text)

  # Pick the matching range checker: index encodes (is_long, is_signed).
  _INTEGER_CHECKERS[int(is_long) * 2 + int(is_signed)].CheckValue(value)
  return value

1723 

1724 

1725def _ParseAbstractInteger(text): 

1726 """Parses an integer without checking size/signedness. 

1727 

1728 Args: 

1729 text: The text to parse. 

1730 

1731 Returns: 

1732 The integer value. 

1733 

1734 Raises: 

1735 ValueError: Thrown Iff the text is not a valid integer. 

1736 """ 

1737 # Do the actual parsing. Exception handling is propagated to caller. 

1738 orig_text = text 

1739 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1740 if c_octal_match: 

1741 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1742 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1743 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1744 try: 

1745 return int(text, 0) 

1746 except ValueError: 

1747 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1748 

1749 

def ParseFloat(text):
  """Parse a floating point number.

  Accepts standard Python float syntax plus protobuf spellings such as
  "inf"/"infinity"/"nan" (optionally with a trailing 'f') and "1.0f".

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # Fast path: Python-compatible syntax.
  try:
    return float(text)
  except ValueError:
    pass
  # Alternative protobuf spellings.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  # Assume '1.0f' format.
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)

1780 

1781 

def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse; one of 'true', 't', '1', 'True', 'false', 'f', '0',
      'False'.

  Returns:
    The boolean parsed.

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # The accepted spellings for True and False are disjoint, so the check
  # order does not matter.
  if text in ('false', 'f', '0', 'False'):
    return False
  if text in ('true', 't', '1', 'True'):
    return True
  raise ValueError('Expected "true" or "false".')

1800 

1801 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: treat the value as an enum name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return enum_value.number
  # Open enums accept any number; closed enums require a declared value.
  if not field.enum_type.is_closed:
    return number
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return enum_value.number