Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/protobuf/text_format.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

770 statements  

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# 

4# Use of this source code is governed by a BSD-style 

5# license that can be found in the LICENSE file or at 

6# https://developers.google.com/open-source/licenses/bsd 

7 

8"""Contains routines for printing protocol messages in text format. 

9 

10Simple usage example:: 

11 

12 # Create a proto object and serialize it to a text proto string. 

13 message = my_proto_pb2.MyMessage(foo='bar') 

14 text_proto = text_format.MessageToString(message) 

15 

16 # Parse a text proto string. 

17 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

18""" 

19 

20__author__ = 'kenton@google.com (Kenton Varda)' 

21 

22# TODO Import thread contention leads to test failures. 

23import encodings.raw_unicode_escape # pylint: disable=unused-import 

24import encodings.unicode_escape # pylint: disable=unused-import 

25import io 

26import math 

27import re 

28import warnings 

29 

30from google.protobuf.internal import decoder 

31from google.protobuf.internal import type_checkers 

32from google.protobuf import descriptor 

33from google.protobuf import text_encoding 

34from google.protobuf import unknown_fields 

35 

# pylint: disable=g-import-not-at-top
__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge', 'MessageToBytes']

# Value checkers used to range-check integer literals during parsing.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Matches "inf"/"infinity" with optional leading '-' and trailing 'f'.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)
# Matches "nan" with an optional trailing 'f'.
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)
# Matches integer literals with a leading zero (C-style octal prefix).
_FLOAT_OCTAL_PREFIX = re.compile('-?0[0-9]+')
# Characters that may open/close a quoted string literal.
_QUOTES = frozenset(("'", '"'))
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
# NOTE(review): presumably the marker DebugString emits so parsers can detect
# debug-string output — confirm against the parser's silent-marker handling.
_DEBUG_STRING_SILENT_MARKER = '\t '

# Default for the as_utf8 argument across the printing API.
_as_utf8_default = True

52 

53 

class Error(Exception):
  """Top-level module error for text_format.

  ParseError derives from this class, so callers can catch
  text_format.Error to handle any failure raised by this module.
  """

56 

57 

class ParseError(Error):
  """Thrown in case of text parsing or tokenizing error."""

  def __init__(self, message=None, line=None, column=None):
    """Builds the error, prefixing *message* with a "line[:column] : " tag.

    Args:
      message: Human-readable description of the problem, or None.
      line: 1-based line number where the error occurred, or None.
      column: 1-based column number where the error occurred, or None.
    """
    self._line = line
    self._column = column
    if message is None:
      super(ParseError, self).__init__()
      return
    if line is not None:
      location = str(line)
      if column is not None:
        location += ':{0}'.format(column)
      message = '{0} : {1}'.format(location, message)
    super(ParseError, self).__init__(message)

  def GetLine(self):
    """Returns the line number passed at construction (may be None)."""
    return self._line

  def GetColumn(self):
    """Returns the column number passed at construction (may be None)."""
    return self._column

79 

80 

class TextWriter(object):
  """In-memory text sink used by MessageToString to accumulate output.

  The as_utf8 constructor argument is accepted for API compatibility but is
  currently unused: output is always buffered as text in a StringIO.
  """

  def __init__(self, as_utf8):
    self._buffer = io.StringIO()

  def write(self, val):
    """Appends *val* to the buffer; returns the number of characters written."""
    return self._buffer.write(val)

  def close(self):
    """Releases the underlying buffer."""
    return self._buffer.close()

  def getvalue(self):
    """Returns everything written so far as a single string."""
    return self._buffer.getvalue()

94 

95 

def MessageToString(
    message,
    as_utf8=_as_utf8_default,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Convert protobuf message to text format.

  Double values can be formatted compactly with 15 digits of precision
  (which is the most that IEEE 754 "double" can guarantee) using
  double_format='.15g'. To ensure that converting to text and back to a
  proto will result in an identical value, double_format='.17g' should be
  used.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, fields of a proto message will be printed using
      the order defined in source code instead of the field number;
      extensions are printed last, ordered by extension number.
    float_format (str): Deprecated. If set, use this to specify float field
      formatting (per the "Format Specification Mini-Language"); otherwise,
      the shortest float with the same wire value is printed. Also affects
      double fields when double_format is not set.
    double_format (str): Deprecated. If set, use this to specify double field
      formatting (per the "Format Specification Mini-Language"); if unset but
      float_format is set, float_format is used; otherwise ``str()``.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    indent (int): The initial indent level, in terms of spaces, for pretty
      print.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.

  Returns:
    str: A string of the text formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  _Printer(
      writer,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # Single-line output ends with a trailing separator; strip it.
  return text.rstrip() if as_one_line else text

173 

174 

def MessageToBytes(message, **kwargs) -> bytes:
  """Convert protobuf message to encoded text format.

  See MessageToString() for the supported keyword arguments.

  Args:
    message: The protocol buffers message.
    **kwargs: Keyword arguments forwarded to MessageToString().

  Returns:
    bytes: The text format, encoded as UTF-8 when as_utf8 is in effect,
    otherwise as ASCII.
  """
  text = MessageToString(message, **kwargs)
  if isinstance(text, bytes):
    return text
  # Mirror MessageToString's default for as_utf8.  Previously an absent
  # as_utf8 fell back to ASCII even though MessageToString defaults to
  # unescaped UTF-8 output (_as_utf8_default is True), which made this
  # call raise UnicodeEncodeError for any non-ASCII content.
  codec = 'utf-8' if kwargs.get('as_utf8', _as_utf8_default) else 'ascii'
  return text.encode(codec)

182 

183 

def _IsMapEntry(field):
  """Returns True if *field* describes an auto-generated map entry message."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

188 

189 

def _IsGroupLike(field):
  """Determines if a field is consistent with a proto2 group.

  Args:
    field: The field descriptor.

  Returns:
    True if this field is group-like, false otherwise.
  """
  # Only tag-delimited fields can be groups.
  if field.type != descriptor.FieldDescriptor.TYPE_GROUP:
    return False

  # A group field's name is always the lowercased message type name.
  if field.name != field.message_type.name.lower():
    return False

  # Both must come from the same file; file-level extensions would otherwise
  # compare None == None in the scope check below.
  if field.message_type.file != field.file:
    return False

  # Group messages are always defined in the same scope as the field.
  expected_scope = (
      field.extension_scope if field.is_extension else field.containing_type)
  return field.message_type.containing_type == expected_scope

218 

219 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=_as_utf8_default,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Convert the message to text format and write it to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields of a proto message using the order
      defined in source code instead of the field number.
    float_format: If set, use this to specify float field formatting
      (per the "Format Specification Mini-Language"); otherwise, the shortest
      float with the same wire value is printed. Also affects double fields
      when double_format is not set.
    double_format: If set, use this to specify double field formatting
      (per the "Format Specification Mini-Language"); if unset but
      float_format is set, float_format is used; otherwise str().
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  _Printer(
      out,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)

279 

280 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=_as_utf8_default,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Print a single field name/value pair to *out*; see PrintMessage()."""
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintField(field, value)

303 

304 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=_as_utf8_default,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Print a single field value (no name) to *out*; see PrintMessage()."""
  printer = _Printer(
      out=out,
      indent=indent,
      as_utf8=as_utf8,
      as_one_line=as_one_line,
      use_short_repeated_primitives=use_short_repeated_primitives,
      pointy_brackets=pointy_brackets,
      use_index_order=use_index_order,
      float_format=float_format,
      double_format=double_format,
      message_formatter=message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintFieldValue(field, value)

327 

328 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a new, empty protobuf message instance for *type_name*.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance, or None to use the default pool.

  Returns:
    A Message instance of type matching type_name, or None if no Descriptor
    matching type_name was found.
  """
  # pylint: disable=g-import-not-at-top
  if descriptor_pool is None:
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  from google.protobuf import message_factory
  try:
    msg_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    # Unknown type: signal to the caller rather than raising.
    return None
  return message_factory.GetMessageClass(msg_descriptor)()

351 

352 

# These values must match WireType enum in //google/protobuf/wire_format.h.
# Used by _PrintUnknownFields to decide how to render each unknown field.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

356 

357 

class _Printer(object):
  """Text format printer for protocol message.

  Holds all formatting options and writes the text representation of a
  message to the supplied output stream.  Note that self.indent is mutated
  while descending into nested messages and restored on the way back out.
  """

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=_as_utf8_default,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (which is the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'. To ensure that converting to text and back to a
    proto will result in an identical value, double_format='.17g' should be
    used.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the
        order defined in source code instead of the field number. By default,
        use the field number order.
      float_format: Deprecated. If set, use this to specify float field
        formatting (per the "Format Specification Mini-Language"); otherwise,
        shortest float that has same value in wire will be printed. Also
        affect double field if double_format is not set but float_format is
        set.
      double_format: Deprecated. If set, use this to specify double field
        formatting (per the "Format Specification Mini-Language"); if it is
        not set but float_format is set, use float_format. Otherwise, str()
        is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message
        type). Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    if double_format is not None:
      warnings.warn(
          'double_format is deprecated for text_format. This will '
          'turn into error in 7.34.0, please remove it before that.'
      )
      self.double_format = double_format
    else:
      # double_format falls back to float_format when not given explicitly.
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field.

    Returns True if the Any payload type could be resolved and was printed in
    expanded "[type_url] { ... }" form; False to fall back to plain printing.
    """
    if '/' not in message.type_url:
      return False
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message is not None:
      packed_message.MergeFromString(message.value)
      colon = ':' if self.force_colon else ''
      self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
      self._PrintMessageFieldValue(packed_message)
      self.out.write(' ' if self.as_one_line else '\n')
      return True
    else:
      return False

  def _TryCustomFormatMessage(self, message):
    """Lets self.message_formatter render *message*; returns True if it did."""
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      # Formatter declined; caller falls back to default printing.
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        # Map entries are printed in sorted key order for determinism.
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies
          # the entire tree.  Unfortunately this would take significant
          # refactoring of this file to work around.
          #
          # TODO: refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.is_repeated:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields, recursing into groups and embedded messages."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        if self.as_one_line:
          out.write(' { ')
        else:
          out.write(' {\n')
          self.indent += 2

        self._PrintUnknownFields(field.data)

        if self.as_one_line:
          out.write('} ')
        else:
          self.indent -= 2
          out.write(' ' * self.indent + '}\n')
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:  # pylint: disable=broad-except
          # Not decodable as a message; treat as string/bytes below.
          pos = 0

        # Only trust the decode if it consumed every byte of the payload.
        if pos == len(field.data):
          if self.as_one_line:
            out.write(' { ')
          else:
            out.write(' {\n')
            self.indent += 2

          self._PrintUnknownFields(embedded_unknown_message)

          if self.as_one_line:
            out.write('} ')
          else:
            self.indent -= 2
            out.write(' ' * self.indent + '}\n')
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name (indent, then number/extension/group/plain name)."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            not field.is_required and
            not field.is_repeated):
          # MessageSet items are keyed by their message type name.
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif _IsGroupLike(field):
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden
      # files don't include it. Here, the colon is only included if
      # force_colon is set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints a repeated primitive field in short "[v1, v2, ...]" form."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for i in range(len(value) - 1):
      self.PrintFieldValue(field, value[i])
      self.out.write(', ')
    self.PrintFieldValue(field, value[-1])
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a nested message surrounded by {} or <> brackets."""
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      # Temporarily deepen the indent for the nested message body.
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Unknown enum number: print the raw integer.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if self.float_format is not None:
        warnings.warn(
            'float_format is deprecated for text_format. This '
            'will turn into error in 7.34.0, please remove it '
            'before that.'
        )
        out.write('{1:{0}}'.format(self.float_format, value))
      else:
        if math.isnan(value):
          out.write(str(value))
        else:
          out.write(str(type_checkers.ToShortestFloat(value)))
    elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
          self.double_format is not None):
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      out.write(str(value))

676 

677 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: for historical reasons this function does not clear the input
  message. This is different from what the binary msg.ParseFrom(...) does.
  If text contains a field already set in message, the value is appended if
  the field is repeated. Otherwise, an error is raised.

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Caller is responsible for clearing the message as needed.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown field and keep parsing.
      Avoid to use this option if possible. It may hide some errors (e.g.
      spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the newline type matching the input (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(
      text.split(newline),
      message,
      allow_unknown_extension=allow_unknown_extension,
      allow_field_number=allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field)

734 

735 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Like Parse(), but allows repeated values for a non-repeated field, and uses
  the last one. This means any non-repeated, top-level fields specified in
  text replace those in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown field and keep parsing.
      Avoid to use this option if possible. It may hide some errors (e.g.
      spelling error on field name)

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the newline type matching the input (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(
      text.split(newline),
      message,
      allow_unknown_extension=allow_unknown_extension,
      allow_field_number=allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field)

772 

773 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Parse() for caveats.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep parsing.
      Avoid to use this option if possible. It may hide some errors (e.g.
      spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).ParseLines(lines, message)

806 

807 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  See Merge() for more details.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown field and keep parsing.
      Avoid to use this option if possible. It may hide some errors (e.g.
      spelling error on field name)

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(
      allow_unknown_extension,
      allow_field_number,
      descriptor_pool=descriptor_pool,
      allow_unknown_field=allow_unknown_field).MergeLines(lines, message)

840 

841 

842class _Parser(object): 

843 """Text format parser for protocol message.""" 

844 

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
    """Initializes the parser.  See Parse()/Merge() for argument semantics."""
    # If True, missing extensions are skipped instead of failing the parse.
    self.allow_unknown_extension = allow_unknown_extension
    # If True, fields may be identified by number as well as by name.
    self.allow_field_number = allow_field_number
    # DescriptorPool used to resolve expanded Any types (may be None).
    self.descriptor_pool = descriptor_pool
    # If True, unknown (non-extension) fields are skipped instead of failing.
    self.allow_unknown_field = allow_unknown_field

854 

855 def ParseLines(self, lines, message): 

856 """Parses a text representation of a protocol message into a message.""" 

857 self._allow_multiple_scalars = False 

858 self._ParseOrMerge(lines, message) 

859 return message 

860 

861 def MergeLines(self, lines, message): 

862 """Merges a text representation of a protocol message into a message.""" 

863 self._allow_multiple_scalars = True 

864 self._ParseOrMerge(lines, message) 

865 return message 

866 

867 def _ParseOrMerge(self, lines, message): 

868 """Converts a text representation of a protocol message into a message. 

869 

870 Args: 

871 lines: Lines of a message's text representation. 

872 message: A protocol buffer message to merge into. 

873 

874 Raises: 

875 ParseError: On text parsing problems. 

876 """ 

877 # Tokenize expects native str lines. 

878 try: 

879 str_lines = ( 

880 line if isinstance(line, str) else line.decode('utf-8') 

881 for line in lines) 

882 tokenizer = Tokenizer(str_lines) 

883 except UnicodeDecodeError as e: 

884 raise ParseError from e 

885 if message: 

886 self.root_type = message.DESCRIPTOR.full_name 

887 while not tokenizer.AtEnd(): 

888 self._MergeField(tokenizer, message) 

889 

890 def _MergeField(self, tokenizer, message): 

891 """Merges a single protocol message field into a message. 

892 

893 Args: 

894 tokenizer: A tokenizer to parse the field name and values. 

895 message: A protocol message to record the data. 

896 

897 Raises: 

898 ParseError: In case of text parsing problems. 

899 """ 

900 message_descriptor = message.DESCRIPTOR 

901 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and 

902 tokenizer.TryConsume('[')): 

903 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) 

904 tokenizer.Consume(']') 

905 tokenizer.TryConsume(':') 

906 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

907 type_url_prefix + '/' + packed_type_name) 

908 if tokenizer.TryConsume('<'): 

909 expanded_any_end_token = '>' 

910 else: 

911 tokenizer.Consume('{') 

912 expanded_any_end_token = '}' 

913 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, 

914 self.descriptor_pool) 

915 # Direct comparison with None is used instead of implicit bool conversion 

916 # to avoid false positives with falsy initial values, e.g. for 

917 # google.protobuf.ListValue. 

918 if expanded_any_sub_message is None: 

919 raise ParseError('Type %s not found in descriptor pool' % 

920 packed_type_name) 

921 while not tokenizer.TryConsume(expanded_any_end_token): 

922 if tokenizer.AtEnd(): 

923 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % 

924 (expanded_any_end_token,)) 

925 self._MergeField(tokenizer, expanded_any_sub_message) 

926 deterministic = False 

927 

928 message.Pack(expanded_any_sub_message, 

929 type_url_prefix=type_url_prefix, 

930 deterministic=deterministic) 

931 return 

932 

933 if tokenizer.TryConsume('['): 

934 name = [tokenizer.ConsumeIdentifier()] 

935 while tokenizer.TryConsume('.'): 

936 name.append(tokenizer.ConsumeIdentifier()) 

937 name = '.'.join(name) 

938 

939 if not message_descriptor.is_extendable: 

940 raise tokenizer.ParseErrorPreviousToken( 

941 'Message type "%s" does not have extensions.' % 

942 message_descriptor.full_name) 

943 # pylint: disable=protected-access 

944 field = message.Extensions._FindExtensionByName(name) 

945 # pylint: enable=protected-access 

946 if not field: 

947 if self.allow_unknown_extension: 

948 field = None 

949 else: 

950 raise tokenizer.ParseErrorPreviousToken( 

951 'Extension "%s" not registered. ' 

952 'Did you import the _pb2 module which defines it? ' 

953 'If you are trying to place the extension in the MessageSet ' 

954 'field of another message that is in an Any or MessageSet field, ' 

955 'that message\'s _pb2 module must be imported as well' % name) 

956 elif message_descriptor != field.containing_type: 

957 raise tokenizer.ParseErrorPreviousToken( 

958 'Extension "%s" does not extend message type "%s".' % 

959 (name, message_descriptor.full_name)) 

960 

961 tokenizer.Consume(']') 

962 

963 else: 

964 name = tokenizer.ConsumeIdentifierOrNumber() 

965 if self.allow_field_number and name.isdigit(): 

966 number = ParseInteger(name, True, True) 

967 field = message_descriptor.fields_by_number.get(number, None) 

968 if not field and message_descriptor.is_extendable: 

969 field = message.Extensions._FindExtensionByNumber(number) 

970 else: 

971 field = message_descriptor.fields_by_name.get(name, None) 

972 

973 # Group names are expected to be capitalized as they appear in the 

974 # .proto file, which actually matches their type names, not their field 

975 # names. 

976 if not field: 

977 field = message_descriptor.fields_by_name.get(name.lower(), None) 

978 if field and not _IsGroupLike(field): 

979 field = None 

980 if field and field.message_type.name != name: 

981 field = None 

982 

983 if not field and not self.allow_unknown_field: 

984 raise tokenizer.ParseErrorPreviousToken( 

985 'Message type "%s" has no field named "%s".' % 

986 (message_descriptor.full_name, name)) 

987 

988 if field: 

989 if not self._allow_multiple_scalars and field.containing_oneof: 

990 # Check if there's a different field set in this oneof. 

991 # Note that we ignore the case if the same field was set before, and we 

992 # apply _allow_multiple_scalars to non-scalar fields as well. 

993 which_oneof = message.WhichOneof(field.containing_oneof.name) 

994 if which_oneof is not None and which_oneof != field.name: 

995 raise tokenizer.ParseErrorPreviousToken( 

996 'Field "%s" is specified along with field "%s", another member ' 

997 'of oneof "%s" for message type "%s".' % 

998 (field.name, which_oneof, field.containing_oneof.name, 

999 message_descriptor.full_name)) 

1000 

1001 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

1002 tokenizer.TryConsume(':') 

1003 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

1004 field.full_name) 

1005 merger = self._MergeMessageField 

1006 else: 

1007 tokenizer.Consume(':') 

1008 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

1009 field.full_name) 

1010 merger = self._MergeScalarField 

1011 

1012 if (field.is_repeated and 

1013 tokenizer.TryConsume('[')): 

1014 # Short repeated format, e.g. "foo: [1, 2, 3]" 

1015 if not tokenizer.TryConsume(']'): 

1016 while True: 

1017 merger(tokenizer, message, field) 

1018 if tokenizer.TryConsume(']'): 

1019 break 

1020 tokenizer.Consume(',') 

1021 

1022 else: 

1023 merger(tokenizer, message, field) 

1024 

1025 else: # Proto field is unknown. 

1026 assert (self.allow_unknown_extension or self.allow_unknown_field) 

1027 self._SkipFieldContents(tokenizer, name, message_descriptor.full_name) 

1028 

1029 # For historical reasons, fields may optionally be separated by commas or 

1030 # semicolons. 

1031 if not tokenizer.TryConsume(','): 

1032 tokenizer.TryConsume(';') 

1033 

1034 def _LogSilentMarker(self, immediate_message_type, field_name): 

1035 pass 

1036 

1037 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

1038 if tokenizer.contains_silent_marker_before_current_token: 

1039 self._LogSilentMarker(immediate_message_type, field_name) 

1040 

1041 def _ConsumeAnyTypeUrl(self, tokenizer): 

1042 """Consumes a google.protobuf.Any type URL and returns the type name.""" 

1043 # Consume "type.googleapis.com/". 

1044 prefix = [tokenizer.ConsumeIdentifier()] 

1045 tokenizer.Consume('.') 

1046 prefix.append(tokenizer.ConsumeIdentifier()) 

1047 tokenizer.Consume('.') 

1048 prefix.append(tokenizer.ConsumeIdentifier()) 

1049 tokenizer.Consume('/') 

1050 # Consume the fully-qualified type name. 

1051 name = [tokenizer.ConsumeIdentifier()] 

1052 while tokenizer.TryConsume('.'): 

1053 name.append(tokenizer.ConsumeIdentifier()) 

1054 return '.'.join(prefix), '.'.join(name) 

1055 

1056 def _MergeMessageField(self, tokenizer, message, field): 

1057 """Merges a single scalar field into a message. 

1058 

1059 Args: 

1060 tokenizer: A tokenizer to parse the field value. 

1061 message: The message of which field is a member. 

1062 field: The descriptor of the field to be merged. 

1063 

1064 Raises: 

1065 ParseError: In case of text parsing problems. 

1066 """ 

1067 is_map_entry = _IsMapEntry(field) 

1068 

1069 if tokenizer.TryConsume('<'): 

1070 end_token = '>' 

1071 else: 

1072 tokenizer.Consume('{') 

1073 end_token = '}' 

1074 

1075 if field.is_repeated: 

1076 if field.is_extension: 

1077 sub_message = message.Extensions[field].add() 

1078 elif is_map_entry: 

1079 sub_message = getattr(message, field.name).GetEntryClass()() 

1080 else: 

1081 sub_message = getattr(message, field.name).add() 

1082 else: 

1083 if field.is_extension: 

1084 if (not self._allow_multiple_scalars and 

1085 message.HasExtension(field)): 

1086 raise tokenizer.ParseErrorPreviousToken( 

1087 'Message type "%s" should not have multiple "%s" extensions.' % 

1088 (message.DESCRIPTOR.full_name, field.full_name)) 

1089 sub_message = message.Extensions[field] 

1090 else: 

1091 # Also apply _allow_multiple_scalars to message field. 

1092 # TODO: Change to _allow_singular_overwrites. 

1093 if (not self._allow_multiple_scalars and 

1094 message.HasField(field.name)): 

1095 raise tokenizer.ParseErrorPreviousToken( 

1096 'Message type "%s" should not have multiple "%s" fields.' % 

1097 (message.DESCRIPTOR.full_name, field.name)) 

1098 sub_message = getattr(message, field.name) 

1099 sub_message.SetInParent() 

1100 

1101 while not tokenizer.TryConsume(end_token): 

1102 if tokenizer.AtEnd(): 

1103 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,)) 

1104 self._MergeField(tokenizer, sub_message) 

1105 

1106 if is_map_entry: 

1107 value_cpptype = field.message_type.fields_by_name['value'].cpp_type 

1108 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

1109 value = getattr(message, field.name)[sub_message.key] 

1110 value.CopyFrom(sub_message.value) 

1111 else: 

1112 getattr(message, field.name)[sub_message.key] = sub_message.value 

1113 

1114 def _MergeScalarField(self, tokenizer, message, field): 

1115 """Merges a single scalar field into a message. 

1116 

1117 Args: 

1118 tokenizer: A tokenizer to parse the field value. 

1119 message: A protocol message to record the data. 

1120 field: The descriptor of the field to be merged. 

1121 

1122 Raises: 

1123 ParseError: In case of text parsing problems. 

1124 RuntimeError: On runtime errors. 

1125 """ 

1126 _ = self.allow_unknown_extension 

1127 value = None 

1128 

1129 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 

1130 descriptor.FieldDescriptor.TYPE_SINT32, 

1131 descriptor.FieldDescriptor.TYPE_SFIXED32): 

1132 value = _ConsumeInt32(tokenizer) 

1133 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 

1134 descriptor.FieldDescriptor.TYPE_SINT64, 

1135 descriptor.FieldDescriptor.TYPE_SFIXED64): 

1136 value = _ConsumeInt64(tokenizer) 

1137 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 

1138 descriptor.FieldDescriptor.TYPE_FIXED32): 

1139 value = _ConsumeUint32(tokenizer) 

1140 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 

1141 descriptor.FieldDescriptor.TYPE_FIXED64): 

1142 value = _ConsumeUint64(tokenizer) 

1143 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 

1144 descriptor.FieldDescriptor.TYPE_DOUBLE): 

1145 value = tokenizer.ConsumeFloat() 

1146 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 

1147 value = tokenizer.ConsumeBool() 

1148 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 

1149 value = tokenizer.ConsumeString() 

1150 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 

1151 value = tokenizer.ConsumeByteString() 

1152 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 

1153 value = tokenizer.ConsumeEnum(field) 

1154 else: 

1155 raise RuntimeError('Unknown field type %d' % field.type) 

1156 

1157 if field.is_repeated: 

1158 if field.is_extension: 

1159 message.Extensions[field].append(value) 

1160 else: 

1161 getattr(message, field.name).append(value) 

1162 else: 

1163 if field.is_extension: 

1164 if (not self._allow_multiple_scalars and 

1165 field.has_presence and 

1166 message.HasExtension(field)): 

1167 raise tokenizer.ParseErrorPreviousToken( 

1168 'Message type "%s" should not have multiple "%s" extensions.' % 

1169 (message.DESCRIPTOR.full_name, field.full_name)) 

1170 else: 

1171 message.Extensions[field] = value 

1172 else: 

1173 duplicate_error = False 

1174 if not self._allow_multiple_scalars: 

1175 if field.has_presence: 

1176 duplicate_error = message.HasField(field.name) 

1177 else: 

1178 # For field that doesn't represent presence, try best effort to 

1179 # check multiple scalars by compare to default values. 

1180 duplicate_error = not decoder.IsDefaultScalarValue( 

1181 getattr(message, field.name) 

1182 ) 

1183 

1184 if duplicate_error: 

1185 raise tokenizer.ParseErrorPreviousToken( 

1186 'Message type "%s" should not have multiple "%s" fields.' % 

1187 (message.DESCRIPTOR.full_name, field.name)) 

1188 else: 

1189 setattr(message, field.name, value) 

1190 

1191 def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type): 

1192 """Skips over contents (value or message) of a field. 

1193 

1194 Args: 

1195 tokenizer: A tokenizer to parse the field name and values. 

1196 field_name: The field name currently being parsed. 

1197 immediate_message_type: The type of the message immediately containing 

1198 the silent marker. 

1199 """ 

1200 # Try to guess the type of this field. 

1201 # If this field is not a message, there should be a ":" between the 

1202 # field name and the field value and also the field value should not 

1203 # start with "{" or "<" which indicates the beginning of a message body. 

1204 # If there is no ":" or there is a "{" or "<" after ":", this field has 

1205 # to be a message or the input is ill-formed. 

1206 if tokenizer.TryConsume( 

1207 ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'): 

1208 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1209 if tokenizer.LookingAt('['): 

1210 self._SkipRepeatedFieldValue(tokenizer, immediate_message_type) 

1211 else: 

1212 self._SkipFieldValue(tokenizer) 

1213 else: 

1214 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1215 self._SkipFieldMessage(tokenizer, immediate_message_type) 

1216 

1217 def _SkipField(self, tokenizer, immediate_message_type): 

1218 """Skips over a complete field (name and value/message). 

1219 

1220 Args: 

1221 tokenizer: A tokenizer to parse the field name and values. 

1222 immediate_message_type: The type of the message immediately containing 

1223 the silent marker. 

1224 """ 

1225 field_name = '' 

1226 if tokenizer.TryConsume('['): 

1227 # Consume extension or google.protobuf.Any type URL 

1228 field_name += '[' + tokenizer.ConsumeIdentifier() 

1229 num_identifiers = 1 

1230 while tokenizer.TryConsume('.'): 

1231 field_name += '.' + tokenizer.ConsumeIdentifier() 

1232 num_identifiers += 1 

1233 # This is possibly a type URL for an Any message. 

1234 if num_identifiers == 3 and tokenizer.TryConsume('/'): 

1235 field_name += '/' + tokenizer.ConsumeIdentifier() 

1236 while tokenizer.TryConsume('.'): 

1237 field_name += '.' + tokenizer.ConsumeIdentifier() 

1238 tokenizer.Consume(']') 

1239 field_name += ']' 

1240 else: 

1241 field_name += tokenizer.ConsumeIdentifierOrNumber() 

1242 

1243 self._SkipFieldContents(tokenizer, field_name, immediate_message_type) 

1244 

1245 # For historical reasons, fields may optionally be separated by commas or 

1246 # semicolons. 

1247 if not tokenizer.TryConsume(','): 

1248 tokenizer.TryConsume(';') 

1249 

1250 def _SkipFieldMessage(self, tokenizer, immediate_message_type): 

1251 """Skips over a field message. 

1252 

1253 Args: 

1254 tokenizer: A tokenizer to parse the field name and values. 

1255 immediate_message_type: The type of the message immediately containing 

1256 the silent marker 

1257 """ 

1258 if tokenizer.TryConsume('<'): 

1259 delimiter = '>' 

1260 else: 

1261 tokenizer.Consume('{') 

1262 delimiter = '}' 

1263 

1264 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 

1265 self._SkipField(tokenizer, immediate_message_type) 

1266 

1267 tokenizer.Consume(delimiter) 

1268 

1269 def _SkipFieldValue(self, tokenizer): 

1270 """Skips over a field value. 

1271 

1272 Args: 

1273 tokenizer: A tokenizer to parse the field name and values. 

1274 

1275 Raises: 

1276 ParseError: In case an invalid field value is found. 

1277 """ 

1278 if (not tokenizer.TryConsumeByteString()and 

1279 not tokenizer.TryConsumeIdentifier() and 

1280 not _TryConsumeInt64(tokenizer) and 

1281 not _TryConsumeUint64(tokenizer) and 

1282 not tokenizer.TryConsumeFloat()): 

1283 raise ParseError('Invalid field value: ' + tokenizer.token) 

1284 

1285 def _SkipRepeatedFieldValue(self, tokenizer, immediate_message_type): 

1286 """Skips over a repeated field value. 

1287 

1288 Args: 

1289 tokenizer: A tokenizer to parse the field value. 

1290 """ 

1291 tokenizer.Consume('[') 

1292 if not tokenizer.TryConsume(']'): 

1293 while True: 

1294 if tokenizer.LookingAt('<') or tokenizer.LookingAt('{'): 

1295 self._SkipFieldMessage(tokenizer, immediate_message_type) 

1296 else: 

1297 self._SkipFieldValue(tokenizer) 

1298 if tokenizer.TryConsume(']'): 

1299 break 

1300 tokenizer.Consume(',') 

1301 

1302 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    self._position = 0
    # Current position (0-based) within the input; _line/_column point at the
    # start of the current token.
    self._line = -1
    self._column = 0
    self._token_start = None
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    # Position of the previously consumed token, for error reporting.
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # (cond and a or b) legacy conditional; safe because compiled patterns
    # are always truthy.
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advance to the next input line whenever the current one is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      # Record whether the skipped run was exactly the debug-string silent
      # marker, so _Parser can detect and report it.
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, as in C or Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # No token pattern matched: fall back to a single character so
      # punctuation like '{', ':' and ']' becomes its own token.
      self.token = self._current_line[self._column]

1646 

# Aliased so it can still be accessed by current visibility violators.
# TODO: Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1650 

1651 

def _ConsumeInt32(tokenizer):
  """Reads a signed 32-bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the current token is not a valid signed 32-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)

1665 

1666 

def _ConsumeUint32(tokenizer):
  """Reads an unsigned 32-bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the current token is not a valid unsigned 32-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)

1680 

1681 

def _TryConsumeInt64(tokenizer):
  """Attempts to consume a signed 64-bit integer; returns True on success."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1688 

1689 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)

1703 

1704 

def _TryConsumeUint64(tokenizer):
  """Attempts to consume an unsigned 64-bit integer; returns True on success."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1711 

1712 

def _ConsumeUint64(tokenizer):
  """Reads an unsigned 64-bit integer token from the tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If the current token is not a valid unsigned 64-bit integer.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)

1726 

1727 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer token with the requested width and signedness.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long (64-bit) integer must be parsed.

  Returns:
    The parsed integer.

  Raises:
    ParseError: If an integer with the given characteristics couldn't be
      consumed.
  """
  token = tokenizer.token
  try:
    value = ParseInteger(token, is_signed=is_signed, is_long=is_long)
  except ValueError as e:
    # Re-raise as a ParseError anchored at the current token position.
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return value

1748 

1749 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer and validates its range.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long (64-bit) integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Iff the text is not a valid integer or is out of range.
  """
  # Parsing errors propagate to the caller as ValueError.
  value = _ParseAbstractInteger(text)

  # Pick the range checker matching (signedness, width) and validate;
  # CheckValue raises ValueError on out-of-range input.
  _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)].CheckValue(value)
  return value

1771 

1772 

def _ParseAbstractInteger(text):
  """Parses an integer without checking size/signedness.

  Accepts decimal, hex ('0x...'), and C-style leading-zero octal ('0755')
  spellings, with an optional leading minus sign.

  Args:
    text: The text to parse.

  Returns:
    The integer value.

  Raises:
    ValueError: Iff the text is not a valid integer.
  """
  octal = re.match(r'(-?)0(\d+)$', text)
  # Python 3 no longer supports 0755 octal syntax without the 'o', so
  # multi-digit numbers starting with 0 are rewritten with an '0o' prefix.
  normalized = octal.group(1) + '0o' + octal.group(2) if octal else text
  try:
    return int(normalized, 0)
  except ValueError:
    raise ValueError('Couldn\'t parse integer: %s' % text)

1796 

1797 

def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  # Octal-prefixed floats are rejected outright.
  if _FLOAT_OCTAL_PREFIX.match(text):
    raise ValueError('Invalid octal float: %s' % text)

  # Fast path: Python-compatible float syntax.
  try:
    return float(text)
  except ValueError:
    pass

  # Alternative spellings accepted by text format: inf/infinity and nan,
  # each with an optional 'f' suffix.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')

  # Finally, tolerate a C-style float suffix, e.g. '1.0f'.
  try:
    return float(text.rstrip('fF'))
  except ValueError:
    raise ValueError("Couldn't parse float: %s" % text)

1830 

1831 

def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # Only these exact spellings are accepted; matching is case-sensitive
  # apart from the capitalized Python-style 'True'/'False'.
  truthy = ('true', 't', '1', 'True')
  falsy = ('false', 'f', '0', 'False')
  if text in truthy:
    return True
  if text in falsy:
    return False
  raise ValueError('Expected "true" or "false".')

1850 

1851 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not a number: look the value up as an enum name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return enum_value.number

  # Numeric value: open (non-closed) enums accept any number as-is.
  if not field.enum_type.is_closed:
    return number
  # Closed enums require the number to map to a declared value.
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return enum_value.number