Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/protobuf/text_format.py: 15%

753 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:10 +0000

1# Protocol Buffers - Google's data interchange format 

2# Copyright 2008 Google Inc. All rights reserved. 

3# https://developers.google.com/protocol-buffers/ 

4# 

5# Redistribution and use in source and binary forms, with or without 

6# modification, are permitted provided that the following conditions are 

7# met: 

8# 

9# * Redistributions of source code must retain the above copyright 

10# notice, this list of conditions and the following disclaimer. 

11# * Redistributions in binary form must reproduce the above 

12# copyright notice, this list of conditions and the following disclaimer 

13# in the documentation and/or other materials provided with the 

14# distribution. 

15# * Neither the name of Google Inc. nor the names of its 

16# contributors may be used to endorse or promote products derived from 

17# this software without specific prior written permission. 

18# 

19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

30 

31"""Contains routines for printing protocol messages in text format. 

32 

33Simple usage example:: 

34 

35 # Create a proto object and serialize it to a text proto string. 

36 message = my_proto_pb2.MyMessage(foo='bar') 

37 text_proto = text_format.MessageToString(message) 

38 

39 # Parse a text proto string. 

40 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage()) 

41""" 

42 

43__author__ = 'kenton@google.com (Kenton Varda)' 

44 

45# TODO(b/129989314) Import thread contention leads to test failures. 

46import encodings.raw_unicode_escape # pylint: disable=unused-import 

47import encodings.unicode_escape # pylint: disable=unused-import 

48import io 

49import math 

50import re 

51 

52from google.protobuf.internal import decoder 

53from google.protobuf.internal import type_checkers 

54from google.protobuf import descriptor 

55from google.protobuf import text_encoding 

56from google.protobuf import unknown_fields 

57 

58# pylint: disable=g-import-not-at-top 

59__all__ = ['MessageToString', 'Parse', 'PrintMessage', 'PrintField', 

60 'PrintFieldValue', 'Merge', 'MessageToBytes'] 

61 

62_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(), 

63 type_checkers.Int32ValueChecker(), 

64 type_checkers.Uint64ValueChecker(), 

65 type_checkers.Int64ValueChecker()) 

66_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE) 

67_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE) 

68_QUOTES = frozenset(("'", '"')) 

69_ANY_FULL_TYPE_NAME = 'google.protobuf.Any' 

70_DEBUG_STRING_SILENT_MARKER = '\t ' 

71 

72 

class Error(Exception):
  """Base class for all errors raised by this module."""
75 

76 

class ParseError(Error):
  """Raised when text-format parsing or tokenizing fails.

  When both a message and a line number are supplied, the message is
  prefixed with a "line[:column] : " location string.
  """

  def __init__(self, message=None, line=None, column=None):
    if message is not None and line is not None:
      if column is None:
        location = str(line)
      else:
        location = '{0}:{1}'.format(line, column)
      message = '{0} : {1}'.format(location, message)
    args = () if message is None else (message,)
    super(ParseError, self).__init__(*args)
    self._line = line
    self._column = column

  def GetLine(self):
    """Returns the line number passed at construction, or None."""
    return self._line

  def GetColumn(self):
    """Returns the column number passed at construction, or None."""
    return self._column

98 

99 

class TextWriter(object):
  """Minimal text sink backed by an in-memory string buffer.

  The ``as_utf8`` constructor argument is accepted for call-site
  compatibility but is unused; output is always buffered as text.
  """

  def __init__(self, as_utf8):
    # as_utf8 is intentionally ignored; see class docstring.
    self._buffer = io.StringIO()

  def write(self, val):
    """Appends val to the buffer; returns the number of characters written."""
    return self._buffer.write(val)

  def close(self):
    """Releases the underlying buffer."""
    return self._buffer.close()

  def getvalue(self):
    """Returns everything written so far as a single string."""
    return self._buffer.getvalue()

113 

114 

def MessageToString(
    message,
    as_utf8=False,
    as_one_line=False,
    use_short_repeated_primitives=False,
    pointy_brackets=False,
    use_index_order=False,
    float_format=None,
    double_format=None,
    use_field_number=False,
    descriptor_pool=None,
    indent=0,
    message_formatter=None,
    print_unknown_fields=False,
    force_colon=False) -> str:
  """Renders a protobuf message as a text-format string.

  For compact doubles use double_format='.15g' (the most IEEE 754 "double"
  can guarantee); for a text representation that round-trips back to an
  identical value, use double_format='.17g'.

  Args:
    message: The protocol buffers message.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields in source-code declaration order
      instead of field-number order; extensions are printed last, ordered by
      extension number. Default is field-number order.
    float_format (str): Format spec for float fields (per the "Format
      Specification Mini-Language"); when unset, the shortest float with the
      same wire value is printed. Also applies to double fields when
      double_format is unset.
    double_format (str): Format spec for double fields; falls back to
      float_format when unset, then to ``str()``.
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    indent (int): The initial indent level, in spaces, for pretty printing.
    message_formatter (function(message, indent, as_one_line) -> unicode|None):
      Custom formatter for selected sub-messages (usually based on message
      type). Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon is emitted after the field name even for
      message-typed fields.

  Returns:
    str: The text-formatted protocol buffer message.
  """
  writer = TextWriter(as_utf8)
  printer = _Printer(
      writer,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon)
  printer.PrintMessage(message)
  text = writer.getvalue()
  writer.close()
  # One-line output ends with a trailing separator space; strip it.
  return text.rstrip() if as_one_line else text

192 

193 

def MessageToBytes(message, **kwargs) -> bytes:
  """Converts a protobuf message to encoded text format. See MessageToString."""
  rendered = MessageToString(message, **kwargs)
  if isinstance(rendered, bytes):
    return rendered
  # UTF-8 when unescaped Unicode was requested, otherwise plain ASCII.
  return rendered.encode('utf-8' if kwargs.get('as_utf8') else 'ascii')

201 

202 

def _IsMapEntry(field):
  """Returns True if field is a synthesized map-entry message field."""
  if field.type != descriptor.FieldDescriptor.TYPE_MESSAGE:
    return False
  entry_type = field.message_type
  return entry_type.has_options and entry_type.GetOptions().map_entry

207 

208 

def PrintMessage(message,
                 out,
                 indent=0,
                 as_utf8=False,
                 as_one_line=False,
                 use_short_repeated_primitives=False,
                 pointy_brackets=False,
                 use_index_order=False,
                 float_format=None,
                 double_format=None,
                 use_field_number=False,
                 descriptor_pool=None,
                 message_formatter=None,
                 print_unknown_fields=False,
                 force_colon=False):
  """Converts the message to text format and writes it to the out stream.

  Args:
    message: The Message object to convert to text format.
    out: A file handle to write the message to.
    indent: The initial indent level for pretty print.
    as_utf8: Return unescaped Unicode for non-ASCII characters.
    as_one_line: Don't introduce newlines between fields.
    use_short_repeated_primitives: Use short repeated format for primitives.
    pointy_brackets: If True, use angle brackets instead of curly braces for
      nesting.
    use_index_order: If True, print fields in source-code declaration order
      instead of field-number order.
    float_format: Format spec for float fields (per the "Format Specification
      Mini-Language"); when unset, the shortest float with the same wire value
      is printed. Also applies to double fields when double_format is unset.
    double_format: Format spec for double fields; falls back to float_format
      when unset, then to str().
    use_field_number: If True, print field numbers instead of names.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    message_formatter: A function(message, indent, as_one_line): unicode|None
      to custom format selected sub-messages (usually based on message type).
      Use to pretty print parts of the protobuf for easier diffing.
    print_unknown_fields: If True, unknown fields will be printed.
    force_colon: If set, a colon will be added after the field name even if
      the field is a proto message.
  """
  # Positional args follow _Printer.__init__'s parameter order exactly.
  _Printer(
      out,
      indent,
      as_utf8,
      as_one_line,
      use_short_repeated_primitives,
      pointy_brackets,
      use_index_order,
      float_format,
      double_format,
      use_field_number,
      descriptor_pool,
      message_formatter,
      print_unknown_fields=print_unknown_fields,
      force_colon=force_colon).PrintMessage(message)

268 

269 

def PrintField(field,
               value,
               out,
               indent=0,
               as_utf8=False,
               as_one_line=False,
               use_short_repeated_primitives=False,
               pointy_brackets=False,
               use_index_order=False,
               float_format=None,
               double_format=None,
               message_formatter=None,
               print_unknown_fields=False,
               force_colon=False):
  """Writes a single field name/value pair to the out stream in text format."""
  _Printer(out, indent, as_utf8, as_one_line,
           use_short_repeated_primitives, pointy_brackets,
           use_index_order, float_format, double_format,
           message_formatter=message_formatter,
           print_unknown_fields=print_unknown_fields,
           force_colon=force_colon).PrintField(field, value)

292 

293 

def PrintFieldValue(field,
                    value,
                    out,
                    indent=0,
                    as_utf8=False,
                    as_one_line=False,
                    use_short_repeated_primitives=False,
                    pointy_brackets=False,
                    use_index_order=False,
                    float_format=None,
                    double_format=None,
                    message_formatter=None,
                    print_unknown_fields=False,
                    force_colon=False):
  """Writes a single field value (without its name) to the out stream."""
  _Printer(out, indent, as_utf8, as_one_line,
           use_short_repeated_primitives, pointy_brackets,
           use_index_order, float_format, double_format,
           message_formatter=message_formatter,
           print_unknown_fields=print_unknown_fields,
           force_colon=force_colon).PrintFieldValue(field, value)

316 

317 

def _BuildMessageFromTypeName(type_name, descriptor_pool):
  """Returns a new, empty message instance for the given type name.

  Args:
    type_name: Fully-qualified protobuf message type name string.
    descriptor_pool: DescriptorPool instance, or None to use the default pool.

  Returns:
    A Message instance of the type named by type_name, or None if no
    matching descriptor was found in the pool.
  """
  # pylint: disable=g-import-not-at-top
  from google.protobuf import message_factory
  if descriptor_pool is None:
    from google.protobuf import descriptor_pool as pool_mod
    descriptor_pool = pool_mod.Default()
  try:
    msg_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
  except KeyError:
    return None
  return message_factory.GetMessageClass(msg_descriptor)()

340 

341 

# These values must match WireType enum in //google/protobuf/wire_format.h.
# They are consulted when rendering unknown fields: length-delimited data is
# first tried as an embedded message, then falls back to a quoted string;
# start-group opens a nested '{...}' block.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3

345 

346 

class _Printer(object):
  """Text format printer for protocol message."""

  def __init__(
      self,
      out,
      indent=0,
      as_utf8=False,
      as_one_line=False,
      use_short_repeated_primitives=False,
      pointy_brackets=False,
      use_index_order=False,
      float_format=None,
      double_format=None,
      use_field_number=False,
      descriptor_pool=None,
      message_formatter=None,
      print_unknown_fields=False,
      force_colon=False):
    """Initialize the Printer.

    Double values can be formatted compactly with 15 digits of precision
    (which is the most that IEEE 754 "double" can guarantee) using
    double_format='.15g'. To ensure that converting to text and back to a proto
    will result in an identical value, double_format='.17g' should be used.

    Args:
      out: To record the text format result.
      indent: The initial indent level for pretty print.
      as_utf8: Return unescaped Unicode for non-ASCII characters.
      as_one_line: Don't introduce newlines between fields.
      use_short_repeated_primitives: Use short repeated format for primitives.
      pointy_brackets: If True, use angle brackets instead of curly braces for
        nesting.
      use_index_order: If True, print fields of a proto message using the order
        defined in source code instead of the field number. By default, use the
        field number order.
      float_format: If set, use this to specify float field formatting
        (per the "Format Specification Mini-Language"); otherwise, shortest
        float that has same value in wire will be printed. Also affect double
        field if double_format is not set but float_format is set.
      double_format: If set, use this to specify double field formatting
        (per the "Format Specification Mini-Language"); if it is not set but
        float_format is set, use float_format. Otherwise, str() is used.
      use_field_number: If True, print field numbers instead of names.
      descriptor_pool: A DescriptorPool used to resolve Any types.
      message_formatter: A function(message, indent, as_one_line): unicode|None
        to custom format selected sub-messages (usually based on message type).
        Use to pretty print parts of the protobuf for easier diffing.
      print_unknown_fields: If True, unknown fields will be printed.
      force_colon: If set, a colon will be added after the field name even if
        the field is a proto message.
    """
    self.out = out
    self.indent = indent
    self.as_utf8 = as_utf8
    self.as_one_line = as_one_line
    self.use_short_repeated_primitives = use_short_repeated_primitives
    self.pointy_brackets = pointy_brackets
    self.use_index_order = use_index_order
    self.float_format = float_format
    # Doubles fall back to the float format when no explicit double format
    # is supplied (see docstring above).
    if double_format is not None:
      self.double_format = double_format
    else:
      self.double_format = float_format
    self.use_field_number = use_field_number
    self.descriptor_pool = descriptor_pool
    self.message_formatter = message_formatter
    self.print_unknown_fields = print_unknown_fields
    self.force_colon = force_colon

  def _TryPrintAsAnyMessage(self, message):
    """Serializes if message is a google.protobuf.Any field.

    Returns True if the Any payload was expanded and printed as
    '[type_url] { ... }'; False if the type could not be resolved
    (caller falls back to printing raw type_url/value fields).
    """
    if '/' not in message.type_url:
      return False
    packed_message = _BuildMessageFromTypeName(message.TypeName(),
                                               self.descriptor_pool)
    if packed_message:
      packed_message.MergeFromString(message.value)
      colon = ':' if self.force_colon else ''
      self.out.write('%s[%s]%s ' % (self.indent * ' ', message.type_url, colon))
      self._PrintMessageFieldValue(packed_message)
      self.out.write(' ' if self.as_one_line else '\n')
      return True
    else:
      return False

  def _TryCustomFormatMessage(self, message):
    """Runs the custom message_formatter; returns True if it produced output."""
    formatted = self.message_formatter(message, self.indent, self.as_one_line)
    if formatted is None:
      return False

    out = self.out
    out.write(' ' * self.indent)
    out.write(formatted)
    out.write(' ' if self.as_one_line else '\n')
    return True

  def PrintMessage(self, message):
    """Convert protobuf message to text format.

    Args:
      message: The protocol buffers message.
    """
    # Custom formatter and Any expansion each get a chance to fully handle
    # the message before the generic field-by-field path runs.
    if self.message_formatter and self._TryCustomFormatMessage(message):
      return
    if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME and
        self._TryPrintAsAnyMessage(message)):
      return
    fields = message.ListFields()
    if self.use_index_order:
      # Extensions sort by number after regular fields (sorted by index).
      fields.sort(
          key=lambda x: x[0].number if x[0].is_extension else x[0].index)
    for field, value in fields:
      if _IsMapEntry(field):
        for key in sorted(value):
          # This is slow for maps with submessage entries because it copies the
          # entire tree. Unfortunately this would take significant refactoring
          # of this file to work around.
          #
          # TODO(haberman): refactor and optimize if this becomes an issue.
          entry_submsg = value.GetEntryClass()(key=key, value=value[key])
          self.PrintField(field, entry_submsg)
      elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
        if (self.use_short_repeated_primitives
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
            and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
          self._PrintShortRepeatedPrimitivesValue(field, value)
        else:
          for element in value:
            self.PrintField(field, element)
      else:
        self.PrintField(field, value)

    if self.print_unknown_fields:
      self._PrintUnknownFields(unknown_fields.UnknownFieldSet(message))

  def _PrintUnknownFields(self, unknown_field_set):
    """Print unknown fields."""
    out = self.out
    for field in unknown_field_set:
      out.write(' ' * self.indent)
      out.write(str(field.field_number))
      if field.wire_type == WIRETYPE_START_GROUP:
        if self.as_one_line:
          out.write(' { ')
        else:
          out.write(' {\n')
          self.indent += 2

        self._PrintUnknownFields(field.data)

        if self.as_one_line:
          out.write('} ')
        else:
          self.indent -= 2
          out.write(' ' * self.indent + '}\n')
      elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
        try:
          # If this field is parseable as a Message, it is probably
          # an embedded message.
          # pylint: disable=protected-access
          (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
              memoryview(field.data), 0, len(field.data))
        except Exception:  # pylint: disable=broad-except
          pos = 0

        # Only treat it as an embedded message if the decode consumed
        # every byte; otherwise fall back to printing a quoted string.
        if pos == len(field.data):
          if self.as_one_line:
            out.write(' { ')
          else:
            out.write(' {\n')
            self.indent += 2

          self._PrintUnknownFields(embedded_unknown_message)

          if self.as_one_line:
            out.write('} ')
          else:
            self.indent -= 2
            out.write(' ' * self.indent + '}\n')
        else:
          # A string or bytes field. self.as_utf8 may not work.
          out.write(': \"')
          out.write(text_encoding.CEscape(field.data, False))
          out.write('\" ' if self.as_one_line else '\"\n')
      else:
        # varint, fixed32, fixed64
        out.write(': ')
        out.write(str(field.data))
        out.write(' ' if self.as_one_line else '\n')

  def _PrintFieldName(self, field):
    """Print field name."""
    out = self.out
    out.write(' ' * self.indent)
    if self.use_field_number:
      out.write(str(field.number))
    else:
      if field.is_extension:
        out.write('[')
        # MessageSet extensions are addressed by their message type name.
        if (field.containing_type.GetOptions().message_set_wire_format and
            field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
            field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
          out.write(field.message_type.full_name)
        else:
          out.write(field.full_name)
        out.write(']')
      elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
        # For groups, use the capitalized name.
        out.write(field.message_type.name)
      else:
        out.write(field.name)

    if (self.force_colon or
        field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
      # The colon is optional in this case, but our cross-language golden files
      # don't include it. Here, the colon is only included if force_colon is
      # set to True
      out.write(':')

  def PrintField(self, field, value):
    """Print a single field name/value pair."""
    self._PrintFieldName(field)
    self.out.write(' ')
    self.PrintFieldValue(field, value)
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintShortRepeatedPrimitivesValue(self, field, value):
    """Prints a repeated primitive field in short '[v1, v2, ...]' form."""
    # Note: this is called only when value has at least one element.
    self._PrintFieldName(field)
    self.out.write(' [')
    for i in range(len(value) - 1):
      self.PrintFieldValue(field, value[i])
      self.out.write(', ')
    self.PrintFieldValue(field, value[-1])
    self.out.write(']')
    self.out.write(' ' if self.as_one_line else '\n')

  def _PrintMessageFieldValue(self, value):
    """Prints a nested message wrapped in '{...}' (or '<...>') delimiters."""
    if self.pointy_brackets:
      openb = '<'
      closeb = '>'
    else:
      openb = '{'
      closeb = '}'

    if self.as_one_line:
      self.out.write('%s ' % openb)
      self.PrintMessage(value)
      self.out.write(closeb)
    else:
      self.out.write('%s\n' % openb)
      self.indent += 2
      self.PrintMessage(value)
      self.indent -= 2
      self.out.write(' ' * self.indent + closeb)

  def PrintFieldValue(self, field, value):
    """Print a single field value (not including name).

    For repeated fields, the value should be a single element.

    Args:
      field: The descriptor of the field to be printed.
      value: The value of the field.
    """
    out = self.out
    if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
      self._PrintMessageFieldValue(value)
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
      enum_value = field.enum_type.values_by_number.get(value, None)
      if enum_value is not None:
        out.write(enum_value.name)
      else:
        # Unknown enum number (e.g. from a newer schema): print it raw.
        out.write(str(value))
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
      out.write('\"')
      if isinstance(value, str) and not self.as_utf8:
        out_value = value.encode('utf-8')
      else:
        out_value = value
      if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
        # We always need to escape all binary data in TYPE_BYTES fields.
        out_as_utf8 = False
      else:
        out_as_utf8 = self.as_utf8
      out.write(text_encoding.CEscape(out_value, out_as_utf8))
      out.write('\"')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
      if value:
        out.write('true')
      else:
        out.write('false')
    elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
      if self.float_format is not None:
        out.write('{1:{0}}'.format(self.float_format, value))
      else:
        if math.isnan(value):
          out.write(str(value))
        else:
          out.write(str(type_checkers.ToShortestFloat(value)))
    elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE and
          self.double_format is not None):
      out.write('{1:{0}}'.format(self.double_format, value))
    else:
      out.write(str(value))

655 

656 

def Parse(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  NOTE: unlike binary ``message.ParseFromString(...)``, this function does
  not clear the input message first (for historical reasons); the caller is
  responsible for clearing it as needed. Values for repeated fields found in
  ``text`` are appended to any already present; a second value for an
  already-set non-repeated field raises ParseError (see Merge() for
  last-value-wins behavior).

  Example::

    a = MyProto()
    a.repeated_field.append('test')
    b = MyProto()

    # Repeated fields are combined
    text_format.Parse(repr(a), b)
    text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]

    # Non-repeated fields cannot be overwritten
    a.singular_field = 1
    b.singular_field = 2
    text_format.Parse(repr(a), b) # ParseError

    # Binary version:
    b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid this option if possible: it may hide errors such as misspelled
      field names.

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the newline matching the input's type (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return ParseLines(text.split(newline),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

713 

714 

def Merge(text,
          message,
          allow_unknown_extension=False,
          allow_field_number=False,
          descriptor_pool=None,
          allow_unknown_field=False):
  """Merges a text representation of a protocol message into a message.

  Behaves like Parse(), except that repeated values for a non-repeated field
  are permitted: the last value wins. Any non-repeated, top-level field
  specified in text therefore replaces the existing value in the message.

  Args:
    text (str): Message text representation.
    message (Message): A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any
      types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid this option if possible: it may hide errors such as misspelled
      field names.

  Returns:
    Message: The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  # Split on the newline matching the input's type (bytes vs str).
  newline = b'\n' if isinstance(text, bytes) else u'\n'
  return MergeLines(text.split(newline),
                    message,
                    allow_unknown_extension,
                    allow_field_number,
                    descriptor_pool=descriptor_pool,
                    allow_unknown_field=allow_unknown_field)

751 

752 

def ParseLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Parses a text representation of a protocol message into a message.

  Same as Parse() (including its caveats about pre-existing message content)
  but takes an iterable of lines rather than a single string.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid this option if possible: it may hide errors such as misspelled
      field names.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension,
                 allow_field_number,
                 descriptor_pool=descriptor_pool,
                 allow_unknown_field=allow_unknown_field).ParseLines(
                     lines, message)

785 

786 

def MergeLines(lines,
               message,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
  """Merges a text representation of a protocol message into a message.

  Same as Merge() (last value wins for non-repeated fields) but takes an
  iterable of lines rather than a single string.

  Args:
    lines: An iterable of lines of a message's text representation.
    message: A protocol buffer message to merge into.
    allow_unknown_extension: if True, skip over missing extensions and keep
      parsing.
    allow_field_number: if True, both field number and field name are allowed.
    descriptor_pool: A DescriptorPool used to resolve Any types.
    allow_unknown_field: if True, skip over unknown fields and keep parsing.
      Avoid this option if possible: it may hide errors such as misspelled
      field names.

  Returns:
    The same message passed as argument.

  Raises:
    ParseError: On text parsing problems.
  """
  return _Parser(allow_unknown_extension,
                 allow_field_number,
                 descriptor_pool=descriptor_pool,
                 allow_unknown_field=allow_unknown_field).MergeLines(
                     lines, message)

819 

820 

821class _Parser(object): 

822 """Text format parser for protocol message.""" 

823 

  def __init__(self,
               allow_unknown_extension=False,
               allow_field_number=False,
               descriptor_pool=None,
               allow_unknown_field=False):
    """Stores parsing options; see Parse()/Merge() for their meaning."""
    self.allow_unknown_extension = allow_unknown_extension
    self.allow_field_number = allow_field_number
    self.descriptor_pool = descriptor_pool
    self.allow_unknown_field = allow_unknown_field

833 

  def ParseLines(self, lines, message):
    """Parses a text representation of a protocol message into a message."""
    # Strict mode: a repeated assignment to a non-repeated scalar is an error.
    self._allow_multiple_scalars = False
    self._ParseOrMerge(lines, message)
    return message

839 

  def MergeLines(self, lines, message):
    """Merges a text representation of a protocol message into a message."""
    # Merge mode: repeated assignments to a non-repeated scalar are allowed;
    # the last value wins.
    self._allow_multiple_scalars = True
    self._ParseOrMerge(lines, message)
    return message

845 

846 def _ParseOrMerge(self, lines, message): 

847 """Converts a text representation of a protocol message into a message. 

848 

849 Args: 

850 lines: Lines of a message's text representation. 

851 message: A protocol buffer message to merge into. 

852 

853 Raises: 

854 ParseError: On text parsing problems. 

855 """ 

856 # Tokenize expects native str lines. 

857 try: 

858 str_lines = ( 

859 line if isinstance(line, str) else line.decode('utf-8') 

860 for line in lines) 

861 tokenizer = Tokenizer(str_lines) 

862 except UnicodeDecodeError as e: 

863 raise ParseError from e 

864 if message: 

865 self.root_type = message.DESCRIPTOR.full_name 

866 while not tokenizer.AtEnd(): 

867 self._MergeField(tokenizer, message) 

868 

869 def _MergeField(self, tokenizer, message): 

870 """Merges a single protocol message field into a message. 

871 

872 Args: 

873 tokenizer: A tokenizer to parse the field name and values. 

874 message: A protocol message to record the data. 

875 

876 Raises: 

877 ParseError: In case of text parsing problems. 

878 """ 

879 message_descriptor = message.DESCRIPTOR 

880 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME and 

881 tokenizer.TryConsume('[')): 

882 type_url_prefix, packed_type_name = self._ConsumeAnyTypeUrl(tokenizer) 

883 tokenizer.Consume(']') 

884 tokenizer.TryConsume(':') 

885 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

886 type_url_prefix + '/' + packed_type_name) 

887 if tokenizer.TryConsume('<'): 

888 expanded_any_end_token = '>' 

889 else: 

890 tokenizer.Consume('{') 

891 expanded_any_end_token = '}' 

892 expanded_any_sub_message = _BuildMessageFromTypeName(packed_type_name, 

893 self.descriptor_pool) 

894 # Direct comparison with None is used instead of implicit bool conversion 

895 # to avoid false positives with falsy initial values, e.g. for 

896 # google.protobuf.ListValue. 

897 if expanded_any_sub_message is None: 

898 raise ParseError('Type %s not found in descriptor pool' % 

899 packed_type_name) 

900 while not tokenizer.TryConsume(expanded_any_end_token): 

901 if tokenizer.AtEnd(): 

902 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % 

903 (expanded_any_end_token,)) 

904 self._MergeField(tokenizer, expanded_any_sub_message) 

905 deterministic = False 

906 

907 message.Pack(expanded_any_sub_message, 

908 type_url_prefix=type_url_prefix, 

909 deterministic=deterministic) 

910 return 

911 

912 if tokenizer.TryConsume('['): 

913 name = [tokenizer.ConsumeIdentifier()] 

914 while tokenizer.TryConsume('.'): 

915 name.append(tokenizer.ConsumeIdentifier()) 

916 name = '.'.join(name) 

917 

918 if not message_descriptor.is_extendable: 

919 raise tokenizer.ParseErrorPreviousToken( 

920 'Message type "%s" does not have extensions.' % 

921 message_descriptor.full_name) 

922 # pylint: disable=protected-access 

923 field = message.Extensions._FindExtensionByName(name) 

924 # pylint: enable=protected-access 

925 if not field: 

926 if self.allow_unknown_extension: 

927 field = None 

928 else: 

929 raise tokenizer.ParseErrorPreviousToken( 

930 'Extension "%s" not registered. ' 

931 'Did you import the _pb2 module which defines it? ' 

932 'If you are trying to place the extension in the MessageSet ' 

933 'field of another message that is in an Any or MessageSet field, ' 

934 'that message\'s _pb2 module must be imported as well' % name) 

935 elif message_descriptor != field.containing_type: 

936 raise tokenizer.ParseErrorPreviousToken( 

937 'Extension "%s" does not extend message type "%s".' % 

938 (name, message_descriptor.full_name)) 

939 

940 tokenizer.Consume(']') 

941 

942 else: 

943 name = tokenizer.ConsumeIdentifierOrNumber() 

944 if self.allow_field_number and name.isdigit(): 

945 number = ParseInteger(name, True, True) 

946 field = message_descriptor.fields_by_number.get(number, None) 

947 if not field and message_descriptor.is_extendable: 

948 field = message.Extensions._FindExtensionByNumber(number) 

949 else: 

950 field = message_descriptor.fields_by_name.get(name, None) 

951 

952 # Group names are expected to be capitalized as they appear in the 

953 # .proto file, which actually matches their type names, not their field 

954 # names. 

955 if not field: 

956 field = message_descriptor.fields_by_name.get(name.lower(), None) 

957 if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP: 

958 field = None 

959 

960 if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and 

961 field.message_type.name != name): 

962 field = None 

963 

964 if not field and not self.allow_unknown_field: 

965 raise tokenizer.ParseErrorPreviousToken( 

966 'Message type "%s" has no field named "%s".' % 

967 (message_descriptor.full_name, name)) 

968 

969 if field: 

970 if not self._allow_multiple_scalars and field.containing_oneof: 

971 # Check if there's a different field set in this oneof. 

972 # Note that we ignore the case if the same field was set before, and we 

973 # apply _allow_multiple_scalars to non-scalar fields as well. 

974 which_oneof = message.WhichOneof(field.containing_oneof.name) 

975 if which_oneof is not None and which_oneof != field.name: 

976 raise tokenizer.ParseErrorPreviousToken( 

977 'Field "%s" is specified along with field "%s", another member ' 

978 'of oneof "%s" for message type "%s".' % 

979 (field.name, which_oneof, field.containing_oneof.name, 

980 message_descriptor.full_name)) 

981 

982 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

983 tokenizer.TryConsume(':') 

984 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

985 field.full_name) 

986 merger = self._MergeMessageField 

987 else: 

988 tokenizer.Consume(':') 

989 self._DetectSilentMarker(tokenizer, message_descriptor.full_name, 

990 field.full_name) 

991 merger = self._MergeScalarField 

992 

993 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED and 

994 tokenizer.TryConsume('[')): 

995 # Short repeated format, e.g. "foo: [1, 2, 3]" 

996 if not tokenizer.TryConsume(']'): 

997 while True: 

998 merger(tokenizer, message, field) 

999 if tokenizer.TryConsume(']'): 

1000 break 

1001 tokenizer.Consume(',') 

1002 

1003 else: 

1004 merger(tokenizer, message, field) 

1005 

1006 else: # Proto field is unknown. 

1007 assert (self.allow_unknown_extension or self.allow_unknown_field) 

1008 self._SkipFieldContents(tokenizer, name, message_descriptor.full_name) 

1009 

1010 # For historical reasons, fields may optionally be separated by commas or 

1011 # semicolons. 

1012 if not tokenizer.TryConsume(','): 

1013 tokenizer.TryConsume(';') 

1014 

1015 def _LogSilentMarker(self, immediate_message_type, field_name): 

1016 pass 

1017 

1018 def _DetectSilentMarker(self, tokenizer, immediate_message_type, field_name): 

1019 if tokenizer.contains_silent_marker_before_current_token: 

1020 self._LogSilentMarker(immediate_message_type, field_name) 

1021 

1022 def _ConsumeAnyTypeUrl(self, tokenizer): 

1023 """Consumes a google.protobuf.Any type URL and returns the type name.""" 

1024 # Consume "type.googleapis.com/". 

1025 prefix = [tokenizer.ConsumeIdentifier()] 

1026 tokenizer.Consume('.') 

1027 prefix.append(tokenizer.ConsumeIdentifier()) 

1028 tokenizer.Consume('.') 

1029 prefix.append(tokenizer.ConsumeIdentifier()) 

1030 tokenizer.Consume('/') 

1031 # Consume the fully-qualified type name. 

1032 name = [tokenizer.ConsumeIdentifier()] 

1033 while tokenizer.TryConsume('.'): 

1034 name.append(tokenizer.ConsumeIdentifier()) 

1035 return '.'.join(prefix), '.'.join(name) 

1036 

1037 def _MergeMessageField(self, tokenizer, message, field): 

1038 """Merges a single scalar field into a message. 

1039 

1040 Args: 

1041 tokenizer: A tokenizer to parse the field value. 

1042 message: The message of which field is a member. 

1043 field: The descriptor of the field to be merged. 

1044 

1045 Raises: 

1046 ParseError: In case of text parsing problems. 

1047 """ 

1048 is_map_entry = _IsMapEntry(field) 

1049 

1050 if tokenizer.TryConsume('<'): 

1051 end_token = '>' 

1052 else: 

1053 tokenizer.Consume('{') 

1054 end_token = '}' 

1055 

1056 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 

1057 if field.is_extension: 

1058 sub_message = message.Extensions[field].add() 

1059 elif is_map_entry: 

1060 sub_message = getattr(message, field.name).GetEntryClass()() 

1061 else: 

1062 sub_message = getattr(message, field.name).add() 

1063 else: 

1064 if field.is_extension: 

1065 if (not self._allow_multiple_scalars and 

1066 message.HasExtension(field)): 

1067 raise tokenizer.ParseErrorPreviousToken( 

1068 'Message type "%s" should not have multiple "%s" extensions.' % 

1069 (message.DESCRIPTOR.full_name, field.full_name)) 

1070 sub_message = message.Extensions[field] 

1071 else: 

1072 # Also apply _allow_multiple_scalars to message field. 

1073 # TODO(jieluo): Change to _allow_singular_overwrites. 

1074 if (not self._allow_multiple_scalars and 

1075 message.HasField(field.name)): 

1076 raise tokenizer.ParseErrorPreviousToken( 

1077 'Message type "%s" should not have multiple "%s" fields.' % 

1078 (message.DESCRIPTOR.full_name, field.name)) 

1079 sub_message = getattr(message, field.name) 

1080 sub_message.SetInParent() 

1081 

1082 while not tokenizer.TryConsume(end_token): 

1083 if tokenizer.AtEnd(): 

1084 raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token,)) 

1085 self._MergeField(tokenizer, sub_message) 

1086 

1087 if is_map_entry: 

1088 value_cpptype = field.message_type.fields_by_name['value'].cpp_type 

1089 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE: 

1090 value = getattr(message, field.name)[sub_message.key] 

1091 value.CopyFrom(sub_message.value) 

1092 else: 

1093 getattr(message, field.name)[sub_message.key] = sub_message.value 

1094 

1095 def _MergeScalarField(self, tokenizer, message, field): 

1096 """Merges a single scalar field into a message. 

1097 

1098 Args: 

1099 tokenizer: A tokenizer to parse the field value. 

1100 message: A protocol message to record the data. 

1101 field: The descriptor of the field to be merged. 

1102 

1103 Raises: 

1104 ParseError: In case of text parsing problems. 

1105 RuntimeError: On runtime errors. 

1106 """ 

1107 _ = self.allow_unknown_extension 

1108 value = None 

1109 

1110 if field.type in (descriptor.FieldDescriptor.TYPE_INT32, 

1111 descriptor.FieldDescriptor.TYPE_SINT32, 

1112 descriptor.FieldDescriptor.TYPE_SFIXED32): 

1113 value = _ConsumeInt32(tokenizer) 

1114 elif field.type in (descriptor.FieldDescriptor.TYPE_INT64, 

1115 descriptor.FieldDescriptor.TYPE_SINT64, 

1116 descriptor.FieldDescriptor.TYPE_SFIXED64): 

1117 value = _ConsumeInt64(tokenizer) 

1118 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32, 

1119 descriptor.FieldDescriptor.TYPE_FIXED32): 

1120 value = _ConsumeUint32(tokenizer) 

1121 elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64, 

1122 descriptor.FieldDescriptor.TYPE_FIXED64): 

1123 value = _ConsumeUint64(tokenizer) 

1124 elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT, 

1125 descriptor.FieldDescriptor.TYPE_DOUBLE): 

1126 value = tokenizer.ConsumeFloat() 

1127 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL: 

1128 value = tokenizer.ConsumeBool() 

1129 elif field.type == descriptor.FieldDescriptor.TYPE_STRING: 

1130 value = tokenizer.ConsumeString() 

1131 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES: 

1132 value = tokenizer.ConsumeByteString() 

1133 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM: 

1134 value = tokenizer.ConsumeEnum(field) 

1135 else: 

1136 raise RuntimeError('Unknown field type %d' % field.type) 

1137 

1138 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED: 

1139 if field.is_extension: 

1140 message.Extensions[field].append(value) 

1141 else: 

1142 getattr(message, field.name).append(value) 

1143 else: 

1144 if field.is_extension: 

1145 if (not self._allow_multiple_scalars and 

1146 field.has_presence and 

1147 message.HasExtension(field)): 

1148 raise tokenizer.ParseErrorPreviousToken( 

1149 'Message type "%s" should not have multiple "%s" extensions.' % 

1150 (message.DESCRIPTOR.full_name, field.full_name)) 

1151 else: 

1152 message.Extensions[field] = value 

1153 else: 

1154 duplicate_error = False 

1155 if not self._allow_multiple_scalars: 

1156 if field.has_presence: 

1157 duplicate_error = message.HasField(field.name) 

1158 else: 

1159 # For field that doesn't represent presence, try best effort to 

1160 # check multiple scalars by compare to default values. 

1161 duplicate_error = bool(getattr(message, field.name)) 

1162 

1163 if duplicate_error: 

1164 raise tokenizer.ParseErrorPreviousToken( 

1165 'Message type "%s" should not have multiple "%s" fields.' % 

1166 (message.DESCRIPTOR.full_name, field.name)) 

1167 else: 

1168 setattr(message, field.name, value) 

1169 

1170 def _SkipFieldContents(self, tokenizer, field_name, immediate_message_type): 

1171 """Skips over contents (value or message) of a field. 

1172 

1173 Args: 

1174 tokenizer: A tokenizer to parse the field name and values. 

1175 field_name: The field name currently being parsed. 

1176 immediate_message_type: The type of the message immediately containing 

1177 the silent marker. 

1178 """ 

1179 # Try to guess the type of this field. 

1180 # If this field is not a message, there should be a ":" between the 

1181 # field name and the field value and also the field value should not 

1182 # start with "{" or "<" which indicates the beginning of a message body. 

1183 # If there is no ":" or there is a "{" or "<" after ":", this field has 

1184 # to be a message or the input is ill-formed. 

1185 if tokenizer.TryConsume( 

1186 ':') and not tokenizer.LookingAt('{') and not tokenizer.LookingAt('<'): 

1187 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1188 if tokenizer.LookingAt('['): 

1189 self._SkipRepeatedFieldValue(tokenizer) 

1190 else: 

1191 self._SkipFieldValue(tokenizer) 

1192 else: 

1193 self._DetectSilentMarker(tokenizer, immediate_message_type, field_name) 

1194 self._SkipFieldMessage(tokenizer, immediate_message_type) 

1195 

1196 def _SkipField(self, tokenizer, immediate_message_type): 

1197 """Skips over a complete field (name and value/message). 

1198 

1199 Args: 

1200 tokenizer: A tokenizer to parse the field name and values. 

1201 immediate_message_type: The type of the message immediately containing 

1202 the silent marker. 

1203 """ 

1204 field_name = '' 

1205 if tokenizer.TryConsume('['): 

1206 # Consume extension or google.protobuf.Any type URL 

1207 field_name += '[' + tokenizer.ConsumeIdentifier() 

1208 num_identifiers = 1 

1209 while tokenizer.TryConsume('.'): 

1210 field_name += '.' + tokenizer.ConsumeIdentifier() 

1211 num_identifiers += 1 

1212 # This is possibly a type URL for an Any message. 

1213 if num_identifiers == 3 and tokenizer.TryConsume('/'): 

1214 field_name += '/' + tokenizer.ConsumeIdentifier() 

1215 while tokenizer.TryConsume('.'): 

1216 field_name += '.' + tokenizer.ConsumeIdentifier() 

1217 tokenizer.Consume(']') 

1218 field_name += ']' 

1219 else: 

1220 field_name += tokenizer.ConsumeIdentifierOrNumber() 

1221 

1222 self._SkipFieldContents(tokenizer, field_name, immediate_message_type) 

1223 

1224 # For historical reasons, fields may optionally be separated by commas or 

1225 # semicolons. 

1226 if not tokenizer.TryConsume(','): 

1227 tokenizer.TryConsume(';') 

1228 

1229 def _SkipFieldMessage(self, tokenizer, immediate_message_type): 

1230 """Skips over a field message. 

1231 

1232 Args: 

1233 tokenizer: A tokenizer to parse the field name and values. 

1234 immediate_message_type: The type of the message immediately containing 

1235 the silent marker 

1236 """ 

1237 if tokenizer.TryConsume('<'): 

1238 delimiter = '>' 

1239 else: 

1240 tokenizer.Consume('{') 

1241 delimiter = '}' 

1242 

1243 while not tokenizer.LookingAt('>') and not tokenizer.LookingAt('}'): 

1244 self._SkipField(tokenizer, immediate_message_type) 

1245 

1246 tokenizer.Consume(delimiter) 

1247 

1248 def _SkipFieldValue(self, tokenizer): 

1249 """Skips over a field value. 

1250 

1251 Args: 

1252 tokenizer: A tokenizer to parse the field name and values. 

1253 

1254 Raises: 

1255 ParseError: In case an invalid field value is found. 

1256 """ 

1257 # String/bytes tokens can come in multiple adjacent string literals. 

1258 # If we can consume one, consume as many as we can. 

1259 if tokenizer.TryConsumeByteString(): 

1260 while tokenizer.TryConsumeByteString(): 

1261 pass 

1262 return 

1263 

1264 if (not tokenizer.TryConsumeIdentifier() and 

1265 not _TryConsumeInt64(tokenizer) and not _TryConsumeUint64(tokenizer) and 

1266 not tokenizer.TryConsumeFloat()): 

1267 raise ParseError('Invalid field value: ' + tokenizer.token) 

1268 

1269 def _SkipRepeatedFieldValue(self, tokenizer): 

1270 """Skips over a repeated field value. 

1271 

1272 Args: 

1273 tokenizer: A tokenizer to parse the field value. 

1274 """ 

1275 tokenizer.Consume('[') 

1276 if not tokenizer.LookingAt(']'): 

1277 self._SkipFieldValue(tokenizer) 

1278 while tokenizer.TryConsume(','): 

1279 self._SkipFieldValue(tokenizer) 

1280 tokenizer.Consume(']') 

1281 

1282 

class Tokenizer(object):
  """Protocol buffer text representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

  _WHITESPACE = re.compile(r'\s+')
  _COMMENT = re.compile(r'(\s*#.*$)', re.MULTILINE)
  _WHITESPACE_OR_COMMENT = re.compile(r'(\s|(#.*$))+', re.MULTILINE)
  # A token is an identifier, a number, or a single-quoted/double-quoted
  # string literal (one alternative per quote character in _QUOTES).
  _TOKEN = re.compile('|'.join([
      r'[a-zA-Z_][0-9a-zA-Z_+-]*',  # an identifier
      r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',  # a number
  ] + [  # quoted str for each quote mark
      # Avoid backtracking! https://stackoverflow.com/a/844267
      r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
      for mark in _QUOTES
  ]))

  _IDENTIFIER = re.compile(r'[^\d\W]\w*')
  _IDENTIFIER_OR_NUMBER = re.compile(r'\w+')

  def __init__(self, lines, skip_comments=True):
    self._position = 0
    # Current line index (-1 until the first line is popped) and column.
    self._line = -1
    self._column = 0
    self._token_start = None
    # The current token text; empty string means end of input.
    self.token = ''
    self._lines = iter(lines)
    self._current_line = ''
    # Position of the previously consumed token, used for error reporting.
    self._previous_line = 0
    self._previous_column = 0
    self._more_lines = True
    self._skip_comments = skip_comments
    # When skipping comments, comments are swallowed together with
    # whitespace; otherwise they are returned as tokens.
    self._whitespace_pattern = (skip_comments and self._WHITESPACE_OR_COMMENT
                                or self._WHITESPACE)
    self.contains_silent_marker_before_current_token = False

    self._SkipWhitespace()
    self.NextToken()

  def LookingAt(self, token):
    # Peek: true iff the current token equals the given text.
    return self.token == token

  def AtEnd(self):
    """Checks the end of the text was reached.

    Returns:
      True iff the end was reached.
    """
    return not self.token

  def _PopLine(self):
    # Advance to the next line whenever the column pointer has run off the
    # end of the current one; clears _more_lines when input is exhausted.
    while len(self._current_line) <= self._column:
      try:
        self._current_line = next(self._lines)
      except StopIteration:
        self._current_line = ''
        self._more_lines = False
        return
      else:
        self._line += 1
        self._column = 0

  def _SkipWhitespace(self):
    # Consumes whitespace (and comments, when configured) across line
    # boundaries, recording whether the debug-string silent marker was seen.
    while True:
      self._PopLine()
      match = self._whitespace_pattern.match(self._current_line, self._column)
      if not match:
        break
      self.contains_silent_marker_before_current_token = match.group(0) == (
          ' ' + _DEBUG_STRING_SILENT_MARKER)
      length = len(match.group(0))
      self._column += length

  def TryConsume(self, token):
    """Tries to consume a given piece of text.

    Args:
      token: Text to consume.

    Returns:
      True iff the text was consumed.
    """
    if self.token == token:
      self.NextToken()
      return True
    return False

  def Consume(self, token):
    """Consumes a piece of text.

    Args:
      token: Text to consume.

    Raises:
      ParseError: If the text couldn't be consumed.
    """
    if not self.TryConsume(token):
      raise self.ParseError('Expected "%s".' % token)

  def ConsumeComment(self):
    # Consumes and returns the current token, which must be a '#' comment.
    result = self.token
    if not self._COMMENT.match(result):
      raise self.ParseError('Expected comment.')
    self.NextToken()
    return result

  def ConsumeCommentOrTrailingComment(self):
    """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""

    # Tokenizer initializes _previous_line and _previous_column to 0. As the
    # tokenizer starts, it looks like there is a previous token on the line.
    just_started = self._line == 0 and self._column == 0

    before_parsing = self._previous_line
    comment = self.ConsumeComment()

    # A trailing comment is a comment on the same line than the previous token.
    trailing = (self._previous_line == before_parsing
                and not just_started)

    return trailing, comment

  def TryConsumeIdentifier(self):
    try:
      self.ConsumeIdentifier()
      return True
    except ParseError:
      return False

  def ConsumeIdentifier(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER.match(result):
      raise self.ParseError('Expected identifier.')
    self.NextToken()
    return result

  def TryConsumeIdentifierOrNumber(self):
    try:
      self.ConsumeIdentifierOrNumber()
      return True
    except ParseError:
      return False

  def ConsumeIdentifierOrNumber(self):
    """Consumes protocol message field identifier.

    Returns:
      Identifier string.

    Raises:
      ParseError: If an identifier couldn't be consumed.
    """
    result = self.token
    if not self._IDENTIFIER_OR_NUMBER.match(result):
      raise self.ParseError('Expected identifier or number, got %s.' % result)
    self.NextToken()
    return result

  def TryConsumeInteger(self):
    try:
      self.ConsumeInteger()
      return True
    except ParseError:
      return False

  def ConsumeInteger(self):
    """Consumes an integer number.

    Returns:
      The integer parsed.

    Raises:
      ParseError: If an integer couldn't be consumed.
    """
    try:
      result = _ParseAbstractInteger(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeFloat(self):
    try:
      self.ConsumeFloat()
      return True
    except ParseError:
      return False

  def ConsumeFloat(self):
    """Consumes an floating point number.

    Returns:
      The number parsed.

    Raises:
      ParseError: If a floating point number couldn't be consumed.
    """
    try:
      result = ParseFloat(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeBool(self):
    """Consumes a boolean value.

    Returns:
      The bool parsed.

    Raises:
      ParseError: If a boolean value couldn't be consumed.
    """
    try:
      result = ParseBool(self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def TryConsumeByteString(self):
    try:
      self.ConsumeByteString()
      return True
    except ParseError:
      return False

  def ConsumeString(self):
    """Consumes a string value.

    Returns:
      The string parsed.

    Raises:
      ParseError: If a string value couldn't be consumed.
    """
    the_bytes = self.ConsumeByteString()
    try:
      return str(the_bytes, 'utf-8')
    except UnicodeDecodeError as e:
      raise self._StringParseError(e)

  def ConsumeByteString(self):
    """Consumes a byte array value.

    Returns:
      The array parsed (as a string).

    Raises:
      ParseError: If a byte array value couldn't be consumed.
    """
    # Adjacent string literals are concatenated, like in C or Python.
    the_list = [self._ConsumeSingleByteString()]
    while self.token and self.token[0] in _QUOTES:
      the_list.append(self._ConsumeSingleByteString())
    return b''.join(the_list)

  def _ConsumeSingleByteString(self):
    """Consume one token of a string literal.

    String literals (whether bytes or text) can come in multiple adjacent
    tokens which are automatically concatenated, like in C or Python. This
    method only consumes one token.

    Returns:
      The token parsed.
    Raises:
      ParseError: When the wrong format data is found.
    """
    text = self.token
    if len(text) < 1 or text[0] not in _QUOTES:
      raise self.ParseError('Expected string but found: %r' % (text,))

    if len(text) < 2 or text[-1] != text[0]:
      raise self.ParseError('String missing ending quote: %r' % (text,))

    try:
      # Strip the surrounding quotes before unescaping.
      result = text_encoding.CUnescape(text[1:-1])
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ConsumeEnum(self, field):
    # Consumes an enum value (name or number) for the given enum field.
    try:
      result = ParseEnum(field, self.token)
    except ValueError as e:
      raise self.ParseError(str(e))
    self.NextToken()
    return result

  def ParseErrorPreviousToken(self, message):
    """Creates and *returns* a ParseError for the previously read token.

    Args:
      message: A message to set for the exception.

    Returns:
      A ParseError instance.
    """
    return ParseError(message, self._previous_line + 1,
                      self._previous_column + 1)

  def ParseError(self, message):
    """Creates and *returns* a ParseError for the current token."""
    return ParseError('\'' + self._current_line + '\': ' + message,
                      self._line + 1, self._column + 1)

  def _StringParseError(self, e):
    return self.ParseError('Couldn\'t parse string: ' + str(e))

  def NextToken(self):
    """Reads the next meaningful token."""
    self._previous_line = self._line
    self._previous_column = self._column
    self.contains_silent_marker_before_current_token = False

    self._column += len(self.token)
    self._SkipWhitespace()

    if not self._more_lines:
      self.token = ''
      return

    match = self._TOKEN.match(self._current_line, self._column)
    if not match and not self._skip_comments:
      match = self._COMMENT.match(self._current_line, self._column)
    if match:
      token = match.group(0)
      self.token = token
    else:
      # Fall back to a single-character token (punctuation such as
      # '{', '}', '[', ']', ':', etc.).
      self.token = self._current_line[self._column]

1626 

# Aliased so it can still be accessed by current visibility violators
# (external code that reaches for the underscore-prefixed name).
# TODO(dbarnett): Migrate violators to textformat_tokenizer.
_Tokenizer = Tokenizer  # pylint: disable=invalid-name

1630 

1631 

def _ConsumeInt32(tokenizer):
  """Consumes a signed 32bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger() with 32-bit signed bounds.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=False)

1645 

1646 

def _ConsumeUint32(tokenizer):
  """Consumes an unsigned 32bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger() with 32-bit unsigned bounds.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 32bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=False)

1660 

1661 

def _TryConsumeInt64(tokenizer):
  """Returns True iff a signed 64bit integer was consumed from tokenizer."""
  try:
    _ConsumeInt64(tokenizer)
  except ParseError:
    return False
  return True

1668 

1669 

def _ConsumeInt64(tokenizer):
  """Consumes a signed 64bit integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If a signed 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=True, is_long=True)

1683 

1684 

def _TryConsumeUint64(tokenizer):
  """Returns True iff an unsigned 64bit integer was consumed from tokenizer."""
  try:
    _ConsumeUint64(tokenizer)
  except ParseError:
    return False
  return True

1691 

1692 

def _ConsumeUint64(tokenizer):
  """Consumes an unsigned 64bit integer number from tokenizer.

  Thin wrapper around _ConsumeInteger() with 64-bit unsigned bounds.

  Args:
    tokenizer: A tokenizer used to parse the number.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an unsigned 64bit integer couldn't be consumed.
  """
  return _ConsumeInteger(tokenizer, is_signed=False, is_long=True)

1706 

1707 

def _ConsumeInteger(tokenizer, is_signed=False, is_long=False):
  """Consumes an integer number from tokenizer.

  Args:
    tokenizer: A tokenizer used to parse the number.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer parsed.

  Raises:
    ParseError: If an integer with given characteristics couldn't be consumed.
  """
  # Validate the current token as an integer of the requested kind;
  # only advance the tokenizer on success.
  try:
    parsed = ParseInteger(tokenizer.token, is_signed=is_signed,
                          is_long=is_long)
  except ValueError as e:
    raise tokenizer.ParseError(str(e))
  tokenizer.NextToken()
  return parsed

1728 

1729 

def ParseInteger(text, is_signed=False, is_long=False):
  """Parses an integer.

  Args:
    text: The text to parse.
    is_signed: True if a signed integer must be parsed.
    is_long: True if a long integer must be parsed.

  Returns:
    The integer value.

  Raises:
    ValueError: Thrown Iff the text is not a valid integer.
  """
  # Do the actual parsing. Exception handling is propagated to caller.
  value = _ParseAbstractInteger(text)

  # Range-check the result. The checker table is indexed by
  # 2 * is_long + is_signed; exceptions propagate to the caller.
  checker_index = 2 * int(is_long) + int(is_signed)
  _INTEGER_CHECKERS[checker_index].CheckValue(value)
  return value

1751 

1752 

1753def _ParseAbstractInteger(text): 

1754 """Parses an integer without checking size/signedness. 

1755 

1756 Args: 

1757 text: The text to parse. 

1758 

1759 Returns: 

1760 The integer value. 

1761 

1762 Raises: 

1763 ValueError: Thrown Iff the text is not a valid integer. 

1764 """ 

1765 # Do the actual parsing. Exception handling is propagated to caller. 

1766 orig_text = text 

1767 c_octal_match = re.match(r'(-?)0(\d+)$', text) 

1768 if c_octal_match: 

1769 # Python 3 no longer supports 0755 octal syntax without the 'o', so 

1770 # we always use the '0o' prefix for multi-digit numbers starting with 0. 

1771 text = c_octal_match.group(1) + '0o' + c_octal_match.group(2) 

1772 try: 

1773 return int(text, 0) 

1774 except ValueError: 

1775 raise ValueError('Couldn\'t parse integer: %s' % orig_text) 

1776 

1777 

def ParseFloat(text):
  """Parse a floating point number.

  Args:
    text: Text to parse.

  Returns:
    The number parsed.

  Raises:
    ValueError: If a floating point number couldn't be parsed.
  """
  try:
    # The common case: ordinary Python float syntax.
    return float(text)
  except ValueError:
    pass
  # Fall back to text-format-specific spellings: inf/infinity and nan
  # (matched by the module-level regexes), then a C-style trailing 'f'
  # suffix as in '1.0f'.
  if _FLOAT_INFINITY.match(text):
    return float('-inf') if text[0] == '-' else float('inf')
  if _FLOAT_NAN.match(text):
    return float('nan')
  try:
    return float(text.rstrip('f'))
  except ValueError:
    raise ValueError('Couldn\'t parse float: %s' % text)

1808 

1809 

def ParseBool(text):
  """Parse a boolean value.

  Args:
    text: Text to parse.

  Returns:
    Boolean values parsed

  Raises:
    ValueError: If text is not a valid boolean.
  """
  # Only these exact spellings are accepted; anything else (e.g. 'TRUE',
  # 'yes') is rejected.
  truthy = ('true', 't', '1', 'True')
  falsy = ('false', 'f', '0', 'False')
  if text in truthy:
    return True
  if text in falsy:
    return False
  raise ValueError('Expected "true" or "false".')

1828 

1829 

def ParseEnum(field, value):
  """Parse an enum value.

  The value can be specified by a number (the enum value), or by
  a string literal (the enum name).

  Args:
    field: Enum field descriptor.
    value: String value.

  Returns:
    Enum value number.

  Raises:
    ValueError: If the enum value could not be parsed.
  """
  enum_descriptor = field.enum_type
  try:
    number = int(value, 0)
  except ValueError:
    # Not numeric: look the value up as an enum name.
    enum_value = enum_descriptor.values_by_name.get(value, None)
    if enum_value is None:
      raise ValueError('Enum type "%s" has no value named %s.' %
                       (enum_descriptor.full_name, value))
    return enum_value.number
  # Numeric: an open enum accepts any number; a closed enum requires the
  # number to correspond to a declared value.
  if not enum_descriptor.is_closed:
    return number
  enum_value = enum_descriptor.values_by_number.get(number, None)
  if enum_value is None:
    raise ValueError('Enum type "%s" has no value with number %d.' %
                     (enum_descriptor.full_name, number))
  return enum_value.number