Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/parsing_ops.py: 38%

178 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15 

16"""Parsing Ops.""" 

17from tensorflow.python.framework import ops 

18from tensorflow.python.framework import sparse_tensor 

19from tensorflow.python.ops import array_ops 

20from tensorflow.python.ops import control_flow_assert 

21from tensorflow.python.ops import control_flow_ops 

22from tensorflow.python.ops import gen_parsing_ops 

23from tensorflow.python.ops import math_ops 

24from tensorflow.python.ops import parsing_config 

25# go/tf-wildcard-import 

26# pylint: disable=wildcard-import,undefined-variable 

27from tensorflow.python.ops.gen_parsing_ops import * 

28# pylint: enable=wildcard-import,undefined-variable 

29from tensorflow.python.util import deprecation 

30from tensorflow.python.util import dispatch 

31from tensorflow.python.util.tf_export import tf_export 

32 

33 

# None of these ops has a meaningful gradient (they decode raw bytes,
# (de)serialize whole tensors, or convert strings), so register them as
# non-differentiable with the gradient machinery.
ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("DecodePaddedRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")

39 

40 

# Re-export the feature-configuration classes defined in parsing_config so
# they can be reached through this module as well.
VarLenFeature = parsing_config.VarLenFeature
RaggedFeature = parsing_config.RaggedFeature
SparseFeature = parsing_config.SparseFeature
FixedLenFeature = parsing_config.FixedLenFeature
FixedLenSequenceFeature = parsing_config.FixedLenSequenceFeature
# Private helpers shared with parsing_config; aliased here because several
# functions below use them directly.
# pylint: disable=protected-access
_ParseOpParams = parsing_config._ParseOpParams
_construct_tensors_for_composite_features = (
    parsing_config._construct_tensors_for_composite_features)
# pylint: enable=protected-access


# TODO(b/122887740) Switch files that use this private symbol to use new name.
_construct_sparse_tensors_for_sparse_features = \
    _construct_tensors_for_composite_features

56 

57 

58def _prepend_none_dimension(features): 

59 """Returns a copy of features with adjusted FixedLenSequenceFeature shapes.""" 

60 if features: 

61 modified_features = dict(features) # Create a copy to modify 

62 for key, feature in features.items(): 

63 if isinstance(feature, FixedLenSequenceFeature): 

64 if not feature.allow_missing: 

65 raise ValueError("Unsupported: FixedLenSequenceFeature requires " 

66 "allow_missing to be True.") 

67 modified_features[key] = FixedLenSequenceFeature( 

68 [None] + list(feature.shape), 

69 feature.dtype, 

70 feature.allow_missing, 

71 feature.default_value) 

72 return modified_features 

73 else: 

74 return features 

75 

76 

@tf_export("io.parse_example", v1=[])
@dispatch.add_dispatch_support
def parse_example_v2(serialized, features, example_names=None, name=None):
  """Parses `Example` protos into a `dict` of tensors.

  Parses a batch of serialized
  [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized` into a dictionary mapping feature keys to
  `Tensor`, `SparseTensor`, and `RaggedTensor` objects:

  * Each `VarLenFeature` maps to a `SparseTensor` of the specified type
    representing a ragged matrix; its indices are `[batch, index]`, where
    `batch` identifies the example in `serialized` and `index` is the value's
    position within that example's feature.
  * Each `SparseFeature` maps to a `SparseTensor` of the specified type with
    `dense_shape` `[batch_size] + SparseFeature.size`; its values come from
    the feature keyed by `value_key`, and the non-batch indices come from the
    features keyed by `SparseFeature.index_key`. Due to its complexity, a
    `VarLenFeature` should be preferred over a `SparseFeature` whenever
    possible.
  * Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type
    (or `tf.float32` if not specified) and shape
    `(serialized.size(),) + df.shape`. Entries with a `default_value` are
    optional; without one, parsing fails if the feature is missing from any
    example.
  * Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified
    type (or `tf.float32` if not specified) and shape
    `(serialized.size(), None) + df.shape`, padded with `default_value`
    along the second dimension.
  * Each `RaggedFeature` maps to a `RaggedTensor` formed by stacking the
    per-example `RaggedTensor`s built from `RaggedFeature.values_key` and
    `RaggedFeature.partition`; see the `tf.io.RaggedFeature` documentation
    for details and examples.

  `example_names` may contain descriptive names for the corresponding
  serialized protos. They are useful for debugging only and have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A mapping of feature keys to `FixedLenFeature`,
      `VarLenFeature`, `SparseFeature`, and `RaggedFeature` values.
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor`, `SparseTensor`, and
    `RaggedTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Argument `features` cannot be None.")
  # Give every FixedLenSequenceFeature a leading None dimension so its dense
  # result carries a variable-length second axis.
  features = _prepend_none_dimension(features)
  params = _ParseOpParams.from_features(features, [
      VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature,
      RaggedFeature
  ])

  raw_outputs = _parse_example_raw(serialized, example_names, params, name=name)
  # Reassemble SparseFeature / RaggedFeature outputs from their raw parts.
  return _construct_tensors_for_composite_features(features, raw_outputs)

313 

314 

@tf_export(v1=["io.parse_example", "parse_example"])
@dispatch.add_dispatch_support
def parse_example(serialized, features, name=None, example_names=None):
  # V1 endpoint: identical to parse_example_v2 except that `name` precedes
  # `example_names` in the signature.
  return parse_example_v2(
      serialized, features, example_names=example_names, name=name)


parse_example.__doc__ = parse_example_v2.__doc__

322 

323 

def _parse_example_raw(serialized, names, params, name):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos.
    params: A `ParseOpParams` containing the parameters for the parse op.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping keys to `Tensor`s and `SparseTensor`s and `RaggedTensor`s.
  """
  if params.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseExample", [serialized, names]):
    if names is None:
      names = []
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if params.ragged_keys and serialized.shape.ndims is None:
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    (sparse_indices, sparse_values, sparse_shapes, dense_values,
     ragged_values, ragged_row_splits) = gen_parsing_ops.parse_example_v2(
         serialized=serialized,
         names=names,
         sparse_keys=params.sparse_keys,
         dense_keys=params.dense_keys,
         ragged_keys=params.ragged_keys,
         dense_defaults=params.dense_defaults_vec,
         num_sparse=len(params.sparse_keys),
         sparse_types=params.sparse_types,
         ragged_value_types=params.ragged_value_types,
         ragged_split_types=params.ragged_split_types,
         dense_shapes=params.dense_shapes_as_proto,
         name=name)

    # pylint: disable=protected-access
    ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, ragged_values, ragged_row_splits)
    # pylint: enable=protected-access

    sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for ix, val, shape in zip(sparse_indices, sparse_values, sparse_shapes)
    ]

    # Key order in the result mirrors the concatenation order of the raw
    # op's outputs: sparse, then dense, then ragged.
    result_keys = params.sparse_keys + params.dense_keys + params.ragged_keys
    result_values = sparse_tensors + dense_values + ragged_tensors
    return dict(zip(result_keys, result_values))

373 

374 

@tf_export(v1=["io.parse_single_example", "parse_single_example"])
@dispatch.add_dispatch_support
def parse_single_example(serialized, features, name=None, example_names=None):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except that `serialized` holds one serialized
  `Example` instead of a batch, so the outputs have no batch dimension:

  * Dense tensors match the shape given in `dense_shape` directly.
  * For `SparseTensor`s, the leading (batch) column of the indices matrix and
    the leading (`batch_size`) entry of the shape vector are removed; the
    values vector is unchanged.

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A mapping of feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # V1 endpoint: same behavior as parse_single_example_v2, different
  # argument order.
  return parse_single_example_v2(
      serialized, features, example_names=example_names, name=name)

408 

409 

@tf_export("io.parse_single_example", v1=[])
@dispatch.add_dispatch_support
def parse_single_example_v2(
    serialized, features, example_names=None, name=None
    ):
  """Parses a single `Example` proto.

  Similar to `parse_example`, except:

  For dense tensors, the returned `Tensor` is identical to the output of
  `parse_example`, except there is no batch dimension, the output shape is the
  same as the shape given in `dense_shape`.

  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
    features: A mapping of feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    example_names: (Optional) A scalar string Tensor, the associated name.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    # Keep the error text consistent with parse_example_v2, which performs
    # the identical validation.
    raise ValueError("Argument `features` cannot be None.")
  with ops.name_scope(name, "ParseSingleExample", [serialized, example_names]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    # Enforce the documented scalar contract before delegating to the batch
    # parser, which accepts a scalar `serialized` as a single example.
    serialized = _assert_scalar(serialized, "serialized")
    return parse_example_v2(serialized, features, example_names, name)

450 

451 

@tf_export("io.parse_sequence_example")
@dispatch.add_dispatch_support
def parse_sequence_example(serialized,
                           context_features=None,
                           sequence_features=None,
                           example_names=None,
                           name=None):
  # pylint: disable=line-too-long
  """Parses a batch of `SequenceExample` protos.

  Parses a vector of serialized
  [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized` into a tuple of dictionaries mapping keys to
  `Tensor`, `SparseTensor`, and `RaggedTensor` objects. The first dictionary
  holds the results for `context_features`, the second the results for
  `sequence_features`, and the third the lengths of the dense feature_list
  features.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame, while `sequence_features` keys index the
  variable-length `FeatureList` section, so the frame dimension (the first
  dimension) of their values may vary between protos and between keys.

  `context_features` contains `VarLenFeature`, `RaggedFeature`, and
  `FixedLenFeature` objects; `sequence_features` contains `VarLenFeature`,
  `RaggedFeature`, and `FixedLenSequenceFeature` objects. Each `VarLenFeature`
  maps to a `SparseTensor`, each `RaggedFeature` to a `RaggedTensor`, and each
  `FixedLenFeature` / `FixedLenSequenceFeature` to a `Tensor`. For a
  `FixedLenSequenceFeature` `df` the shape is `(B, T) + df.dense_shape`, where
  `B` is the batch size and `T` the length of the associated `FeatureList`;
  sequences shorter than the batch maximum are padded with default datatype
  values (0 for numeric types, the empty string for string types).

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector with indices `[time, index]`, where `time` is the `FeatureList` entry
  and `index` the value's position within that entry.

  `FixedLenFeature` entries with a `default_value` and
  `FixedLenSequenceFeature` entries with `allow_missing=True` are optional;
  otherwise, parsing fails if that `Feature` or `FeatureList` is missing from
  any example in `serialized`.

  `example_names` may contain descriptive names for the corresponding
  serialized protos. They are useful for debugging only and have no effect on
  the output.

  Args:
    serialized: A vector (1-D Tensor) of type string containing binary
      serialized `SequenceExample` protos.
    context_features: A mapping of feature keys to `FixedLenFeature` or
      `VarLenFeature` or `RaggedFeature` values. These features are associated
      with a `SequenceExample` as a whole.
    sequence_features: A mapping of feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values.
      These features are associated with data within the `FeatureList` section
      of the `SequenceExample` proto.
    example_names: A vector (1-D Tensor) of strings (optional), the name of the
      serialized protos.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s,
    `SparseTensor`s, and `RaggedTensor`. The first dict contains the context
    key/values, the second dict contains the feature_list key/values, and the
    final dict contains the lengths of any dense feature_list features.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not (context_features or sequence_features):
    raise ValueError("Both `context_features` and `sequence_features` argument "
                     "are None, but at least one should have values.")
  context_params = _ParseOpParams.from_features(
      context_features, [VarLenFeature, FixedLenFeature, RaggedFeature])
  feature_list_params = _ParseOpParams.from_features(
      sequence_features,
      [VarLenFeature, FixedLenSequenceFeature, RaggedFeature])

  with ops.name_scope(name, "ParseSequenceExample",
                      [serialized, example_names]):
    (context_output, feature_list_output,
     feature_list_lengths) = _parse_sequence_example_raw(
         serialized, example_names, context_params, feature_list_params, name)

    # Reassemble composite (ragged) outputs from their raw components.
    if context_params.ragged_keys:
      context_output = _construct_tensors_for_composite_features(
          context_features, context_output)
    if feature_list_params.ragged_keys:
      feature_list_output = _construct_tensors_for_composite_features(
          sequence_features, feature_list_output)

    return context_output, feature_list_output, feature_list_lengths

572 

573 

def _parse_sequence_example_raw(serialized,
                                debug_name,
                                context,
                                feature_list,
                                name=None):
  """Parses a vector of `SequenceExample` protos.

  Args:
    serialized: A vector (1-D Tensor) of type string, containing binary
      serialized `SequenceExample` protos.
    debug_name: A vector (1-D Tensor) of strings (optional), the names of the
      serialized protos.
    context: A `ParseOpParams` containing the parameters for the parse
      op for the context features.
    feature_list: A `ParseOpParams` containing the parameters for the
      parse op for the feature_list features.
    name: A name for this operation (optional).

  Returns:
    A tuple of three `dict`s, each mapping keys to `Tensor`s, `SparseTensor`s,
    and `RaggedTensor`s. The first dict contains the context key/values, the
    second dict contains the feature_list key/values, and the final dict
    contains the lengths of any dense feature_list features.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  if context.num_features + feature_list.num_features == 0:
    raise ValueError("Must provide at least one feature key.")
  with ops.name_scope(name, "ParseSequenceExample", [serialized]):
    debug_name = [] if debug_name is None else debug_name

    # Internal
    # Dense feature_lists do not support per-key defaults; a key with a None
    # default is instead treated as empty when missing from an example.
    feature_list_dense_missing_assumed_empty = []
    for k, v in feature_list.dense_defaults.items():
      if v is not None:
        raise ValueError("Value feature_list.dense_defaults[%s] must be None" %
                         k)
      feature_list_dense_missing_assumed_empty.append(k)

    has_ragged = context.ragged_keys or feature_list.ragged_keys
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    if has_ragged and serialized.shape.ndims is None:
      # Ragged outputs need the rank of `serialized` to know how many batch
      # dimensions to reconstruct.
      raise ValueError("serialized must have statically-known rank to "
                       "parse ragged features.")
    # Boolean mask aligned with feature_list.dense_keys (the op expects a
    # vector, not a list of key names).
    feature_list_dense_missing_assumed_empty_vector = [
        key in feature_list_dense_missing_assumed_empty
        for key in feature_list.dense_keys
    ]
    outputs = gen_parsing_ops.parse_sequence_example_v2(
        # Inputs
        serialized=serialized,
        debug_name=debug_name,
        context_sparse_keys=context.sparse_keys,
        context_dense_keys=context.dense_keys,
        context_ragged_keys=context.ragged_keys,
        feature_list_sparse_keys=feature_list.sparse_keys,
        feature_list_dense_keys=feature_list.dense_keys,
        feature_list_ragged_keys=feature_list.ragged_keys,
        feature_list_dense_missing_assumed_empty=(
            feature_list_dense_missing_assumed_empty_vector),
        context_dense_defaults=context.dense_defaults_vec,
        # Attrs
        Ncontext_sparse=len(context.sparse_keys),
        Nfeature_list_sparse=len(feature_list.sparse_keys),
        Nfeature_list_dense=len(feature_list.dense_keys),
        context_sparse_types=context.sparse_types,
        context_ragged_value_types=context.ragged_value_types,
        context_ragged_split_types=context.ragged_split_types,
        feature_list_dense_types=feature_list.dense_types,
        feature_list_sparse_types=feature_list.sparse_types,
        feature_list_ragged_value_types=feature_list.ragged_value_types,
        feature_list_ragged_split_types=feature_list.ragged_split_types,
        context_dense_shapes=context.dense_shapes_as_proto,
        feature_list_dense_shapes=feature_list.dense_shapes,
        name=name)
    # NOTE: this unpack order must exactly match the output order of the
    # generated parse_sequence_example_v2 op.
    (context_sparse_indices, context_sparse_values, context_sparse_shapes,
     context_dense_values, context_ragged_values, context_ragged_row_splits,
     feature_list_sparse_indices, feature_list_sparse_values,
     feature_list_sparse_shapes, feature_list_dense_values,
     feature_list_dense_lengths, feature_list_ragged_values,
     feature_list_ragged_outer_splits,
     feature_list_ragged_inner_splits) = outputs
    # pylint: disable=protected-access
    context_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, context_ragged_values, context_ragged_row_splits)
    # feature_list ragged tensors carry an extra (inner) splits level for the
    # time dimension.
    feature_list_ragged_tensors = parsing_config._build_ragged_tensors(
        serialized.shape, feature_list_ragged_values,
        feature_list_ragged_outer_splits, feature_list_ragged_inner_splits)

    # pylint: disable=g-complex-comprehension
    context_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val,
             shape) in zip(context_sparse_indices, context_sparse_values,
                           context_sparse_shapes)
    ]

    feature_list_sparse_tensors = [
        sparse_tensor.SparseTensor(ix, val, shape)
        for (ix, val, shape
            ) in zip(feature_list_sparse_indices, feature_list_sparse_values,
                     feature_list_sparse_shapes)
    ]
    # pylint: enable=g-complex-comprehension

    # Result key order mirrors the raw op's output grouping:
    # sparse, then dense, then ragged.
    context_output = dict(
        zip(
            context.sparse_keys + context.dense_keys + context.ragged_keys,
            context_sparse_tensors + context_dense_values +
            context_ragged_tensors))
    feature_list_output = dict(
        zip(
            feature_list.sparse_keys + feature_list.dense_keys +
            feature_list.ragged_keys, feature_list_sparse_tensors +
            feature_list_dense_values + feature_list_ragged_tensors))
    feature_list_lengths = dict(
        zip(feature_list.dense_keys, feature_list_dense_lengths))

    return (context_output, feature_list_output, feature_list_lengths)

694 

695 

696@tf_export("io.parse_single_sequence_example", 

697 v1=["io.parse_single_sequence_example", 

698 "parse_single_sequence_example"]) 

699@dispatch.add_dispatch_support 

700def parse_single_sequence_example( 

701 serialized, context_features=None, sequence_features=None, 

702 example_name=None, name=None): 

703 # pylint: disable=line-too-long 

704 """Parses a single `SequenceExample` proto. 

705 

706 Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) 

707 proto given in `serialized`. 

708 

709 This op parses a serialized sequence example into a tuple of dictionaries, 

710 each mapping keys to `Tensor` and `SparseTensor` objects. 

711 The first dictionary contains mappings for keys appearing in 

712 `context_features`, and the second dictionary contains mappings for keys 

713 appearing in `sequence_features`. 

714 

715 At least one of `context_features` and `sequence_features` must be provided 

716 and non-empty. 

717 

718 The `context_features` keys are associated with a `SequenceExample` as a 

719 whole, independent of time / frame. In contrast, the `sequence_features` keys 

720 provide a way to access variable-length data within the `FeatureList` section 

721 of the `SequenceExample` proto. While the shapes of `context_features` values 

722 are fixed with respect to frame, the frame dimension (the first dimension) 

723 of `sequence_features` values may vary between `SequenceExample` protos, 

724 and even between `feature_list` keys within the same `SequenceExample`. 

725 

726 `context_features` contains `VarLenFeature`, `RaggedFeature`, and 

727 `FixedLenFeature` objects. Each `VarLenFeature` is mapped to a `SparseTensor`; 

728 each `RaggedFeature` is mapped to a `RaggedTensor`; and each `FixedLenFeature` 

729 is mapped to a `Tensor`, of the specified type, shape, and default value. 

730 

731 `sequence_features` contains `VarLenFeature`, `RaggedFeature`, and 

732 `FixedLenSequenceFeature` objects. Each `VarLenFeature` is mapped to a 

733 `SparseTensor`; each `RaggedFeature` is mapped to a `RaggedTensor`; and each 

734 `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type. 

735 The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`, 

736 where `T` is the length of the associated `FeatureList` in the 

737 `SequenceExample`. For instance, `FixedLenSequenceFeature([])` yields a scalar 

738 1-D `Tensor` of static shape `[None]` and dynamic shape `[T]`, while 

739 `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D matrix `Tensor` 

740 of static shape `[None, k]` and dynamic shape `[T, k]`. 

741 

742 Each `SparseTensor` corresponding to `sequence_features` represents a ragged 

743 vector. Its indices are `[time, index]`, where `time` is the `FeatureList` 

744 entry and `index` is the value's index in the list of values associated with 

745 that time. 

746 

747 `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature` 

748 entries with `allow_missing=True` are optional; otherwise, we will fail if 

749 that `Feature` or `FeatureList` is missing from any example in `serialized`. 

750 

751 `example_name` may contain a descriptive name for the corresponding serialized 

752 proto. This may be useful for debugging purposes, but it has no effect on the 

753 output. If not `None`, `example_name` must be a scalar. 

754 

755 Note that the batch version of this function, `tf.parse_sequence_example`, 

756 is written for better memory efficiency and will be faster on large 

757 `SequenceExample`s. 

758 

759 Args: 

760 serialized: A scalar (0-D Tensor) of type string, a single binary 

761 serialized `SequenceExample` proto. 

762 context_features: A mapping of feature keys to `FixedLenFeature` or 

763 `VarLenFeature` or `RaggedFeature` values. These features are associated 

764 with a `SequenceExample` as a whole. 

765 sequence_features: A mapping of feature keys to 

766 `FixedLenSequenceFeature` or `VarLenFeature` or `RaggedFeature` values. 

767 These features are associated with data within the `FeatureList` section 

768 of the `SequenceExample` proto. 

769 example_name: A scalar (0-D Tensor) of strings (optional), the name of 

770 the serialized proto. 

771 name: A name for this operation (optional). 

772 

773 Returns: 

774 A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s 

775 and `RaggedTensor`s. 

776 

777 * The first dict contains the context key/values. 

778 * The second dict contains the feature_list key/values. 

779 

780 Raises: 

781 ValueError: if any feature is invalid. 

782 """ 

783 # pylint: enable=line-too-long 

784 if not (context_features or sequence_features): 

785 raise ValueError("Both context_features and sequence_features are None, but" 

786 " at least one should have values.") 

787 context_params = _ParseOpParams.from_features( 

788 context_features, [VarLenFeature, FixedLenFeature, RaggedFeature]) 

789 feature_list_params = _ParseOpParams.from_features( 

790 sequence_features, 

791 [VarLenFeature, FixedLenSequenceFeature, RaggedFeature]) 

792 

793 with ops.name_scope(name, "ParseSingleSequenceExample", 

794 [serialized, example_name]): 

795 context_output, feature_list_output = ( 

796 _parse_single_sequence_example_raw(serialized, context_params, 

797 feature_list_params, example_name, 

798 name)) 

799 

800 if context_params.ragged_keys: 

801 context_output = _construct_tensors_for_composite_features( 

802 context_features, context_output) 

803 if feature_list_params.ragged_keys: 

804 feature_list_output = _construct_tensors_for_composite_features( 

805 sequence_features, feature_list_output) 

806 

807 return context_output, feature_list_output 

808 

809 

def _parse_single_sequence_example_raw(serialized,
                                       context,
                                       feature_list,
                                       debug_name,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Internal helper: validates that `serialized` is a scalar, then delegates to
  the batch parser and drops the per-example lengths it returns.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary serialized
      `SequenceExample` proto.
    context: A `ParseOpParams` containing the parameters for the parse op for
      the context features.
    feature_list: A `ParseOpParams` containing the parameters for the parse op
      for the feature_list features.
    debug_name: A scalar (0-D Tensor) of strings (optional), the name of the
      serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    TypeError: if feature_list.dense_defaults is not either None or a dict.
  """
  # NOTE(review): the default scope name is "ParseSingleExample", not
  # "ParseSingleSequenceExample" — presumably intentional for graph-name
  # stability; renaming it would change op names in existing graphs.
  with ops.name_scope(name, "ParseSingleExample", [serialized, debug_name]):
    serialized = ops.convert_to_tensor(serialized, name="serialized")
    # Enforce (statically if the rank is known, otherwise via a runtime
    # assertion) that the input is a single serialized proto, not a batch.
    serialized = _assert_scalar(serialized, "serialized")
    # The batch-oriented raw parser returns a third element (feature lengths)
    # that is meaningless for a single example, so only the first two dicts
    # are propagated.
    return _parse_sequence_example_raw(serialized, debug_name, context,
                                       feature_list, name)[:2]

841 

842 

@tf_export("io.decode_raw", v1=[])
@dispatch.add_dispatch_support
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  r"""Convert raw bytes from input tensor into numeric tensors.

  Every component of the input tensor is interpreted as a sequence of bytes.
  These bytes are then decoded as numbers in the format specified by `out_type`.

  >>> tf.io.decode_raw(tf.constant("1"), tf.uint8)
  <tf.Tensor: shape=(1,), dtype=uint8, numpy=array([49], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1,2"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 44, 50], dtype=uint8)>

  Note that the rank of the output tensor is always one more than the input one:

  >>> tf.io.decode_raw(tf.constant(["1","2"]), tf.uint8).shape
  TensorShape([2, 1])
  >>> tf.io.decode_raw(tf.constant([["1"],["2"]]), tf.uint8).shape
  TensorShape([2, 1, 1])

  This is because each byte in the input is converted to a new value on the
  output (if output type is `uint8` or `int8`, otherwise chunks of inputs get
  converted to a new value):

  >>> tf.io.decode_raw(tf.constant("123"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 50, 51], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint8)
  <tf.Tensor: shape=(4,), dtype=uint8, numpy=array([49, 50, 51, 52], ...
  >>> # chunked output
  >>> tf.io.decode_raw(tf.constant("12"), tf.uint16)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([12849], dtype=uint16)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint16)
  <tf.Tensor: shape=(2,), dtype=uint16, numpy=array([12849, 13363], ...
  >>> # int64 output
  >>> tf.io.decode_raw(tf.constant("12345678"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505])>
  >>> tf.io.decode_raw(tf.constant("1234567887654321"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505, 3544952156018063160])>

  The operation allows specifying endianness via the `little_endian` parameter.

  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2826], dtype=int16)>
  >>> hex(2826)
  '0xb0a'
  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2571], dtype=int16)>
  >>> hex(2571)
  '0xa0b'

  If the elements of `input_bytes` are of different length, you must specify
  `fixed_length`:

  >>> tf.io.decode_raw(tf.constant([["1"],["23"]]), tf.uint8, fixed_length=4)
  <tf.Tensor: shape=(2, 1, 4), dtype=uint8, numpy=
  array([[[49,  0,  0,  0]],
         [[50, 51,  0,  0]]], dtype=uint8)>

  If the `fixed_length` value is larger than the size of the `out_type` dtype,
  multiple values are generated:

  >>> tf.io.decode_raw(tf.constant(["1212"]), tf.uint16, fixed_length=4)
  <tf.Tensor: shape=(1, 2), dtype=uint16, numpy=array([[12849, 12849]], ...

  If the input value is larger than `fixed_length`, it is truncated:

  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([513], dtype=uint16)>
  >>> hex(513)
  '0x201'

  If `little_endian` and `fixed_length` are specified, truncation to the fixed
  length occurs before endianness conversion:

  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([258], dtype=uint16)>
  >>> hex(258)
  '0x102'

  If input values all have the same length, then specifying `fixed_length`
  equal to the size of the strings should not change output:

  >>> x = ["12345678", "87654321"]
  >>> tf.io.decode_raw(x, tf.int16)
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>
  >>> tf.io.decode_raw(x, tf.int16, fixed_length=len(x[0]))
  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
  array([[12849, 13363, 13877, 14391],
         [14136, 13622, 13108, 12594]], dtype=int16)>

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.

      Currently, this must be a tensor of strings (bytes), although semantically
      the operation should support any input.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    fixed_length:
      If set, the first `fixed_length` bytes of each element will be converted.
      Data will be zero-padded or truncated to the specified length.

      `fixed_length` must be a multiple of the size of `out_type`.

      `fixed_length` must be specified if the elements of `input_bytes` are of
      variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  # DecodeRaw requires all elements to have equal length; DecodePaddedRaw
  # pads/truncates each element to `fixed_length` bytes first, so dispatch on
  # whether the caller asked for a fixed length.
  if fixed_length is not None:
    return gen_parsing_ops.decode_padded_raw(
        input_bytes,
        fixed_length=fixed_length,
        out_type=out_type,
        little_endian=little_endian,
        name=name)
  else:
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)

976 

977 

@tf_export(v1=["decode_raw", "io.decode_raw"])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             "bytes is deprecated, use input_bytes instead",
                             "bytes")
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Convert raw byte strings into tensors.

  V1 wrapper around the DecodeRaw op that accepts the legacy `bytes`
  argument name as a deprecated alias for `input_bytes`.

  Args:
    input_bytes:
      Each element of the input Tensor is converted to an array of bytes.
    out_type:
      `DType` of the output. Acceptable types are `half`, `float`, `double`,
      `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian:
      Whether the `input_bytes` data is in little-endian format. Data will be
      converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  # Resolve the deprecated alias: exactly one of `input_bytes` / `bytes` may
  # be supplied; the lookup raises if both are given.
  data = deprecation.deprecated_argument_lookup("input_bytes", input_bytes,
                                                "bytes", bytes)

  # out_type is a required positional argument in the original API, and had to
  # be changed to a keyword argument in order to facilitate the transition from
  # the reserved named `bytes` to `input_bytes`. Ensure it's still set.
  if out_type is None:
    raise ValueError(
        "decode_raw_v1() missing 1 positional argument: 'out_type'")

  return gen_parsing_ops.decode_raw(
      data, out_type, little_endian=little_endian, name=name)

1020 

1021 

# Swap `name` and `na_value` for backward compatibility.
@tf_export(v1=["io.decode_csv", "decode_csv"])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints("decode_csv")
def decode_csv(records,
               record_defaults,
               field_delim=",",
               use_quote_delim=True,
               name=None,
               na_value="",
               select_cols=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  # V1 endpoint: forward everything to the V2 implementation, whose signature
  # places `name` last (hence the explicit keywords here).
  return decode_csv_v2(
      records,
      record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      select_cols=select_cols,
      name=name)

1071 

1072 

@tf_export("io.decode_csv", v1=[])
@dispatch.add_dispatch_support
def decode_csv_v2(records,
                  record_defaults,
                  field_delim=",",
                  use_quote_delim=True,
                  na_value="",
                  select_cols=None,
                  name=None):
  """Convert CSV records to tensors. Each column maps to one tensor.

  RFC 4180 format is expected for the CSV records.
  (https://tools.ietf.org/html/rfc4180)
  Note that we allow leading and trailing spaces with int or float field.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or an empty vector if the column is
      required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    na_value: Additional string to recognize as NA/NaN.
    select_cols: Optional sorted list of column indices to select. If specified,
      only this subset of columns will be parsed and returned.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
  # Validate `select_cols` eagerly with clear Python errors rather than
  # letting the kernel fail with a less helpful message.
  if select_cols is not None:
    # Each consecutive pair must strictly increase.
    if any(a >= b for a, b in zip(select_cols, select_cols[1:])):
      raise ValueError("select_cols is not strictly increasing.")
    if select_cols[0] < 0:
      raise ValueError("select_cols contains negative values.")
    if len(select_cols) != len(record_defaults):
      raise ValueError(
          "Length of select_cols and record_defaults do not match.")
  return gen_parsing_ops.decode_csv(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name,
      select_cols=select_cols,
  )

1131 

1132 

1133def _assert_scalar(value, name): 

1134 """Asserts that `value` is scalar, and returns `value`.""" 

1135 value_rank = value.shape.rank 

1136 if value_rank is None: 

1137 check = control_flow_assert.Assert( 

1138 math_ops.equal(array_ops.rank(value), 0), 

1139 ["Input %s must be a scalar" % name], 

1140 name="%sIsScalar" % name.capitalize()) 

1141 result = control_flow_ops.with_dependencies([check], 

1142 value, 

1143 name="%sDependencies" % name) 

1144 result.set_shape([]) 

1145 return result 

1146 elif value_rank == 0: 

1147 return value 

1148 else: 

1149 raise ValueError("Input %s must be a scalar" % name) 

1150 

1151 

@tf_export("io.decode_json_example",
           v1=["decode_json_example", "io.decode_json_example"])
def decode_json_example(json_examples, name=None):
  r"""Convert JSON-encoded Example records to binary protocol buffer strings.

  Note: This is **not** a general purpose JSON parsing op.

  This op converts JSON-serialized `tf.train.Example` (maybe created with
  `json_format.MessageToJson`, following the
  [standard JSON mapping](
  https://developers.google.com/protocol-buffers/docs/proto3#json))
  to a binary-serialized `tf.train.Example` (equivalent to
  `Example.SerializeToString()`) suitable for conversion to tensors with
  `tf.io.parse_example`.

  Here is a `tf.train.Example` proto:

  >>> example = tf.train.Example(
  ...   features=tf.train.Features(
  ...       feature={
  ...           "a": tf.train.Feature(
  ...               int64_list=tf.train.Int64List(
  ...                   value=[1, 1, 3]))}))

  Here it is converted to JSON:

  >>> from google.protobuf import json_format
  >>> example_json = json_format.MessageToJson(example)
  >>> print(example_json)
  {
    "features": {
      "feature": {
        "a": {
          "int64List": {
            "value": [
              "1",
              "1",
              "3"
            ]
          }
        }
      }
    }
  }

  This op converts the above json string to a binary proto:

  >>> example_binary = tf.io.decode_json_example(example_json)
  >>> example_binary.numpy()
  b'\n\x0f\n\r\n\x01a\x12\x08\x1a\x06\x08\x01\x08\x01\x08\x03'

  The op works on string tensors of any shape:

  >>> tf.io.decode_json_example([
  ...     [example_json, example_json],
  ...     [example_json, example_json]]).shape.as_list()
  [2, 2]

  This resulting binary-string is equivalent to `Example.SerializeToString()`,
  and can be converted to Tensors using `tf.io.parse_example` and related
  functions:

  >>> tf.io.parse_example(
  ...   serialized=[example_binary.numpy(),
  ...              example.SerializeToString()],
  ...   features = {'a': tf.io.FixedLenFeature(shape=[3], dtype=tf.int64)})
  {'a': <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
   array([[1, 1, 3],
          [1, 1, 3]])>}

  Args:
    json_examples: A string tensor containing json-serialized `tf.Example`
      protos.
    name: A name for the op.

  Returns:
    A string Tensor containing the binary-serialized `tf.Example` protos.

  Raises:
    `tf.errors.InvalidArgumentError`: If the JSON could not be converted to a
    `tf.Example`
  """
  # Thin wrapper: all parsing/validation happens inside the kernel.
  return gen_parsing_ops.decode_json_example(json_examples, name=name)

1235 

1236 

# Register elementwise ops that don't have Python wrappers.
# NOTE(review): registering DecodeCompressed with the unary elementwise
# dispatch system — presumably so it composes with dispatchable tensor types;
# confirm against the dispatch module's documentation.
dispatch.register_unary_elementwise_api(gen_parsing_ops.decode_compressed)