Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/io_ops.py: 55%

124 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# pylint: disable=line-too-long
"""Inputs and Readers.

See the [Inputs and
Readers](https://tensorflow.org/api_guides/python/io_ops) guide.
"""

from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.lib.io import python_io
from tensorflow.python.ops import gen_data_flow_ops
from tensorflow.python.ops import gen_io_ops
from tensorflow.python.ops import gen_parsing_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.ops.gen_io_ops import *
# pylint: enable=wildcard-import
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


# pylint: disable=protected-access
def _save(filename, tensor_names, tensors, tensor_slices=None, name="save"):
  """Save a list of tensors to a file with given names.

  Example usage without slice info:
    Save("/foo/bar", ["w", "b"], [w, b])

  Example usage with slices:
    Save("/foo/bar", ["w", "w"], [slice0, slice1],
         tensor_slices=["4 10 0,2:-", "4 10 2,2:-"])

  Args:
    filename: the file name of the sstable.
    tensor_names: a list of strings.
    tensors: the list of tensors to be saved.
    tensor_slices: Optional list of strings to specify the shape and slices of
      a larger virtual tensor that each tensor is a part of. If not specified
      each tensor is saved as a full slice.
    name: string. Optional name for the op.

  Requires:
    The length of tensors must match the length of tensor_names and, if
    provided, of tensor_slices.

  Returns:
    An Operation that saves the tensors.
  """
  if tensor_slices is None:
    return gen_io_ops.save(filename, tensor_names, tensors, name=name)
  else:
    return gen_io_ops.save_slices(filename, tensor_names, tensor_slices,
                                  tensors, name=name)

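
# NOTE: The `tensor_slices` strings above use TensorFlow's shape-and-slice
# spec: the full (virtual) tensor shape, followed by one `start,length` pair
# per dimension, with `-` meaning the whole dimension. As an illustrative
# reading of the docstring example, "4 10 0,2:-" describes a slice of a 4x10
# tensor covering rows 0..1 and all columns.
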

def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type,
                   name="restore_slice", preferred_shard=-1):
  """Restore a tensor slice from a set of files with a given pattern.

  Example usage:
    RestoreSlice("/foo/bar-?????-of-?????", "w", "10 10 0,2:-", DT_FLOAT)

  Args:
    file_pattern: the file pattern used to match a set of checkpoint files.
    tensor_name: the name of the tensor to restore.
    shape_and_slice: the shape-and-slice spec of the slice.
    tensor_type: the type of the tensor to restore.
    name: string. Optional name for the op.
    preferred_shard: Int. Optional shard to open first in the checkpoint file.

  Returns:
    A tensor of type "tensor_type".
  """
  base_type = dtypes.as_dtype(tensor_type).base_dtype
  return gen_io_ops.restore_slice(
      file_pattern, tensor_name, shape_and_slice, base_type,
      preferred_shard, name=name)


@tf_export("io.read_file", v1=["io.read_file", "read_file"])
def read_file(filename, name=None):
  """Reads the contents of a file.

  This operation returns a tensor with the entire contents of the input
  filename. It does no parsing; it returns the contents exactly as they
  are. Usually, this is the first step in the input pipeline.

  Example:

  >>> with open("/tmp/file.txt", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.read_file("/tmp/file.txt")
  <tf.Tensor: shape=(), dtype=string, numpy=b'asdf'>

  Example of using the op in a function to read an image, decode it and
  reshape the tensor containing the pixel data:

  >>> @tf.function
  ... def load_image(filename):
  ...   raw = tf.io.read_file(filename)
  ...   image = tf.image.decode_png(raw, channels=3)
  ...   # the `print` executes during tracing.
  ...   print("Initial shape: ", image.shape)
  ...   image.set_shape([28, 28, 3])
  ...   print("Final shape: ", image.shape)
  ...   return image
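
  The traced function can then be mapped over a dataset of filenames (an
  illustrative sketch, assuming 28x28 RGB PNG files exist at the matched
  paths):

    dataset = tf.data.Dataset.list_files("/path/to/images/*.png")
    images = dataset.map(load_image)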


  Args:
    filename: string. The filename to read from.
    name: string. Optional name for the op.

  Returns:
    A tensor of dtype "string", with the file contents.
  """
  return gen_io_ops.read_file(filename, name)


@tf_export(
    "io.serialize_tensor", v1=["io.serialize_tensor", "serialize_tensor"])
def serialize_tensor(tensor, name=None):
  r"""Transforms a Tensor into a serialized TensorProto proto.

  This operation transforms data in a `tf.Tensor` into a `tf.Tensor` of type
  `tf.string` containing the data in a binary string in little-endian format.
  This operation can transform scalar data and linear arrays, but it is most
  useful in converting multidimensional arrays into a format accepted by binary
  storage formats such as a `TFRecord` or `tf.train.Example`.

  See also:
  - `tf.io.parse_tensor`: inverse operation of `tf.io.serialize_tensor` that
    transforms a scalar string containing a serialized Tensor in little-endian
    format into a Tensor of a specified type.
  - `tf.ensure_shape`: `parse_tensor` cannot statically determine the shape of
    the parsed tensor. Use `tf.ensure_shape` to set the static shape when
    running under a `tf.function`.
  - `.SerializeToString`: serializes a proto to a binary string.

  Example of serializing scalar data:

  >>> t = tf.constant(1)
  >>> tf.io.serialize_tensor(t)
  <tf.Tensor: shape=(), dtype=string, numpy=b'\x08...\x00'>
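
  As noted above, `tf.io.parse_tensor` inverts this operation; a round-trip
  sketch (the `out_type` must match the serialized dtype):

  >>> tf.io.parse_tensor(tf.io.serialize_tensor(t), out_type=tf.int32)
  <tf.Tensor: shape=(), dtype=int32, numpy=1>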


  Example of storing non-scalar data into a `tf.train.Example`:

  >>> t1 = [[1, 2]]
  >>> t2 = [[7, 8]]
  >>> nonscalar = tf.concat([t1, t2], 0)
  >>> nonscalar
  <tf.Tensor: shape=(2, 2), dtype=int32, numpy=
  array([[1, 2],
         [7, 8]], dtype=int32)>

  Serialize the data using `tf.io.serialize_tensor`.

  >>> serialized_nonscalar = tf.io.serialize_tensor(nonscalar)
  >>> serialized_nonscalar
  <tf.Tensor: shape=(), dtype=string, numpy=b'\x08...\x00'>

  Store the data in a `tf.train.Feature`.

  >>> feature_of_bytes = tf.train.Feature(
  ...   bytes_list=tf.train.BytesList(value=[serialized_nonscalar.numpy()]))
  >>> feature_of_bytes
  bytes_list {
    value: "\010...\000"
  }

  Put the `tf.train.Feature` message into a `tf.train.Example`.

  >>> features_for_example = {
  ...   'feature0': feature_of_bytes
  ... }
  >>> example_proto = tf.train.Example(
  ...   features=tf.train.Features(feature=features_for_example))
  >>> example_proto
  features {
    feature {
      key: "feature0"
      value {
        bytes_list {
          value: "\010...\000"
        }
      }
    }
  }
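
  Serialize the proto to a byte string, e.g. for writing to a `TFRecord`
  file, via the proto's `SerializeToString` method:

  >>> record_bytes = example_proto.SerializeToString()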


  Args:
    tensor: A `tf.Tensor`.
    name: string. Optional name for the op.

  Returns:
    A Tensor of dtype string.
  """
  return gen_parsing_ops.serialize_tensor(tensor, name)


@tf_export(v1=["ReaderBase"])
class ReaderBase:
  """Base class for different Reader types, that produce a record every step.

  Conceptually, Readers convert string 'work units' into records (key,
  value pairs). Typically the 'work units' are filenames and the
  records are extracted from the contents of those files. We want a
  single record produced per step, but a work unit can correspond to
  many records.

  Therefore we introduce some decoupling using a queue. The queue
  contains the work units, and the Reader dequeues from the queue when
  it is asked to produce a record (via Read()) but has finished with
  the last work unit.
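
  For example, lines can be read from files via a filename queue (a minimal
  graph-mode sketch, assuming the deprecated `tf.compat.v1` queue-based
  pipeline; the file names are hypothetical):

    filename_queue = tf.compat.v1.train.string_input_producer(
        ["file0.txt", "file1.txt"])
    reader = tf.compat.v1.TextLineReader()
    key, value = reader.read(filename_queue)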


  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """

  def __init__(self, reader_ref, supports_serialize=False):
    """Creates a new ReaderBase.

    Args:
      reader_ref: The operation that implements the reader.
      supports_serialize: True if the reader implementation can
        serialize its state.

    Raises:
      RuntimeError: If eager execution is enabled.
    """
    if context.executing_eagerly():
      raise RuntimeError(
          "Readers are not supported when eager execution is enabled. "
          "Instead, please use tf.data to get data into your model.")

    self._reader_ref = reader_ref
    self._supports_serialize = supports_serialize

  @property
  def reader_ref(self):
    """Op that implements the reader."""
    return self._reader_ref

  def read(self, queue, name=None):
    """Returns the next record (key, value) pair produced by a reader.

    Will dequeue a work unit from the queue if necessary (e.g. when the
    Reader needs to start reading from a new file since it has
    finished with the previous file).

    Args:
      queue: A Queue or a mutable string Tensor representing a handle
        to a Queue, with string work items.
      name: A name for the operation (optional).

    Returns:
      A tuple of Tensors (key, value).
      key: A string scalar Tensor.
      value: A string scalar Tensor.
    """
    if isinstance(queue, ops.Tensor):
      queue_ref = queue
    else:
      queue_ref = queue.queue_ref
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_read_v2(self._reader_ref, queue_ref, name=name)
    else:
      # For compatibility with pre-resource queues, create a ref(string) tensor
      # which can be looked up as the same queue by a resource manager.
      old_queue_op = gen_data_flow_ops.fake_queue(queue_ref)
      return gen_io_ops.reader_read(self._reader_ref, old_queue_op, name=name)

  def read_up_to(self, queue, num_records,  # pylint: disable=invalid-name
                 name=None):
    """Returns up to num_records (key, value) pairs produced by a reader.

    Will dequeue a work unit from the queue if necessary (e.g., when the
    Reader needs to start reading from a new file since it has
    finished with the previous file).
    It may return fewer than num_records even before the last batch.
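
    For example (a sketch, given a reader and filename queue built as in the
    `ReaderBase` class docstring):

      keys, values = reader.read_up_to(filename_queue, num_records=10)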


    Args:
      queue: A Queue or a mutable string Tensor representing a handle
        to a Queue, with string work items.
      num_records: Number of records to read.
      name: A name for the operation (optional).

    Returns:
      A tuple of Tensors (keys, values).
      keys: A 1-D string Tensor.
      values: A 1-D string Tensor.
    """
    if isinstance(queue, ops.Tensor):
      queue_ref = queue
    else:
      queue_ref = queue.queue_ref
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_read_up_to_v2(self._reader_ref,
                                             queue_ref,
                                             num_records,
                                             name=name)
    else:
      # For compatibility with pre-resource queues, create a ref(string) tensor
      # which can be looked up as the same queue by a resource manager.
      old_queue_op = gen_data_flow_ops.fake_queue(queue_ref)
      return gen_io_ops.reader_read_up_to(self._reader_ref,
                                          old_queue_op,
                                          num_records,
                                          name=name)

  def num_records_produced(self, name=None):
    """Returns the number of records this reader has produced.

    This is the same as the number of Read executions that have
    succeeded.

    Args:
      name: A name for the operation (optional).

    Returns:
      An int64 Tensor.
    """
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_num_records_produced_v2(self._reader_ref,
                                                       name=name)
    else:
      return gen_io_ops.reader_num_records_produced(self._reader_ref,
                                                    name=name)

  def num_work_units_completed(self, name=None):
    """Returns the number of work units this reader has finished processing.

    Args:
      name: A name for the operation (optional).

    Returns:
      An int64 Tensor.
    """
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_num_work_units_completed_v2(self._reader_ref,
                                                           name=name)
    else:
      return gen_io_ops.reader_num_work_units_completed(self._reader_ref,
                                                        name=name)

  def serialize_state(self, name=None):
    """Produce a string tensor that encodes the state of a reader.

    Not all Readers support being serialized, so this can produce an
    Unimplemented error.
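
    A reader's position can be checkpointed and later restored (a sketch,
    assuming this reader type supports serialization):

      state = reader.serialize_state()
      # ... later, possibly after re-creating the reader ...
      reader.restore_state(state)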


    Args:
      name: A name for the operation (optional).

    Returns:
      A string Tensor.
    """
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_serialize_state_v2(self._reader_ref, name=name)
    else:
      return gen_io_ops.reader_serialize_state(self._reader_ref, name=name)

  def restore_state(self, state, name=None):
    """Restore a reader to a previously saved state.

    Not all Readers support being restored, so this can produce an
    Unimplemented error.

    Args:
      state: A string Tensor.
        Result of a SerializeState of a Reader with matching type.
      name: A name for the operation (optional).

    Returns:
      The created Operation.
    """
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_restore_state_v2(
          self._reader_ref, state, name=name)
    else:
      return gen_io_ops.reader_restore_state(self._reader_ref, state, name=name)

  @property
  def supports_serialize(self):
    """Whether the Reader implementation can serialize its state."""
    return self._supports_serialize

  def reset(self, name=None):
    """Restore a reader to its initial clean state.

    Args:
      name: A name for the operation (optional).

    Returns:
      The created Operation.
    """
    if self._reader_ref.dtype == dtypes.resource:
      return gen_io_ops.reader_reset_v2(self._reader_ref, name=name)
    else:
      return gen_io_ops.reader_reset(self._reader_ref, name=name)


ops.NotDifferentiable("ReaderRead")
ops.NotDifferentiable("ReaderReadUpTo")
ops.NotDifferentiable("ReaderNumRecordsProduced")
ops.NotDifferentiable("ReaderNumWorkUnitsCompleted")
ops.NotDifferentiable("ReaderSerializeState")
ops.NotDifferentiable("ReaderRestoreState")
ops.NotDifferentiable("ReaderReset")


@tf_export(v1=["WholeFileReader"])
class WholeFileReader(ReaderBase):
  """A Reader that outputs the entire contents of a file as a value.

  To use, enqueue filenames in a Queue. The output of Read will
  be a filename (key) and the contents of that file (value).
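
  The `tf.data` replacement suggested by the deprecation notice is, roughly
  (a sketch; `filenames` is a hypothetical list of paths):

    contents = tf.data.Dataset.from_tensor_slices(filenames).map(
        tf.io.read_file)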


  See ReaderBase for supported methods.

  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """

  @deprecation.deprecated(
      None, "Queue-based input pipelines have been replaced by `tf.data`. Use "
      "`tf.data.Dataset.map(tf.read_file)`.")
  def __init__(self, name=None):
    """Create a WholeFileReader.

    Args:
      name: A name for the operation (optional).
    """
    rr = gen_io_ops.whole_file_reader_v2(name=name)
    super(WholeFileReader, self).__init__(rr, supports_serialize=True)


ops.NotDifferentiable("WholeFileReader")


@tf_export(v1=["TextLineReader"])
class TextLineReader(ReaderBase):
  """A Reader that outputs the lines of a file delimited by newlines.

  Newlines are stripped from the output.
  See ReaderBase for supported methods.
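
  The `tf.data` replacement is `tf.data.TextLineDataset`, e.g. (a sketch
  with hypothetical file names):

    dataset = tf.data.TextLineDataset(["file0.txt", "file1.txt"])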


  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """
  # TODO(josh11b): Support serializing and restoring state.

  @deprecation.deprecated(
      None, "Queue-based input pipelines have been replaced by `tf.data`. Use "
      "`tf.data.TextLineDataset`.")
  def __init__(self, skip_header_lines=None, name=None):
    """Create a TextLineReader.

    Args:
      skip_header_lines: An optional int. Defaults to 0. Number of lines
        to skip from the beginning of every file.
      name: A name for the operation (optional).
    """
    rr = gen_io_ops.text_line_reader_v2(skip_header_lines=skip_header_lines,
                                        name=name)
    super(TextLineReader, self).__init__(rr)


ops.NotDifferentiable("TextLineReader")


@tf_export(v1=["FixedLengthRecordReader"])
class FixedLengthRecordReader(ReaderBase):
  """A Reader that outputs fixed-length records from a file.

  See ReaderBase for supported methods.
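
  The `tf.data` replacement is `tf.data.FixedLengthRecordDataset`, e.g.
  (a sketch with a hypothetical file name and record layout):

    dataset = tf.data.FixedLengthRecordDataset(
        ["data.bin"], record_bytes=512, header_bytes=16, footer_bytes=8)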


  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """
  # TODO(josh11b): Support serializing and restoring state.

  @deprecation.deprecated(
      None, "Queue-based input pipelines have been replaced by `tf.data`. Use "
      "`tf.data.FixedLengthRecordDataset`.")
  def __init__(self,
               record_bytes,
               header_bytes=None,
               footer_bytes=None,
               hop_bytes=None,
               name=None,
               encoding=None):
    """Create a FixedLengthRecordReader.

    Args:
      record_bytes: An int.
      header_bytes: An optional int. Defaults to 0.
      footer_bytes: An optional int. Defaults to 0.
      hop_bytes: An optional int. Defaults to 0.
      name: A name for the operation (optional).
      encoding: The type of encoding for the file. Defaults to none.
    """
    rr = gen_io_ops.fixed_length_record_reader_v2(
        record_bytes=record_bytes,
        header_bytes=header_bytes,
        footer_bytes=footer_bytes,
        hop_bytes=hop_bytes,
        encoding=encoding,
        name=name)
    super(FixedLengthRecordReader, self).__init__(rr)


ops.NotDifferentiable("FixedLengthRecordReader")


@tf_export(v1=["TFRecordReader"])
class TFRecordReader(ReaderBase):
  """A Reader that outputs the records from a TFRecords file.

  See ReaderBase for supported methods.
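
  The `tf.data` replacement is `tf.data.TFRecordDataset`, e.g. (a sketch
  with a hypothetical file name):

    dataset = tf.data.TFRecordDataset(["data.tfrecord"])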


  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """
  # TODO(josh11b): Support serializing and restoring state.

  @deprecation.deprecated(
      None, "Queue-based input pipelines have been replaced by `tf.data`. Use "
      "`tf.data.TFRecordDataset`.")
  def __init__(self, name=None, options=None):
    """Create a TFRecordReader.

    Args:
      name: A name for the operation (optional).
      options: A TFRecordOptions object (optional).
    """
    compression_type = python_io.TFRecordOptions.get_compression_type_string(
        options)

    rr = gen_io_ops.tf_record_reader_v2(
        name=name, compression_type=compression_type)
    super(TFRecordReader, self).__init__(rr)


ops.NotDifferentiable("TFRecordReader")


@tf_export(v1=["LMDBReader"])
class LMDBReader(ReaderBase):
  """A Reader that outputs the records from an LMDB file.

  See ReaderBase for supported methods.

  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """

  @deprecation.deprecated(
      None, "Queue-based input pipelines have been replaced by `tf.data`. Use "
      "`tf.contrib.data.LMDBDataset`.")
  def __init__(self, name=None, options=None):
    """Create an LMDBReader.

    Args:
      name: A name for the operation (optional).
      options: A LMDBRecordOptions object (optional).
    """
    del options
    rr = gen_io_ops.lmdb_reader(name=name)
    super(LMDBReader, self).__init__(rr)


ops.NotDifferentiable("LMDBReader")


@tf_export(v1=["IdentityReader"])
class IdentityReader(ReaderBase):
  """A Reader that outputs the queued work as both the key and value.

  To use, enqueue strings in a Queue. Read will take the front
  work string and output (work, work).
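
  A `tf.data` sketch of the same (key, value) behavior, assuming `strings`
  is a hypothetical dataset of work items:

    pairs = strings.map(lambda x: (x, x))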


  See ReaderBase for supported methods.

  @compatibility(eager)
  Readers are not compatible with eager execution. Instead, please
  use `tf.data` to get data into your model.
  @end_compatibility
  """

  @deprecation.deprecated(
      None, "Queue-based input pipelines have been replaced by `tf.data`. Use "
      "`tf.data.Dataset.map(...)`.")
  def __init__(self, name=None):
    """Create an IdentityReader.

    Args:
      name: A name for the operation (optional).
    """
    rr = gen_io_ops.identity_reader_v2(name=name)
    super(IdentityReader, self).__init__(rr, supports_serialize=True)


ops.NotDifferentiable("IdentityReader")