Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/lib/io/file_io.py: 36%

271 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""File IO methods that wrap the C++ FileSystem API."""
import binascii
import os
from posixpath import join as urljoin
import uuid

import six

from tensorflow.python.framework import errors
from tensorflow.python.lib.io import _pywrap_file_io
from tensorflow.python.util import compat
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export

# A good default block size depends on the system in question.
# A somewhat conservative default chosen here.
_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024


class FileIO(object):
  """FileIO class that exposes methods to read / write to / from files.

  The constructor takes the following arguments:
  name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object)
    giving the pathname of the file to be opened.
  mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode.

  Can be used as an iterator to iterate over lines in the file.

  The default buffer size used for the BufferedInputStream used for reading
  the file line by line is 1024 * 512 bytes.
  """

  def __init__(self, name, mode, encoding="utf-8"):
    self.__name = name
    self.__mode = mode
    self.__encoding = encoding
    self._read_buf = None
    self._writable_file = None
    self._binary_mode = "b" in mode
    mode = mode.replace("b", "")
    if mode not in ("r", "w", "a", "r+", "w+", "a+"):
      raise errors.InvalidArgumentError(
          None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
    self._read_check_passed = mode in ("r", "r+", "a+", "w+")
    self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")

  @property
  def name(self):
    """Returns the file name."""
    return self.__name

  @property
  def mode(self):
    """Returns the mode in which the file was opened."""
    return self.__mode

  def _preread_check(self):
    if not self._read_buf:
      if not self._read_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for reading")
      self._read_buf = _pywrap_file_io.BufferedInputStream(
          compat.path_to_str(self.__name), 1024 * 512)

  def _prewrite_check(self):
    if not self._writable_file:
      if not self._write_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for writing")
      self._writable_file = _pywrap_file_io.WritableFile(
          compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode))

  def _prepare_value(self, val):
    if self._binary_mode:
      return compat.as_bytes(val, encoding=self.__encoding)
    else:
      return compat.as_str_any(val, encoding=self.__encoding)

  def size(self):
    """Returns the size of the file."""
    return stat(self.__name).length

  def write(self, file_content):
    """Writes file_content to the file. Appends to the end of the file."""
    self._prewrite_check()
    self._writable_file.append(
        compat.as_bytes(file_content, encoding=self.__encoding))

  def read(self, n=-1):
    """Returns the contents of a file as a string.

    Starts reading from current position in file.

    Args:
      n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file.

    Returns:
      `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the
      string if in string (regular) mode.
    """
    self._preread_check()
    if n == -1:
      length = self.size() - self.tell()
    else:
      length = n
    return self._prepare_value(self._read_buf.read(length))

  @deprecation.deprecated_args(
      None, "position is deprecated in favor of the offset argument.",
      "position")
  def seek(self, offset=None, whence=0, position=None):
    # TODO(jhseu): Delete later. Used to omit `position` from docs.
    # pylint: disable=g-doc-args
    """Seeks to the offset in the file.

    Args:
      offset: The byte count relative to the whence argument.
      whence: Valid values for whence are:
        0: start of the file (default)
        1: relative to the current position of the file
        2: relative to the end of file. `offset` is usually negative.
    """
    # pylint: enable=g-doc-args
    self._preread_check()
    # We needed to make offset a keyword argument for backwards-compatibility.
    # This check exists so that we can convert back to having offset be a
    # positional argument.
    # TODO(jhseu): Make `offset` a positional argument after `position` is
    # deleted.
    if offset is None and position is None:
      raise TypeError("seek(): offset argument required")
    if offset is not None and position is not None:
      raise TypeError("seek(): offset and position may not be set "
                      "simultaneously.")

    if position is not None:
      offset = position

    if whence == 0:
      pass
    elif whence == 1:
      offset += self.tell()
    elif whence == 2:
      offset += self.size()
    else:
      raise errors.InvalidArgumentError(
          None, None,
          "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format(
              whence))
    self._read_buf.seek(offset)

  def readline(self):
    r"""Reads the next line, keeping \n. At EOF, returns ''."""
    self._preread_check()
    return self._prepare_value(self._read_buf.readline())

  def readlines(self):
    """Returns all lines from the file in a list."""
    self._preread_check()
    lines = []
    while True:
      s = self.readline()
      if not s:
        break
      lines.append(s)
    return lines

  def tell(self):
    """Returns the current position in the file."""
    if self._read_check_passed:
      self._preread_check()
      return self._read_buf.tell()
    else:
      self._prewrite_check()

      return self._writable_file.tell()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.close()

  def __iter__(self):
    return self

  def __next__(self):
    retval = self.readline()
    if not retval:
      raise StopIteration()
    return retval

  def next(self):
    return self.__next__()

  def flush(self):
    """Flushes the Writable file.

    This only ensures that the data has made its way out of the process without
    any guarantees on whether it's written to disk. This means that the
    data would survive an application crash but not necessarily an OS crash.
    """
    if self._writable_file:
      self._writable_file.flush()

  def close(self):
    r"""Closes the file.

    Should be called for the WritableFile to be flushed.

    In general, if you use the context manager pattern, you don't need to call
    this directly.

    >>> with tf.io.gfile.GFile("/tmp/x", "w") as f:
    ...   f.write("asdf\n")
    ...   f.write("qwer\n")
    >>> # implicit f.close() at the end of the block

    For cloud filesystems, forgetting to call `close()` might result in data
    loss as last write might not have been replicated.
    """
    self._read_buf = None
    if self._writable_file:
      self._writable_file.close()
      self._writable_file = None

  def seekable(self):
    """Returns True as FileIO supports random access ops of seek()/tell()"""
    return True


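# A minimal usage sketch (not part of the original module) of the FileIO class
# above, showing the read/write mode checks and the seek/tell semantics. The
# path "/tmp/fileio_demo.txt" is a hypothetical example location.
def _example_fileio_usage():
  with FileIO("/tmp/fileio_demo.txt", "w") as f:
    f.write("first\n")
    f.write("second\n")
  with FileIO("/tmp/fileio_demo.txt", "r") as f:
    lines = f.readlines()  # ['first\n', 'second\n']
    f.seek(offset=0)  # whence=0 (default): rewind to the start of the file
    first = f.readline()  # 'first\n'
    position = f.tell()  # byte offset just past the first line
  return lines, first, position
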

@tf_export("io.gfile.exists")
def file_exists_v2(path):
  """Determines whether a path exists or not.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True

  You can also specify the URI scheme for selecting a different filesystem:

  >>> # for a GCS filesystem path:
  >>> # tf.io.gfile.exists("gs://bucket/file")
  >>> # for a local filesystem:
  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("file:///tmp/x")
  True

  This currently returns `True` for existing directories, but don't rely on
  this behavior, especially if you are using cloud filesystems (e.g., GCS, S3,
  Hadoop):

  >>> tf.io.gfile.exists("/tmp")
  True

  Args:
    path: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
    False if the path does not exist and there are no filesystem errors.

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
  """
  try:
    _pywrap_file_io.FileExists(compat.path_to_bytes(path))
  except errors.NotFoundError:
    return False
  return True


@tf_export(v1=["gfile.Exists"])
def file_exists(filename):
  return file_exists_v2(filename)


file_exists.__doc__ = file_exists_v2.__doc__


@tf_export(v1=["gfile.Remove"])
def delete_file(filename):
  """Deletes the file located at 'filename'.

  Args:
    filename: string, a filename

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API. E.g.,
      `NotFoundError` if the file does not exist.
  """
  delete_file_v2(filename)


@tf_export("io.gfile.remove")
def delete_file_v2(path):
  """Deletes the path located at 'path'.

  Args:
    path: string, a path

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API. E.g.,
      `NotFoundError` if the path does not exist.
  """
  _pywrap_file_io.DeleteFile(compat.path_to_bytes(path))


def read_file_to_string(filename, binary_mode=False):
  """Reads the entire contents of a file to a string.

  Args:
    filename: string, path to a file
    binary_mode: whether to open the file in binary mode or not. This changes
      the type of the object returned.

  Returns:
    contents of the file as a string or bytes.

  Raises:
    errors.OpError: Raises a variety of `OpError` subtypes, e.g.
      `NotFoundError`.
  """
  if binary_mode:
    f = FileIO(filename, mode="rb")
  else:
    f = FileIO(filename, mode="r")
  return f.read()


def write_string_to_file(filename, file_content):
  """Writes a string to a given file.

  Args:
    filename: string, path to a file
    file_content: string, contents that need to be written to the file

  Raises:
    errors.OpError: If there are errors during the operation.
  """
  with FileIO(filename, mode="w") as f:
    f.write(file_content)


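# A minimal sketch (not part of the original module) pairing the two helpers
# above; "/tmp/string_demo.txt" is a hypothetical example location.
def _example_string_round_trip():
  write_string_to_file("/tmp/string_demo.txt", "hello world")
  return read_file_to_string("/tmp/string_demo.txt")  # 'hello world'
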

@tf_export(v1=["gfile.Glob"])
def get_matching_files(filename):
  """Returns a list of files that match the given pattern(s).

  Args:
    filename: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    * errors.OpError: If there are filesystem / directory listing errors.
    * errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  return get_matching_files_v2(filename)


@tf_export("io.gfile.glob")
def get_matching_files_v2(pattern):
  r"""Returns a list of files that match the given pattern(s).

  The patterns are defined as strings. Supported patterns are defined
  below. Note that the pattern can be a Python iterable of string patterns.

  The format definition of the pattern is:

  **pattern**: `{ term }`

  **term**:
    * `'*'`: matches any sequence of non-'/' characters
    * `'?'`: matches a single non-'/' character
    * `'[' [ '^' ] { match-list } ']'`: matches any single
      character (not) on the list
    * `c`: matches character `c` where `c != '*', '?', '\\', '['`
    * `'\\' c`: matches character `c`

  **character range**:
    * `c`: matches character `c` while `c != '\\', '-', ']'`
    * `'\\' c`: matches character `c`
    * `lo '-' hi`: matches character `c` for `lo <= c <= hi`

  Examples:

  >>> tf.io.gfile.glob("*.py")
  ... # For example, ['__init__.py']

  >>> tf.io.gfile.glob("__init__.??")
  ... # As above

  >>> files = {"*.py"}
  >>> the_iterator = iter(files)
  >>> tf.io.gfile.glob(the_iterator)
  ... # As above

  See the C++ function `GetMatchingPaths` in
  [`core/platform/file_system.h`](../../../core/platform/file_system.h)
  for implementation details.

  Args:
    pattern: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    errors.OpError: If there are filesystem / directory listing errors.
    errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  if isinstance(pattern, six.string_types):
    return [
        # Convert the filenames to string from bytes.
        compat.as_str_any(matching_filename)
        for matching_filename in _pywrap_file_io.GetMatchingFiles(
            compat.as_bytes(pattern))
    ]
  else:
    return [
        # Convert the filenames to string from bytes.
        compat.as_str_any(matching_filename)  # pylint: disable=g-complex-comprehension
        for single_filename in pattern
        for matching_filename in _pywrap_file_io.GetMatchingFiles(
            compat.as_bytes(single_filename))
    ]


@tf_export(v1=["gfile.MkDir"])
def create_dir(dirname):
  """Creates a directory with the name `dirname`.

  Args:
    dirname: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  create_dir_v2(dirname)


@tf_export("io.gfile.mkdir")
def create_dir_v2(path):
  """Creates a directory with the name given by `path`.

  Args:
    path: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CreateDir(compat.path_to_bytes(path))


@tf_export(v1=["gfile.MakeDirs"])
def recursive_create_dir(dirname):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if dirname already exists and is writable.

  Args:
    dirname: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  recursive_create_dir_v2(dirname)


@tf_export("io.gfile.makedirs")
def recursive_create_dir_v2(path):
  """Creates a directory and all parent/intermediate directories.

  It succeeds if path already exists and is writable.

  Args:
    path: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))


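# A minimal sketch (not part of the original module) contrasting the two
# directory helpers above: create_dir_v2 requires the parent directory to
# exist, while recursive_create_dir_v2 creates the whole chain. The paths
# under "/tmp/demo" are hypothetical, and rerunning this would raise because
# the directories would already exist.
def _example_mkdir_vs_makedirs():
  recursive_create_dir_v2("/tmp/demo/a/b")  # creates /tmp/demo, then a, then b
  create_dir_v2("/tmp/demo/a/b/c")  # succeeds: parent /tmp/demo/a/b now exists
  return is_directory_v2("/tmp/demo/a/b/c")  # True
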

@tf_export("io.gfile.copy")
def copy_v2(src, dst, overwrite=False):
  """Copies data from `src` to `dst`.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  You can also specify the URI scheme for selecting a different filesystem:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  Note that you always need to specify a file name, even if moving into a new
  directory. This is because some cloud filesystems don't have the concept of a
  directory.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.mkdir("/tmp/new_dir")
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/new_dir/y")
  >>> tf.io.gfile.exists("/tmp/new_dir/y")
  True
  >>> tf.io.gfile.rmtree("/tmp/new_dir")

  If you want to prevent errors if the path already exists, you can use the
  `overwrite` argument:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y", overwrite=True)
  >>> tf.io.gfile.remove("/tmp/y")

  Note that the above will still result in an error if you try to overwrite a
  directory with a file.

  Note that you cannot copy a directory, only file arguments are supported.

  Args:
    src: string, name of the file whose contents need to be copied
    dst: string, name of the file to copy to
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CopyFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)


@tf_export(v1=["gfile.Copy"])
def copy(oldpath, newpath, overwrite=False):
  copy_v2(oldpath, newpath, overwrite)


copy.__doc__ = copy_v2.__doc__


@tf_export(v1=["gfile.Rename"])
def rename(oldname, newname, overwrite=False):
  """Rename or move a file / directory.

  Args:
    oldname: string, pathname for a file
    newname: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `newname` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  rename_v2(oldname, newname, overwrite)


@tf_export("io.gfile.rename")
def rename_v2(src, dst, overwrite=False):
  """Rename or move a file / directory.

  Args:
    src: string, pathname for a file
    dst: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.RenameFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)


def atomic_write_string_to_file(filename, contents, overwrite=True):
  """Writes to `filename` atomically.

  This means that when `filename` appears in the filesystem, it will contain
  all of `contents`. With write_string_to_file, it is possible for the file
  to appear in the filesystem with `contents` only partially written.

  Accomplished by writing to a temp file and then renaming it.

  Args:
    filename: string, pathname for a file
    contents: string, contents that need to be written to the file
    overwrite: boolean, if false it's an error for `filename` to be occupied by
      an existing file.
  """
  if not has_atomic_move(filename):
    write_string_to_file(filename, contents)
  else:
    temp_pathname = filename + ".tmp" + uuid.uuid4().hex
    write_string_to_file(temp_pathname, contents)
    try:
      rename(temp_pathname, filename, overwrite)
    except errors.OpError:
      delete_file(temp_pathname)
      raise


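# A minimal sketch (not part of the original module) of the write-then-rename
# pattern implemented above: a reader of "/tmp/config.json" (a hypothetical
# path) sees either the old contents or the new contents, never a partial file.
def _example_atomic_write():
  atomic_write_string_to_file("/tmp/config.json", '{"learning_rate": 0.001}')
  return read_file_to_string("/tmp/config.json")  # '{"learning_rate": 0.001}'
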

@tf_export(v1=["gfile.DeleteRecursively"])
def delete_recursively(dirname):
  """Deletes everything under dirname recursively.

  Args:
    dirname: string, a path to a directory

  Raises:
    errors.OpError: If the operation fails.
  """
  delete_recursively_v2(dirname)


@tf_export("io.gfile.rmtree")
def delete_recursively_v2(path):
  """Deletes everything under path recursively.

  Args:
    path: string, a path

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path))


@tf_export(v1=["gfile.IsDirectory"])
def is_directory(dirname):
  """Returns whether the path is a directory or not.

  Args:
    dirname: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  return is_directory_v2(dirname)


@tf_export("io.gfile.isdir")
def is_directory_v2(path):
  """Returns whether the path is a directory or not.

  Args:
    path: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  try:
    return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
  except errors.OpError:
    return False


def has_atomic_move(path):
  """Checks whether the file system supports atomic moves.

  Returns whether or not the file system of the given path supports the atomic
  move operation for a file or folder. If atomic move is supported, it is
  recommended to use a temp location for writing and then move to the final
  location.

  Args:
    path: string, path to a file

  Returns:
    True, if the path is on a file system that supports atomic move
    False, if the file system does not support atomic move. In such cases
      we need to be careful about using moves. In some cases it is safer
      not to use temporary locations in this case.
  """
  try:
    return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path))
  except errors.OpError:
    # defaults to True
    return True


@tf_export(v1=["gfile.ListDirectory"])
def list_directory(dirname):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    dirname: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  return list_directory_v2(dirname)


@tf_export("io.gfile.listdir")
def list_directory_v2(path):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order. It does not contain the special entries "."
  and "..".

  Args:
    path: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  if not is_directory(path):
    raise errors.NotFoundError(
        node_def=None,
        op=None,
        message="Could not find directory {}".format(path))

  # Convert each element to string, since the return values of the
  # vector of string should be interpreted as strings, not bytes.
  return [
      compat.as_str_any(filename)
      for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
  ]


@tf_export("io.gfile.join")
def join(path, *paths):
  r"""Join one or more path components intelligently.

  TensorFlow specific filesystems will be joined
  like a url (using "/" as the path separator) on all platforms:

  On Windows or Linux/Unix-like:
  >>> tf.io.gfile.join("gcs://folder", "file.py")
  'gcs://folder/file.py'

  >>> tf.io.gfile.join("ram://folder", "file.py")
  'ram://folder/file.py'

  But the native filesystem is handled just like os.path.join:

  >>> path = tf.io.gfile.join("folder", "file.py")
  >>> if os.name == "nt":
  ...   expected = "folder\\file.py"  # Windows
  ... else:
  ...   expected = "folder/file.py"  # Linux/Unix-like
  >>> path == expected
  True

  Args:
    path: string, path to a directory
    paths: string, additional paths to concatenate

  Returns:
    path: the joined path.
  """
  # os.path.join won't take mixed bytes/str, so don't overwrite the incoming
  # `path` var.
  path_ = compat.as_str_any(compat.path_to_str(path))
  if "://" in path_[1:]:
    return urljoin(path, *paths)
  return os.path.join(path, *paths)


@tf_export(v1=["gfile.Walk"])
def walk(top, in_order=True):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    in_order: bool, Traverse in order if True, post order if False. Errors that
      happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """
  return walk_v2(top, in_order)


@tf_export("io.gfile.walk")
def walk_v2(top, topdown=True, onerror=None):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    topdown: bool, Traverse pre order if True, post order if False.
    onerror: optional handler for errors. Should be a function; it will be
      called with the error as argument. Rethrowing the error aborts the walk.
      Errors that happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """

  def _make_full_path(parent, item):
    # Since `join` discards paths before one that starts with the path
    # separator (https://docs.python.org/3/library/os.path.html#join),
    # we have to manually handle that case as `/` is a valid character on GCS.
    if item[0] == os.sep:
      return "".join([join(parent, ""), item])
    return join(parent, item)

  top = compat.as_str_any(compat.path_to_str(top))
  try:
    listing = list_directory(top)
  except errors.NotFoundError as err:
    if onerror:
      onerror(err)
    # Either way there is nothing to list, so stop walking this directory
    # (otherwise `listing` would be unbound below).
    return

  files = []
  subdirs = []
  for item in listing:
    full_path = _make_full_path(top, item)
    if is_directory(full_path):
      subdirs.append(item)
    else:
      files.append(item)

  here = (top, subdirs, files)

  if topdown:
    yield here

  for subdir in subdirs:
    for subitem in walk_v2(
        _make_full_path(top, subdir), topdown, onerror=onerror):
      yield subitem

  if not topdown:
    yield here


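# A minimal sketch (not part of the original module) that collects every file
# under a root directory using walk_v2; "/tmp/demo" is a hypothetical path.
def _example_collect_files(root="/tmp/demo"):
  all_files = []
  for dirname, _, filenames in walk_v2(root):
    for filename in filenames:
      all_files.append(join(dirname, filename))
  return all_files
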

@tf_export(v1=["gfile.Stat"])
def stat(filename):
  """Returns file statistics for a given path.

  Args:
    filename: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return stat_v2(filename)


@tf_export("io.gfile.stat")
def stat_v2(path):
  """Returns file statistics for a given path.

  Args:
    path: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return _pywrap_file_io.Stat(compat.path_to_str(path))


def filecmp(filename_a, filename_b):
  """Compare two files, returning True if they are the same, False otherwise.

  We check size first and return False quickly if the files are different
  sizes. If they are the same size, we continue by generating a crc for the
  whole file.

  You might wonder: why not use Python's `filecmp.cmp()` instead? The answer
  is that the builtin library is not robust to the many different filesystems
  TensorFlow runs on, so here we perform a similar comparison with the more
  robust FileIO.

  Args:
    filename_a: string path to the first file.
    filename_b: string path to the second file.

  Returns:
    True if the files are the same, False otherwise.
  """
  size_a = FileIO(filename_a, "rb").size()
  size_b = FileIO(filename_b, "rb").size()
  if size_a != size_b:
    return False

  # Size is the same. Do a full check.
  crc_a = file_crc32(filename_a)
  crc_b = file_crc32(filename_b)
  return crc_a == crc_b


def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE):
  """Get the crc32 of the passed file.

  The crc32 of a file can be used for error checking; two files with the same
  crc32 are considered equivalent. Note that the entire file must be read
  to produce the crc32.

  Args:
    filename: string, path to a file
    block_size: Integer, process the files by reading blocks of `block_size`
      bytes. Use -1 to read the file at once.

  Returns:
    hexadecimal as string, the crc32 of the passed file.
  """
  crc = 0
  with FileIO(filename, mode="rb") as f:
    chunk = f.read(n=block_size)
    while chunk:
      crc = binascii.crc32(chunk, crc)
      chunk = f.read(n=block_size)
  return hex(crc & 0xFFFFFFFF)


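# A minimal sketch (not part of the original module) exercising the
# size-then-crc comparison above; both paths are hypothetical.
def _example_filecmp():
  write_string_to_file("/tmp/a.txt", "same contents")
  write_string_to_file("/tmp/b.txt", "same contents")
  return filecmp("/tmp/a.txt", "/tmp/b.txt")  # True: sizes and crc32s match
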

@tf_export("io.gfile.get_registered_schemes")
def get_registered_schemes():
  """Returns the currently registered filesystem schemes.

  The `tf.io.gfile` APIs, in addition to accepting traditional filesystem
  paths, also accept file URIs that begin with a scheme. For example, the
  local filesystem path `/tmp/tf` can also be addressed as `file:///tmp/tf`.
  In this case, the scheme is `file`, followed by `://` and then the path,
  according to
  [URI syntax](https://datatracker.ietf.org/doc/html/rfc3986#section-3).

  This function returns the currently registered schemes that will be
  recognized by `tf.io.gfile` APIs. This includes both built-in schemes and
  those registered by other TensorFlow filesystem implementations, for example
  those provided by [TensorFlow I/O](https://github.com/tensorflow/io).

  The empty string is always included, and represents the "scheme" for regular
  local filesystem paths.

  Returns:
    List of string schemes, e.g. `['', 'file', 'ram']`, in arbitrary order.

  Raises:
    errors.OpError: If the operation fails.
  """
  return _pywrap_file_io.GetRegisteredSchemes()