Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/lib/io/file_io.py: 36%
271 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""File IO methods that wrap the C++ FileSystem API."""
16import binascii
17import os
18from posixpath import join as urljoin
19import uuid
21import six
23from tensorflow.python.framework import errors
24from tensorflow.python.lib.io import _pywrap_file_io
25from tensorflow.python.util import compat
26from tensorflow.python.util import deprecation
27from tensorflow.python.util.tf_export import tf_export
# A good default block size depends on the system in question.
# A somewhat conservative default chosen here.
# Used as the per-read chunk size in file_crc32() below (16 MiB).
_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024
class FileIO(object):
  """FileIO class that exposes methods to read / write to / from files.

  The constructor takes the following arguments:
  name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object)
    giving the pathname of the file to be opened.
  mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode.

  Can be used as an iterator to iterate over lines in the file.

  The default buffer size used for the BufferedInputStream used for reading
  the file line by line is 1024 * 512 bytes.
  """

  def __init__(self, name, mode, encoding="utf-8"):
    self.__name = name
    self.__mode = mode
    self.__encoding = encoding
    # The underlying streams are created lazily, on first read/write, by
    # _preread_check() / _prewrite_check().
    self._read_buf = None
    self._writable_file = None
    # "b" only controls the Python-side bytes/str conversion performed by
    # _prepare_value(); it is stripped before validating the mode string.
    self._binary_mode = "b" in mode
    mode = mode.replace("b", "")
    if mode not in ("r", "w", "a", "r+", "w+", "a+"):
      raise errors.InvalidArgumentError(
          None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
    self._read_check_passed = mode in ("r", "r+", "a+", "w+")
    self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")

  @property
  def name(self):
    """Returns the file name."""
    return self.__name

  @property
  def mode(self):
    """Returns the mode in which the file was opened."""
    return self.__mode

  def _preread_check(self):
    # Lazily opens the buffered reader; fails if the mode forbids reading.
    if not self._read_buf:
      if not self._read_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for reading")
      # 512 KiB buffer for line-by-line reads; see class docstring.
      self._read_buf = _pywrap_file_io.BufferedInputStream(
          compat.path_to_str(self.__name), 1024 * 512)

  def _prewrite_check(self):
    # Lazily opens the writable file; fails if the mode forbids writing.
    if not self._writable_file:
      if not self._write_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for writing")
      self._writable_file = _pywrap_file_io.WritableFile(
          compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode))

  def _prepare_value(self, val):
    # Converts data from the C++ layer to bytes or str, depending on whether
    # the file was opened in binary mode.
    if self._binary_mode:
      return compat.as_bytes(val, encoding=self.__encoding)
    else:
      return compat.as_str_any(val, encoding=self.__encoding)

  def size(self):
    """Returns the size of the file."""
    # Delegates to the module-level stat(); does not require an open stream.
    return stat(self.__name).length

  def write(self, file_content):
    """Writes file_content to the file. Appends to the end of the file."""
    self._prewrite_check()
    self._writable_file.append(
        compat.as_bytes(file_content, encoding=self.__encoding))

  def read(self, n=-1):
    """Returns the contents of a file as a string.

    Starts reading from current position in file.

    Args:
      n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file.

    Returns:
      `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the
      string if in string (regular) mode.
    """
    self._preread_check()
    if n == -1:
      # Read everything from the current position to the end of the file.
      length = self.size() - self.tell()
    else:
      length = n
    return self._prepare_value(self._read_buf.read(length))

  @deprecation.deprecated_args(
      None, "position is deprecated in favor of the offset argument.",
      "position")
  def seek(self, offset=None, whence=0, position=None):
    # TODO(jhseu): Delete later. Used to omit `position` from docs.
    # pylint: disable=g-doc-args
    """Seeks to the offset in the file.

    Args:
      offset: The byte count relative to the whence argument.
      whence: Valid values for whence are:
        0: start of the file (default)
        1: relative to the current position of the file
        2: relative to the end of file. `offset` is usually negative.
    """
    # pylint: enable=g-doc-args
    self._preread_check()
    # We needed to make offset a keyword argument for backwards-compatibility.
    # This check exists so that we can convert back to having offset be a
    # positional argument.
    # TODO(jhseu): Make `offset` a positional argument after `position` is
    # deleted.
    if offset is None and position is None:
      raise TypeError("seek(): offset argument required")
    if offset is not None and position is not None:
      raise TypeError("seek(): offset and position may not be set "
                      "simultaneously.")

    if position is not None:
      offset = position

    # Translate `whence` into an absolute offset before seeking.
    if whence == 0:
      pass
    elif whence == 1:
      offset += self.tell()
    elif whence == 2:
      offset += self.size()
    else:
      raise errors.InvalidArgumentError(
          None, None,
          "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format(
              whence))
    self._read_buf.seek(offset)

  def readline(self):
    r"""Reads the next line, keeping \n. At EOF, returns ''."""
    self._preread_check()
    return self._prepare_value(self._read_buf.readline())

  def readlines(self):
    """Returns all lines from the file in a list."""
    self._preread_check()
    lines = []
    while True:
      s = self.readline()
      if not s:
        break
      lines.append(s)
    return lines

  def tell(self):
    """Returns the current position in the file."""
    # Position is tracked by whichever underlying stream is in use.
    if self._read_check_passed:
      self._preread_check()
      return self._read_buf.tell()
    else:
      self._prewrite_check()
      return self._writable_file.tell()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.close()

  def __iter__(self):
    return self

  def __next__(self):
    retval = self.readline()
    if not retval:
      raise StopIteration()
    return retval

  def next(self):
    # Python 2-style alias for __next__().
    return self.__next__()

  def flush(self):
    """Flushes the Writable file.

    This only ensures that the data has made its way out of the process without
    any guarantees on whether it's written to disk. This means that the
    data would survive an application crash but not necessarily an OS crash.
    """
    if self._writable_file:
      self._writable_file.flush()

  def close(self):
    r"""Closes the file.

    Should be called for the WritableFile to be flushed.

    In general, if you use the context manager pattern, you don't need to call
    this directly.

    >>> with tf.io.gfile.GFile("/tmp/x", "w") as f:
    ...   f.write("asdf\n")
    ...   f.write("qwer\n")
    >>> # implicit f.close() at the end of the block

    For cloud filesystems, forgetting to call `close()` might result in data
    loss as last write might not have been replicated.
    """
    # Drop the read buffer and flush/close any open writer.
    self._read_buf = None
    if self._writable_file:
      self._writable_file.close()
      self._writable_file = None

  def seekable(self):
    """Returns True as FileIO supports random access ops of seek()/tell()"""
    return True
@tf_export("io.gfile.exists")
def file_exists_v2(path):
  """Determines whether a path exists or not.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True

  You can also specify the URI scheme for selecting a different filesystem:

  >>> # for a GCS filesystem path:
  >>> # tf.io.gfile.exists("gs://bucket/file")
  >>> # for a local filesystem:
  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("file:///tmp/x")
  True

  This currently returns `True` for existing directories but don't rely on this
  behavior, especially if you are using cloud filesystems (e.g., GCS, S3,
  Hadoop):

  >>> tf.io.gfile.exists("/tmp")
  True

  Args:
    path: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
    False if the path does not exist and there are no filesystem errors.

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
  """
  # FileExists raises NotFoundError for a missing path; any other
  # filesystem error propagates to the caller.
  try:
    _pywrap_file_io.FileExists(compat.path_to_bytes(path))
    return True
  except errors.NotFoundError:
    return False
@tf_export(v1=["gfile.Exists"])
def file_exists(filename):
  # TF1 alias of file_exists_v2; its docstring is copied onto this
  # function below.
  return file_exists_v2(filename)


file_exists.__doc__ = file_exists_v2.__doc__
@tf_export(v1=["gfile.Remove"])
def delete_file(filename):
  """Deletes the file at the given path.

  TF1 endpoint; delegates to `delete_file_v2`.

  Args:
    filename: string, a filename

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
      For example, `NotFoundError` when the file is missing.
  """
  delete_file_v2(filename)
@tf_export("io.gfile.remove")
def delete_file_v2(path):
  """Deletes the file or object located at the given path.

  Args:
    path: string, a path

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
      For example, `NotFoundError` when nothing exists at `path`.
  """
  _pywrap_file_io.DeleteFile(compat.path_to_bytes(path))
def read_file_to_string(filename, binary_mode=False):
  """Reads the entire contents of a file to a string.

  Args:
    filename: string, path to a file
    binary_mode: whether to open the file in binary mode or not. This changes
      the type of the object returned.

  Returns:
    contents of the file as a string or bytes.

  Raises:
    errors.OpError: Raises variety of errors that are subtypes e.g.
      `NotFoundError` etc.
  """
  mode = "rb" if binary_mode else "r"
  # Use a context manager so the underlying stream is closed promptly;
  # previously the FileIO handle was only released on garbage collection.
  with FileIO(filename, mode=mode) as f:
    return f.read()
def write_string_to_file(filename, file_content):
  """Writes the string `file_content` to the file at `filename`.

  Args:
    filename: string, path to a file
    file_content: string, contents that need to be written to the file

  Raises:
    errors.OpError: If there are errors during the operation.
  """
  # The context manager guarantees the writer is flushed and closed.
  with FileIO(filename, mode="w") as f:
    f.write(file_content)
@tf_export(v1=["gfile.Glob"])
def get_matching_files(filename):
  """Returns a list of files that match the given pattern(s).

  TF1 endpoint; delegates to `get_matching_files_v2`.

  Args:
    filename: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    * errors.OpError: If there are filesystem / directory listing errors.
    * errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  return get_matching_files_v2(filename)
@tf_export("io.gfile.glob")
def get_matching_files_v2(pattern):
  r"""Returns a list of files that match the given pattern(s).

  The patterns are defined as strings. Supported patterns are defined
  here. Note that the pattern can be a Python iteratable of string patterns.

  The format definition of the pattern is:

  **pattern**: `{ term }`

  **term**:
    * `'*'`: matches any sequence of non-'/' characters
    * `'?'`: matches a single non-'/' character
    * `'[' [ '^' ] { match-list } ']'`: matches any single
      character (not) on the list
    * `c`: matches character `c` where `c != '*', '?', '\\', '['`
    * `'\\' c`: matches character `c`

  **character range**:
    * `c`: matches character `c` while `c != '\\', '-', ']'`
    * `'\\' c`: matches character `c`
    * `lo '-' hi`: matches character `c` for `lo <= c <= hi`

  Examples:

  >>> tf.io.gfile.glob("*.py")
  ... # For example, ['__init__.py']

  >>> tf.io.gfile.glob("__init__.??")
  ... # As above

  >>> files = {"*.py"}
  >>> the_iterator = iter(files)
  >>> tf.io.gfile.glob(the_iterator)
  ... # As above

  See the C++ function `GetMatchingPaths` in
  [`core/platform/file_system.h`]
  (../../../core/platform/file_system.h)
  for implementation details.

  Args:
    pattern: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    errors.OpError: If there are filesystem / directory listing errors.
    errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  # Normalize the single-pattern case to a one-element list so a single
  # comprehension handles both call forms.
  if isinstance(pattern, six.string_types):
    patterns = [pattern]
  else:
    patterns = pattern
  return [
      # The C++ layer returns bytes; hand callers strings.
      compat.as_str_any(match)  # pylint: disable=g-complex-comprehension
      for single_pattern in patterns
      for match in _pywrap_file_io.GetMatchingFiles(
          compat.as_bytes(single_pattern))
  ]
@tf_export(v1=["gfile.MkDir"])
def create_dir(dirname):
  """Creates a single directory named `dirname`.

  TF1 endpoint; delegates to `create_dir_v2`.

  Args:
    dirname: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  create_dir_v2(dirname)
@tf_export("io.gfile.mkdir")
def create_dir_v2(path):
  """Creates a single directory at `path`.

  Args:
    path: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CreateDir(compat.path_to_bytes(path))
@tf_export(v1=["gfile.MakeDirs"])
def recursive_create_dir(dirname):
  """Creates a directory tree, including any missing parent directories.

  Succeeds when `dirname` already exists and is writable.
  TF1 endpoint; delegates to `recursive_create_dir_v2`.

  Args:
    dirname: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  recursive_create_dir_v2(dirname)
@tf_export("io.gfile.makedirs")
def recursive_create_dir_v2(path):
  """Creates a directory tree, including any missing parent directories.

  Succeeds when `path` already exists and is writable.

  Args:
    path: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))
@tf_export("io.gfile.copy")
def copy_v2(src, dst, overwrite=False):
  """Copies data from `src` to `dst`.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  You can also specify the URI scheme for selecting a different filesystem:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  Note that you need to always specify a file name, even if moving into a new
  directory. This is because some cloud filesystems don't have the concept of a
  directory.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.mkdir("/tmp/new_dir")
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/new_dir/y")
  >>> tf.io.gfile.exists("/tmp/new_dir/y")
  True
  >>> tf.io.gfile.rmtree("/tmp/new_dir")

  If you want to prevent errors if the path already exists, you can use
  `overwrite` argument:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y", overwrite=True)
  >>> tf.io.gfile.remove("/tmp/y")

  Note that the above will still result in an error if you try to overwrite a
  directory with a file.

  Note that you cannot copy a directory, only file arguments are supported.

  Args:
    src: string, name of the file whose contents need to be copied
    dst: string, name of the file to which to copy to
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  # The copy itself is performed by the C++ FileSystem API; `overwrite`
  # is forwarded unchanged.
  _pywrap_file_io.CopyFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)
@tf_export(v1=["gfile.Copy"])
def copy(oldpath, newpath, overwrite=False):
  # TF1 alias of copy_v2; its docstring is copied onto this function below.
  copy_v2(oldpath, newpath, overwrite)


copy.__doc__ = copy_v2.__doc__
@tf_export(v1=["gfile.Rename"])
def rename(oldname, newname, overwrite=False):
  """Renames or moves a file / directory.

  TF1 endpoint; delegates to `rename_v2`.

  Args:
    oldname: string, pathname for a file
    newname: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `newname` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  rename_v2(oldname, newname, overwrite)
@tf_export("io.gfile.rename")
def rename_v2(src, dst, overwrite=False):
  """Renames or moves a file / directory.

  Args:
    src: string, pathname for a file
    dst: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  src_bytes = compat.path_to_bytes(src)
  dst_bytes = compat.path_to_bytes(dst)
  _pywrap_file_io.RenameFile(src_bytes, dst_bytes, overwrite)
def atomic_write_string_to_file(filename, contents, overwrite=True):
  """Writes `contents` to `filename` so readers never observe a partial file.

  With plain `write_string_to_file`, the file may appear in the filesystem
  with only part of `contents` written. When the filesystem supports atomic
  moves, this helper instead writes to a unique temporary file and renames it
  into place; otherwise it falls back to a direct write.

  Args:
    filename: string, pathname for a file
    contents: string, contents that need to be written to the file
    overwrite: boolean, if false it's an error for `filename` to be occupied by
      an existing file.
  """
  if has_atomic_move(filename):
    temp_pathname = filename + ".tmp" + uuid.uuid4().hex
    write_string_to_file(temp_pathname, contents)
    try:
      rename(temp_pathname, filename, overwrite)
    except errors.OpError:
      # Remove the orphaned temp file before propagating the failure.
      delete_file(temp_pathname)
      raise
  else:
    write_string_to_file(filename, contents)
@tf_export(v1=["gfile.DeleteRecursively"])
def delete_recursively(dirname):
  """Recursively deletes everything under `dirname`.

  TF1 endpoint; delegates to `delete_recursively_v2`.

  Args:
    dirname: string, a path to a directory

  Raises:
    errors.OpError: If the operation fails.
  """
  delete_recursively_v2(dirname)
@tf_export("io.gfile.rmtree")
def delete_recursively_v2(path):
  """Recursively deletes everything under `path`.

  Args:
    path: string, a path

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path))
@tf_export(v1=["gfile.IsDirectory"])
def is_directory(dirname):
  """Reports whether `dirname` refers to a directory.

  TF1 endpoint; delegates to `is_directory_v2`.

  Args:
    dirname: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  return is_directory_v2(dirname)
@tf_export("io.gfile.isdir")
def is_directory_v2(path):
  """Reports whether `path` refers to a directory.

  Args:
    path: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  try:
    return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
  except errors.OpError:
    # Any filesystem error (including a missing path) is reported as
    # "not a directory" instead of propagating.
    return False
def has_atomic_move(path):
  """Checks whether the filesystem holding `path` supports atomic moves.

  When atomic moves are supported, the recommended write pattern is to write
  to a temporary location and then move the result into its final place (see
  `atomic_write_string_to_file`).

  Args:
    path: string, path to a file

  Returns:
    True, if the path is on a file system that supports atomic move (this is
    also the default when the filesystem cannot be queried).
    False, if the file system does not support atomic move. In such cases
    we need to be careful about using moves. In some cases it is safer
    not to use temporary locations in this case.
  """
  try:
    return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path))
  except errors.OpError:
    # Err on the side of assuming support when the query itself fails.
    return True
@tf_export(v1=["gfile.ListDirectory"])
def list_directory(dirname):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order and excludes the special entries "."
  and "..".
  TF1 endpoint; delegates to `list_directory_v2`.

  Args:
    dirname: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  return list_directory_v2(dirname)
@tf_export("io.gfile.listdir")
def list_directory_v2(path):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order and excludes the special entries "."
  and "..".

  Args:
    path: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  if not is_directory(path):
    raise errors.NotFoundError(
        node_def=None,
        op=None,
        message="Could not find directory {}".format(path))
  # The C++ layer yields bytes entries; hand callers strings.
  children = _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
  return [compat.as_str_any(child) for child in children]
@tf_export("io.gfile.join")
def join(path, *paths):
  r"""Join one or more path components intelligently.

  TensorFlow specific filesystems will be joined
  like a url (using "/" as the path seperator) on all platforms:

  On Windows or Linux/Unix-like:
  >>> tf.io.gfile.join("gcs://folder", "file.py")
  'gcs://folder/file.py'

  >>> tf.io.gfile.join("ram://folder", "file.py")
  'ram://folder/file.py'

  But the native filesystem is handled just like os.path.join:

  >>> path = tf.io.gfile.join("folder", "file.py")
  >>> if os.name == "nt":
  ...   expected = "folder\\file.py"  # Windows
  ... else:
  ...   expected = "folder/file.py"  # Linux/Unix-like
  >>> path == expected
  True

  Args:
    path: string, path to a directory
    paths: string, additional paths to concatenate

  Returns:
    path: the joined path.
  """
  # os.path.join won't take mixed bytes/str, so don't overwrite the incoming `path` var
  path_ = compat.as_str_any(compat.path_to_str(path))
  # Search from index 1 so the "://" separator must be preceded by at least
  # one scheme character.
  if "://" in path_[1:]:
    # `urljoin` here is posixpath.join, so URI-style paths always join
    # with "/" regardless of platform.
    return urljoin(path, *paths)
  return os.path.join(path, *paths)
@tf_export(v1=["gfile.Walk"])
def walk(top, in_order=True):
  """Recursive directory tree generator for directories.

  TF1 endpoint; delegates to `walk_v2` with `topdown=in_order`.

  Args:
    top: string, a Directory name
    in_order: bool, Traverse in order if True, post order if False. Errors that
      happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """
  return walk_v2(top, in_order)
@tf_export("io.gfile.walk")
def walk_v2(top, topdown=True, onerror=None):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    topdown: bool, Traverse pre order if True, post order if False.
    onerror: optional handler for errors. Should be a function, it will be
      called with the error as argument. Rethrowing the error aborts the walk.
      Errors that happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """

  def _make_full_path(parent, item):
    # Since `join` discards paths before one that starts with the path
    # separator (https://docs.python.org/3/library/os.path.html#join),
    # we have to manually handle that case as `/` is a valid character on GCS.
    if item[0] == os.sep:
      return "".join([join(parent, ""), item])
    return join(parent, item)

  top = compat.as_str_any(compat.path_to_str(top))
  try:
    listing = list_directory(top)
  except errors.NotFoundError as err:
    if onerror:
      onerror(err)
    # Bug fix: previously, when `onerror` was provided and returned normally,
    # control fell through to the loop below and raised a NameError because
    # `listing` was never bound. The directory cannot be listed either way,
    # so stop walking this subtree.
    return

  files = []
  subdirs = []
  for item in listing:
    full_path = _make_full_path(top, item)
    if is_directory(full_path):
      subdirs.append(item)
    else:
      files.append(item)

  here = (top, subdirs, files)

  if topdown:
    yield here

  for subdir in subdirs:
    for subitem in walk_v2(
        _make_full_path(top, subdir), topdown, onerror=onerror):
      yield subitem

  if not topdown:
    yield here
@tf_export(v1=["gfile.Stat"])
def stat(filename):
  """Returns file statistics for a given path.

  TF1 endpoint; delegates to `stat_v2`.

  Args:
    filename: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return stat_v2(filename)
@tf_export("io.gfile.stat")
def stat_v2(path):
  """Returns file statistics for a given path.

  Args:
    path: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  # Note: Stat takes a str path, unlike most wrappers here which pass bytes.
  return _pywrap_file_io.Stat(compat.path_to_str(path))
def filecmp(filename_a, filename_b):
  """Compare two files, returning True if they are the same, False otherwise.

  We check size first and return False quickly if the files are different sizes.
  If they are the same size, we continue to generating a crc for the whole file.

  You might wonder: why not use Python's `filecmp.cmp()` instead? The answer is
  that the builtin library is not robust to the many different filesystems
  TensorFlow runs on, and so we here perform a similar comparison with
  the more robust FileIO.

  Args:
    filename_a: string path to the first file.
    filename_b: string path to the second file.

  Returns:
    True if the files are the same, False otherwise.
  """
  # Query sizes via stat() directly rather than constructing FileIO objects
  # that were never closed; FileIO.size() is itself just this stat call.
  size_a = stat(filename_a).length
  size_b = stat(filename_b).length
  if size_a != size_b:
    return False

  # Size is the same. Do a full check.
  return file_crc32(filename_a) == file_crc32(filename_b)
def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE):
  """Computes the crc32 checksum of the contents of `filename`.

  Two files with the same crc32 are considered equivalent for error-checking
  purposes. Note that computing the checksum requires reading the whole file.

  Args:
    filename: string, path to a file
    block_size: Integer, process the files by reading blocks of `block_size`
      bytes. Use -1 to read the file as once.

  Returns:
    hexadecimal as string, the crc32 of the passed file.
  """
  crc = 0
  with FileIO(filename, mode="rb") as f:
    while True:
      block = f.read(n=block_size)
      if not block:
        break
      # Fold each block into the running checksum so memory usage stays
      # bounded by block_size.
      crc = binascii.crc32(block, crc)
  return hex(crc & 0xFFFFFFFF)
@tf_export("io.gfile.get_registered_schemes")
def get_registered_schemes():
  """Returns the currently registered filesystem schemes.

  The `tf.io.gfile` APIs accept traditional filesystem paths as well as file
  URIs beginning with a scheme. For example, the local filesystem path
  `/tmp/tf` can also be written `file:///tmp/tf`: the scheme is `file`,
  followed by `://` and then the path, per
  [URI syntax](https://datatracker.ietf.org/doc/html/rfc3986#section-3).

  The returned list contains every scheme the `tf.io.gfile` APIs currently
  recognize: the built-in ones plus any registered by other TensorFlow
  filesystem implementations, for example those provided by
  [TensorFlow I/O](https://github.com/tensorflow/io).

  The empty string is always included, and represents the "scheme" for regular
  local filesystem paths.

  Returns:
    List of string schemes, e.g. `['', 'file', 'ram']`, in arbitrary order.

  Raises:
    errors.OpError: If the operation fails.
  """
  return _pywrap_file_io.GetRegisteredSchemes()