Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/lib/io/file_io.py: 36%
271 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""File IO methods that wrap the C++ FileSystem API."""
16import binascii
17import os
18from posixpath import join as urljoin
19import uuid
21import six
23from tensorflow.python.framework import errors
24from tensorflow.python.lib.io import _pywrap_file_io
25from tensorflow.python.util import compat
26from tensorflow.python.util import deprecation
27from tensorflow.python.util.tf_export import tf_export
# A good default block size depends on the system in question.
# A somewhat conservative default chosen here.
# Used as the per-read chunk size in file_crc32() below (16 MiB).
_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024
class FileIO(object):
  """FileIO class that exposes methods to read / write to / from files.

  The constructor takes the following arguments:
  name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object)
    giving the pathname of the file to be opened.
  mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode.

  Can be used as an iterator to iterate over lines in the file.

  The default buffer size used for the BufferedInputStream used for reading
  the file line by line is 1024 * 512 bytes.
  """

  def __init__(self, name, mode, encoding="utf-8"):
    self.__name = name
    self.__mode = mode
    self.__encoding = encoding
    # The underlying streams are created lazily, on first read/write, by
    # _preread_check() / _prewrite_check().
    self._read_buf = None
    self._writable_file = None
    # "b" only controls the Python-side bytes/str conversion performed by
    # _prepare_value(); it is stripped before validating the mode string.
    self._binary_mode = "b" in mode
    mode = mode.replace("b", "")
    if mode not in ("r", "w", "a", "r+", "w+", "a+"):
      raise errors.InvalidArgumentError(
          None, None, "mode is not 'r' or 'w' or 'a' or 'r+' or 'w+' or 'a+'")
    self._read_check_passed = mode in ("r", "r+", "a+", "w+")
    self._write_check_passed = mode in ("a", "w", "r+", "a+", "w+")

  @property
  def name(self):
    """Returns the file name."""
    return self.__name

  @property
  def mode(self):
    """Returns the mode in which the file was opened."""
    return self.__mode

  def _preread_check(self):
    # Lazily opens the buffered reader; fails if the mode forbids reading.
    if not self._read_buf:
      if not self._read_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for reading")
      # 512 KiB buffer for line-by-line reads; see class docstring.
      self._read_buf = _pywrap_file_io.BufferedInputStream(
          compat.path_to_str(self.__name), 1024 * 512)

  def _prewrite_check(self):
    # Lazily opens the writable file; fails if the mode forbids writing.
    if not self._writable_file:
      if not self._write_check_passed:
        raise errors.PermissionDeniedError(None, None,
                                           "File isn't open for writing")
      self._writable_file = _pywrap_file_io.WritableFile(
          compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode))

  def _prepare_value(self, val):
    # Converts data from the C++ layer to bytes or str, depending on whether
    # the file was opened in binary mode.
    if self._binary_mode:
      return compat.as_bytes(val, encoding=self.__encoding)
    else:
      return compat.as_str_any(val, encoding=self.__encoding)

  def size(self):
    """Returns the size of the file."""
    # Delegates to the module-level stat(); does not require an open stream.
    return stat(self.__name).length

  def write(self, file_content):
    """Writes file_content to the file. Appends to the end of the file."""
    self._prewrite_check()
    self._writable_file.append(
        compat.as_bytes(file_content, encoding=self.__encoding))

  def read(self, n=-1):
    """Returns the contents of a file as a string.

    Starts reading from current position in file.

    Args:
      n: Read `n` bytes if `n != -1`. If `n = -1`, reads to end of file.

    Returns:
      `n` bytes of the file (or whole file) in bytes mode or `n` bytes of the
      string if in string (regular) mode.
    """
    self._preread_check()
    if n == -1:
      # Read everything from the current position to the end of the file.
      length = self.size() - self.tell()
    else:
      length = n
    return self._prepare_value(self._read_buf.read(length))

  @deprecation.deprecated_args(
      None, "position is deprecated in favor of the offset argument.",
      "position")
  def seek(self, offset=None, whence=0, position=None):
    # TODO(jhseu): Delete later. Used to omit `position` from docs.
    # pylint: disable=g-doc-args
    """Seeks to the offset in the file.

    Args:
      offset: The byte count relative to the whence argument.
      whence: Valid values for whence are:
        0: start of the file (default)
        1: relative to the current position of the file
        2: relative to the end of file. `offset` is usually negative.
    """
    # pylint: enable=g-doc-args
    self._preread_check()
    # We needed to make offset a keyword argument for backwards-compatibility.
    # This check exists so that we can convert back to having offset be a
    # positional argument.
    # TODO(jhseu): Make `offset` a positional argument after `position` is
    # deleted.
    if offset is None and position is None:
      raise TypeError("seek(): offset argument required")
    if offset is not None and position is not None:
      raise TypeError("seek(): offset and position may not be set "
                      "simultaneously.")

    if position is not None:
      offset = position

    # Translate `whence` into an absolute offset before seeking.
    if whence == 0:
      pass
    elif whence == 1:
      offset += self.tell()
    elif whence == 2:
      offset += self.size()
    else:
      raise errors.InvalidArgumentError(
          None, None,
          "Invalid whence argument: {}. Valid values are 0, 1, or 2.".format(
              whence))
    self._read_buf.seek(offset)

  def readline(self):
    r"""Reads the next line, keeping \n. At EOF, returns ''."""
    self._preread_check()
    return self._prepare_value(self._read_buf.readline())

  def readlines(self):
    """Returns all lines from the file in a list."""
    self._preread_check()
    lines = []
    while True:
      s = self.readline()
      if not s:
        break
      lines.append(s)
    return lines

  def tell(self):
    """Returns the current position in the file."""
    # Position is tracked by whichever underlying stream is in use.
    if self._read_check_passed:
      self._preread_check()
      return self._read_buf.tell()
    else:
      self._prewrite_check()
      return self._writable_file.tell()

  def __enter__(self):
    """Make usable with "with" statement."""
    return self

  def __exit__(self, unused_type, unused_value, unused_traceback):
    """Make usable with "with" statement."""
    self.close()

  def __iter__(self):
    return self

  def __next__(self):
    retval = self.readline()
    if not retval:
      raise StopIteration()
    return retval

  def next(self):
    # Python 2-style alias for __next__().
    return self.__next__()

  def flush(self):
    """Flushes the Writable file.

    This only ensures that the data has made its way out of the process without
    any guarantees on whether it's written to disk. This means that the
    data would survive an application crash but not necessarily an OS crash.
    """
    if self._writable_file:
      self._writable_file.flush()

  def close(self):
    r"""Closes the file.

    Should be called for the WritableFile to be flushed.

    In general, if you use the context manager pattern, you don't need to call
    this directly.

    >>> with tf.io.gfile.GFile("/tmp/x", "w") as f:
    ...   f.write("asdf\n")
    ...   f.write("qwer\n")
    >>> # implicit f.close() at the end of the block

    For cloud filesystems, forgetting to call `close()` might result in data
    loss as last write might not have been replicated.
    """
    # Drop the read buffer and flush/close any open writer.
    self._read_buf = None
    if self._writable_file:
      self._writable_file.close()
      self._writable_file = None

  def seekable(self):
    """Returns True as FileIO supports random access ops of seek()/tell()"""
    return True
@tf_export("io.gfile.exists")
def file_exists_v2(path):
  """Determines whether a path exists or not.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True

  You can also specify the URI scheme for selecting a different filesystem:

  >>> # for a GCS filesystem path:
  >>> # tf.io.gfile.exists("gs://bucket/file")
  >>> # for a local filesystem:
  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("file:///tmp/x")
  True

  This currently returns `True` for existing directories but don't rely on this
  behavior, especially if you are using cloud filesystems (e.g., GCS, S3,
  Hadoop):

  >>> tf.io.gfile.exists("/tmp")
  True

  Args:
    path: string, a path

  Returns:
    True if the path exists, whether it's a file or a directory.
    False if the path does not exist and there are no filesystem errors.

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
  """
  # FileExists raises NotFoundError for a missing path; any other
  # filesystem error propagates to the caller.
  try:
    _pywrap_file_io.FileExists(compat.path_to_bytes(path))
    return True
  except errors.NotFoundError:
    return False
@tf_export(v1=["gfile.Exists"])
def file_exists(filename):
  # TF1 alias of file_exists_v2; its docstring is copied onto this
  # function below.
  return file_exists_v2(filename)


file_exists.__doc__ = file_exists_v2.__doc__
@tf_export(v1=["gfile.Remove"])
def delete_file(filename):
  """Deletes the file at the given path.

  TF1 endpoint; delegates to `delete_file_v2`.

  Args:
    filename: string, a filename

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
      For example, `NotFoundError` when the file is missing.
  """
  delete_file_v2(filename)
@tf_export("io.gfile.remove")
def delete_file_v2(path):
  """Deletes the file or object located at the given path.

  Args:
    path: string, a path

  Raises:
    errors.OpError: Propagates any errors reported by the FileSystem API.
      For example, `NotFoundError` when nothing exists at `path`.
  """
  _pywrap_file_io.DeleteFile(compat.path_to_bytes(path))
def read_file_to_string(filename, binary_mode=False):
  """Reads the entire contents of a file to a string.

  Args:
    filename: string, path to a file
    binary_mode: whether to open the file in binary mode or not. This changes
      the type of the object returned.

  Returns:
    contents of the file as a string or bytes.

  Raises:
    errors.OpError: Raises variety of errors that are subtypes e.g.
      `NotFoundError` etc.
  """
  mode = "rb" if binary_mode else "r"
  # Use a context manager so the underlying stream is closed promptly;
  # previously the FileIO handle was only released on garbage collection.
  with FileIO(filename, mode=mode) as f:
    return f.read()
def write_string_to_file(filename, file_content):
  """Writes the string `file_content` to the file at `filename`.

  Args:
    filename: string, path to a file
    file_content: string, contents that need to be written to the file

  Raises:
    errors.OpError: If there are errors during the operation.
  """
  # The context manager guarantees the writer is flushed and closed.
  with FileIO(filename, mode="w") as f:
    f.write(file_content)
@tf_export(v1=["gfile.Glob"])
def get_matching_files(filename):
  """Returns a list of files that match the given pattern(s).

  TF1 endpoint; delegates to `get_matching_files_v2`.

  Args:
    filename: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    * errors.OpError: If there are filesystem / directory listing errors.
    * errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  return get_matching_files_v2(filename)
@tf_export("io.gfile.glob")
def get_matching_files_v2(pattern):
  r"""Returns a list of files that match the given pattern(s).

  The patterns are defined as strings. Supported patterns are defined
  here. Note that the pattern can be a Python iteratable of string patterns.

  The format definition of the pattern is:

  **pattern**: `{ term }`

  **term**:
    * `'*'`: matches any sequence of non-'/' characters
    * `'?'`: matches a single non-'/' character
    * `'[' [ '^' ] { match-list } ']'`: matches any single
      character (not) on the list
    * `c`: matches character `c` where `c != '*', '?', '\\', '['`
    * `'\\' c`: matches character `c`

  **character range**:
    * `c`: matches character `c` while `c != '\\', '-', ']'`
    * `'\\' c`: matches character `c`
    * `lo '-' hi`: matches character `c` for `lo <= c <= hi`

  Examples:

  >>> tf.io.gfile.glob("*.py")
  ... # For example, ['__init__.py']

  >>> tf.io.gfile.glob("__init__.??")
  ... # As above

  >>> files = {"*.py"}
  >>> the_iterator = iter(files)
  >>> tf.io.gfile.glob(the_iterator)
  ... # As above

  See the C++ function `GetMatchingPaths` in
  [`core/platform/file_system.h`]
  (../../../core/platform/file_system.h)
  for implementation details.

  Args:
    pattern: string or iterable of strings. The glob pattern(s).

  Returns:
    A list of strings containing filenames that match the given pattern(s).

  Raises:
    errors.OpError: If there are filesystem / directory listing errors.
    errors.NotFoundError: If pattern to be matched is an invalid directory.
  """
  # Normalize the single-pattern case to a one-element list so a single
  # comprehension handles both call forms.
  if isinstance(pattern, six.string_types):
    patterns = [pattern]
  else:
    patterns = pattern
  return [
      # The C++ layer returns bytes; hand callers strings.
      compat.as_str_any(match)  # pylint: disable=g-complex-comprehension
      for single_pattern in patterns
      for match in _pywrap_file_io.GetMatchingFiles(
          compat.as_bytes(single_pattern))
  ]
@tf_export(v1=["gfile.MkDir"])
def create_dir(dirname):
  """Creates a single directory named `dirname`.

  TF1 endpoint; delegates to `create_dir_v2`.

  Args:
    dirname: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  create_dir_v2(dirname)
@tf_export("io.gfile.mkdir")
def create_dir_v2(path):
  """Creates a single directory at `path`.

  Args:
    path: string, name of the directory to be created

  Notes: The parent directories need to exist. Use `tf.io.gfile.makedirs`
    instead if there is the possibility that the parent dirs don't exist.

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.CreateDir(compat.path_to_bytes(path))
@tf_export(v1=["gfile.MakeDirs"])
def recursive_create_dir(dirname):
  """Creates a directory tree, including any missing parent directories.

  Succeeds when `dirname` already exists and is writable.
  TF1 endpoint; delegates to `recursive_create_dir_v2`.

  Args:
    dirname: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  recursive_create_dir_v2(dirname)
@tf_export("io.gfile.makedirs")
def recursive_create_dir_v2(path):
  """Creates a directory tree, including any missing parent directories.

  Succeeds when `path` already exists and is writable.

  Args:
    path: string, name of the directory to be created

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))
@tf_export("io.gfile.copy")
def copy_v2(src, dst, overwrite=False):
  """Copies data from `src` to `dst`.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.exists("/tmp/x")
  True
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  You can also specify the URI scheme for selecting a different filesystem:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.exists("/tmp/y")
  True
  >>> tf.io.gfile.remove("/tmp/y")

  Note that you need to always specify a file name, even if moving into a new
  directory. This is because some cloud filesystems don't have the concept of a
  directory.

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.mkdir("/tmp/new_dir")
  >>> tf.io.gfile.copy("/tmp/x", "/tmp/new_dir/y")
  >>> tf.io.gfile.exists("/tmp/new_dir/y")
  True
  >>> tf.io.gfile.rmtree("/tmp/new_dir")

  If you want to prevent errors if the path already exists, you can use
  `overwrite` argument:

  >>> with open("/tmp/x", "w") as f:
  ...   f.write("asdf")
  ...
  4
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y")
  >>> tf.io.gfile.copy("/tmp/x", "file:///tmp/y", overwrite=True)
  >>> tf.io.gfile.remove("/tmp/y")

  Note that the above will still result in an error if you try to overwrite a
  directory with a file.

  Note that you cannot copy a directory, only file arguments are supported.

  Args:
    src: string, name of the file whose contents need to be copied
    dst: string, name of the file to which to copy to
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  # The copy itself is performed by the C++ FileSystem API; `overwrite`
  # is forwarded unchanged.
  _pywrap_file_io.CopyFile(
      compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite)
@tf_export(v1=["gfile.Copy"])
def copy(oldpath, newpath, overwrite=False):
  # TF1 alias of copy_v2; its docstring is copied onto this function below.
  copy_v2(oldpath, newpath, overwrite)


copy.__doc__ = copy_v2.__doc__
@tf_export(v1=["gfile.Rename"])
def rename(oldname, newname, overwrite=False):
  """Renames or moves a file / directory.

  TF1 endpoint; delegates to `rename_v2`.

  Args:
    oldname: string, pathname for a file
    newname: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `newname` to be occupied by
      an existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  rename_v2(oldname, newname, overwrite)
@tf_export("io.gfile.rename")
def rename_v2(src, dst, overwrite=False):
  """Renames or moves a file / directory.

  Args:
    src: string, pathname for a file
    dst: string, pathname to which the file needs to be moved
    overwrite: boolean, if false it's an error for `dst` to be occupied by an
      existing file.

  Raises:
    errors.OpError: If the operation fails.
  """
  src_bytes = compat.path_to_bytes(src)
  dst_bytes = compat.path_to_bytes(dst)
  _pywrap_file_io.RenameFile(src_bytes, dst_bytes, overwrite)
def atomic_write_string_to_file(filename, contents, overwrite=True):
  """Writes `contents` to `filename` so readers never observe a partial file.

  With plain `write_string_to_file`, the file may appear in the filesystem
  with only part of `contents` written. When the filesystem supports atomic
  moves, this helper instead writes to a unique temporary file and renames it
  into place; otherwise it falls back to a direct write.

  Args:
    filename: string, pathname for a file
    contents: string, contents that need to be written to the file
    overwrite: boolean, if false it's an error for `filename` to be occupied by
      an existing file.
  """
  if has_atomic_move(filename):
    temp_pathname = filename + ".tmp" + uuid.uuid4().hex
    write_string_to_file(temp_pathname, contents)
    try:
      rename(temp_pathname, filename, overwrite)
    except errors.OpError:
      # Remove the orphaned temp file before propagating the failure.
      delete_file(temp_pathname)
      raise
  else:
    write_string_to_file(filename, contents)
@tf_export(v1=["gfile.DeleteRecursively"])
def delete_recursively(dirname):
  """Recursively deletes everything under `dirname`.

  TF1 endpoint; delegates to `delete_recursively_v2`.

  Args:
    dirname: string, a path to a directory

  Raises:
    errors.OpError: If the operation fails.
  """
  delete_recursively_v2(dirname)
@tf_export("io.gfile.rmtree")
def delete_recursively_v2(path):
  """Recursively deletes everything under `path`.

  Args:
    path: string, a path

  Raises:
    errors.OpError: If the operation fails.
  """
  _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path))
@tf_export(v1=["gfile.IsDirectory"])
def is_directory(dirname):
  """Reports whether `dirname` refers to a directory.

  TF1 endpoint; delegates to `is_directory_v2`.

  Args:
    dirname: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  return is_directory_v2(dirname)
@tf_export("io.gfile.isdir")
def is_directory_v2(path):
  """Reports whether `path` refers to a directory.

  Args:
    path: string, path to a potential directory

  Returns:
    True, if the path is a directory; False otherwise
  """
  try:
    return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path))
  except errors.OpError:
    # Any filesystem error (including a missing path) is reported as
    # "not a directory" instead of propagating.
    return False
def has_atomic_move(path):
  """Checks whether the filesystem holding `path` supports atomic moves.

  When atomic moves are supported, the recommended write pattern is to write
  to a temporary location and then move the result into its final place (see
  `atomic_write_string_to_file`).

  Args:
    path: string, path to a file

  Returns:
    True, if the path is on a file system that supports atomic move (this is
    also the default when the filesystem cannot be queried).
    False, if the file system does not support atomic move. In such cases
    we need to be careful about using moves. In some cases it is safer
    not to use temporary locations in this case.
  """
  try:
    return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path))
  except errors.OpError:
    # Err on the side of assuming support when the query itself fails.
    return True
@tf_export(v1=["gfile.ListDirectory"])
def list_directory(dirname):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order and excludes the special entries "."
  and "..".
  TF1 endpoint; delegates to `list_directory_v2`.

  Args:
    dirname: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  return list_directory_v2(dirname)
@tf_export("io.gfile.listdir")
def list_directory_v2(path):
  """Returns a list of entries contained within a directory.

  The list is in arbitrary order and excludes the special entries "."
  and "..".

  Args:
    path: string, path to a directory

  Returns:
    [filename1, filename2, ... filenameN] as strings

  Raises:
    errors.NotFoundError if directory doesn't exist
  """
  if not is_directory(path):
    raise errors.NotFoundError(
        node_def=None,
        op=None,
        message="Could not find directory {}".format(path))
  # The C++ layer yields bytes entries; hand callers strings.
  children = _pywrap_file_io.GetChildren(compat.path_to_bytes(path))
  return [compat.as_str_any(child) for child in children]
@tf_export("io.gfile.join")
def join(path, *paths):
  r"""Join one or more path components intelligently.

  TensorFlow specific filesystems will be joined
  like a url (using "/" as the path seperator) on all platforms:

  On Windows or Linux/Unix-like:
  >>> tf.io.gfile.join("gcs://folder", "file.py")
  'gcs://folder/file.py'

  >>> tf.io.gfile.join("ram://folder", "file.py")
  'ram://folder/file.py'

  But the native filesystem is handled just like os.path.join:

  >>> path = tf.io.gfile.join("folder", "file.py")
  >>> if os.name == "nt":
  ...   expected = "folder\\file.py"  # Windows
  ... else:
  ...   expected = "folder/file.py"  # Linux/Unix-like
  >>> path == expected
  True

  Args:
    path: string, path to a directory
    paths: string, additional paths to concatenate

  Returns:
    path: the joined path.
  """
  # os.path.join won't take mixed bytes/str, so don't overwrite the incoming `path` var
  path_ = compat.as_str_any(compat.path_to_str(path))
  # Search from index 1 so the "://" separator must be preceded by at least
  # one scheme character.
  if "://" in path_[1:]:
    # `urljoin` here is posixpath.join, so URI-style paths always join
    # with "/" regardless of platform.
    return urljoin(path, *paths)
  return os.path.join(path, *paths)
@tf_export(v1=["gfile.Walk"])
def walk(top, in_order=True):
  """Recursive directory tree generator for directories.

  TF1 endpoint; delegates to `walk_v2` with `topdown=in_order`.

  Args:
    top: string, a Directory name
    in_order: bool, Traverse in order if True, post order if False. Errors that
      happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """
  return walk_v2(top, in_order)
@tf_export("io.gfile.walk")
def walk_v2(top, topdown=True, onerror=None):
  """Recursive directory tree generator for directories.

  Args:
    top: string, a Directory name
    topdown: bool, Traverse pre order if True, post order if False.
    onerror: optional handler for errors. Should be a function, it will be
      called with the error as argument. Rethrowing the error aborts the walk.
      Errors that happen while listing directories are ignored.

  Yields:
    Each yield is a 3-tuple: the pathname of a directory, followed by lists of
    all its subdirectories and leaf files. That is, each yield looks like:
    `(dirname, [subdirname, subdirname, ...], [filename, filename, ...])`.
    Each item is a string.
  """

  def _make_full_path(parent, item):
    # Since `join` discards paths before one that starts with the path
    # separator (https://docs.python.org/3/library/os.path.html#join),
    # we have to manually handle that case as `/` is a valid character on GCS.
    if item[0] == os.sep:
      return "".join([join(parent, ""), item])
    return join(parent, item)

  top = compat.as_str_any(compat.path_to_str(top))
  try:
    listing = list_directory(top)
  except errors.NotFoundError as err:
    if onerror:
      onerror(err)
    # Bug fix: previously, when `onerror` was provided and returned normally,
    # control fell through to the loop below and raised a NameError because
    # `listing` was never bound. The directory cannot be listed either way,
    # so stop walking this subtree.
    return

  files = []
  subdirs = []
  for item in listing:
    full_path = _make_full_path(top, item)
    if is_directory(full_path):
      subdirs.append(item)
    else:
      files.append(item)

  here = (top, subdirs, files)

  if topdown:
    yield here

  for subdir in subdirs:
    for subitem in walk_v2(
        _make_full_path(top, subdir), topdown, onerror=onerror):
      yield subitem

  if not topdown:
    yield here
@tf_export(v1=["gfile.Stat"])
def stat(filename):
  """Returns file statistics for a given path.

  TF1 endpoint; delegates to `stat_v2`.

  Args:
    filename: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  return stat_v2(filename)
@tf_export("io.gfile.stat")
def stat_v2(path):
  """Returns file statistics for a given path.

  Args:
    path: string, path to a file

  Returns:
    FileStatistics struct that contains information about the path

  Raises:
    errors.OpError: If the operation fails.
  """
  # Note: Stat takes a str path, unlike most wrappers here which pass bytes.
  return _pywrap_file_io.Stat(compat.path_to_str(path))
def filecmp(filename_a, filename_b):
  """Compare two files, returning True if they are the same, False otherwise.

  We check size first and return False quickly if the files are different sizes.
  If they are the same size, we continue to generating a crc for the whole file.

  You might wonder: why not use Python's `filecmp.cmp()` instead? The answer is
  that the builtin library is not robust to the many different filesystems
  TensorFlow runs on, and so we here perform a similar comparison with
  the more robust FileIO.

  Args:
    filename_a: string path to the first file.
    filename_b: string path to the second file.

  Returns:
    True if the files are the same, False otherwise.
  """
  # Query sizes via stat() directly rather than constructing FileIO objects
  # that were never closed; FileIO.size() is itself just this stat call.
  size_a = stat(filename_a).length
  size_b = stat(filename_b).length
  if size_a != size_b:
    return False

  # Size is the same. Do a full check.
  return file_crc32(filename_a) == file_crc32(filename_b)
def file_crc32(filename, block_size=_DEFAULT_BLOCK_SIZE):
  """Computes the crc32 checksum of the contents of `filename`.

  Two files with the same crc32 are considered equivalent for error-checking
  purposes. Note that computing the checksum requires reading the whole file.

  Args:
    filename: string, path to a file
    block_size: Integer, process the files by reading blocks of `block_size`
      bytes. Use -1 to read the file as once.

  Returns:
    hexadecimal as string, the crc32 of the passed file.
  """
  crc = 0
  with FileIO(filename, mode="rb") as f:
    while True:
      block = f.read(n=block_size)
      if not block:
        break
      # Fold each block into the running checksum so memory usage stays
      # bounded by block_size.
      crc = binascii.crc32(block, crc)
  return hex(crc & 0xFFFFFFFF)
@tf_export("io.gfile.get_registered_schemes")
def get_registered_schemes():
  """Returns the currently registered filesystem schemes.

  The `tf.io.gfile` APIs accept traditional filesystem paths as well as file
  URIs beginning with a scheme. For example, the local filesystem path
  `/tmp/tf` can also be written `file:///tmp/tf`: the scheme is `file`,
  followed by `://` and then the path, per
  [URI syntax](https://datatracker.ietf.org/doc/html/rfc3986#section-3).

  The returned list contains every scheme the `tf.io.gfile` APIs currently
  recognize: the built-in ones plus any registered by other TensorFlow
  filesystem implementations, for example those provided by
  [TensorFlow I/O](https://github.com/tensorflow/io).

  The empty string is always included, and represents the "scheme" for regular
  local filesystem paths.

  Returns:
    List of string schemes, e.g. `['', 'file', 'ram']`, in arbitrary order.

  Raises:
    errors.OpError: If the operation fails.
  """
  return _pywrap_file_io.GetRegisteredSchemes()