Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/fsspec/implementations/local.py: 26%
291 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:40 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:40 +0000
1import datetime
2import io
3import logging
4import os
5import os.path as osp
6import posixpath
7import re
8import shutil
9import stat
10import tempfile
12from fsspec import AbstractFileSystem
13from fsspec.compression import compr
14from fsspec.core import get_compression
15from fsspec.utils import isfilelike, stringify_path
# Module-level logger; LocalFileOpener emits a debug record through it each
# time a file is opened.
logger = logging.getLogger("fsspec.local")
class LocalFileSystem(AbstractFileSystem):
    """Interface to files on local storage

    Parameters
    ----------
    auto_mkdir: bool
        Whether, when opening a file, the directory containing it should
        be created (if it doesn't already exist). This is assumed by pyarrow
        code.
    """

    root_marker = "/"
    protocol = "file", "local"
    local_file = True

    def __init__(self, auto_mkdir=False, **kwargs):
        super().__init__(**kwargs)
        self.auto_mkdir = auto_mkdir

    @property
    def fsid(self):
        """Identifier of this filesystem; always ``"local"``."""
        return "local"

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory.

        Raises ``FileExistsError`` if ``path`` already exists. With
        ``create_parents`` the missing ancestors are created as well (and
        ``kwargs`` are not used); otherwise ``kwargs`` are passed on to
        ``os.mkdir``.
        """
        path = self._strip_protocol(path)
        if self.exists(path):
            raise FileExistsError(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            os.mkdir(path, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` and any missing parents via ``os.makedirs``."""
        path = self._strip_protocol(path)
        os.makedirs(path, exist_ok=exist_ok)

    def rmdir(self, path):
        """Remove a directory via ``os.rmdir``."""
        path = self._strip_protocol(path)
        os.rmdir(path)

    def ls(self, path, detail=False, **kwargs):
        """List the entries of the directory ``path``.

        Returns a list of ``info`` dicts when ``detail`` is true, otherwise
        a list of paths built by joining ``path`` and each entry name.
        """
        path = self._strip_protocol(path)
        if detail:
            with os.scandir(path) as it:
                return [self.info(f) for f in it]
        else:
            return [posixpath.join(path, f) for f in os.listdir(path)]

    def info(self, path, **kwargs):
        """Return a dict describing ``path``.

        ``path`` may be a string / path-like object or an ``os.DirEntry``
        (as produced by ``os.scandir`` in ``ls``). The result has the keys
        ``name``, ``size``, ``type`` ("directory", "file" or "other"),
        ``created`` (the ``st_ctime`` value), ``islink``, ``mode``, ``uid``,
        ``gid``, ``mtime``, ``ino`` and ``nlink``; symbolic links
        additionally get ``destination``.
        """
        if isinstance(path, os.DirEntry):
            # scandir DirEntry
            out = path.stat(follow_symlinks=False)
            link = path.is_symlink()
            if path.is_dir(follow_symlinks=False):
                t = "directory"
            elif path.is_file(follow_symlinks=False):
                t = "file"
            else:
                t = "other"
            path = self._strip_protocol(path.path)
        else:
            # str or path-like
            path = self._strip_protocol(path)
            out = os.stat(path, follow_symlinks=False)
            link = stat.S_ISLNK(out.st_mode)
            if link:
                # describe the link target rather than the link itself
                out = os.stat(path, follow_symlinks=True)
            if stat.S_ISDIR(out.st_mode):
                t = "directory"
            elif stat.S_ISREG(out.st_mode):
                t = "file"
            else:
                t = "other"
        result = {
            "name": path,
            "size": out.st_size,
            "type": t,
            "created": out.st_ctime,
            "islink": link,
        }
        for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
            result[field] = getattr(out, f"st_{field}")
        if result["islink"]:
            result["destination"] = os.readlink(path)
            try:
                out2 = os.stat(path, follow_symlinks=True)
                result["size"] = out2.st_size
            except OSError:
                # target cannot be stat'ed (e.g. dangling link)
                result["size"] = 0
        return result

    def lexists(self, path, **kwargs):
        """Return True if ``path`` exists, without following a final symlink.

        The protocol prefix is stripped first, as in the other methods of
        this class, so ``file://`` / ``local://`` URLs are accepted.
        """
        return osp.lexists(self._strip_protocol(path))

    def cp_file(self, path1, path2, **kwargs):
        """Copy the file ``path1`` to ``path2``.

        If ``path1`` is a directory, ``path2`` is created as a directory
        instead. Raises ``FileNotFoundError`` if ``path1`` is neither.
        """
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        if self.auto_mkdir:
            self.makedirs(self._parent(path2), exist_ok=True)
        if self.isfile(path1):
            shutil.copyfile(path1, path2)
        elif self.isdir(path1):
            self.makedirs(path2, exist_ok=True)
        else:
            raise FileNotFoundError(path1)

    def get_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` into ``path2``.

        ``path2`` may be a path or an open file-like object; in the latter
        case the bytes of ``path1`` are streamed into it. ``callback`` is
        accepted but not used.
        """
        if isfilelike(path2):
            # strip the protocol so URL-style paths can be opened too
            with open(self._strip_protocol(path1), "rb") as f:
                shutil.copyfileobj(f, path2)
        else:
            return self.cp_file(path1, path2, **kwargs)

    def put_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` to ``path2``; ``callback`` is accepted but not used."""
        return self.cp_file(path1, path2, **kwargs)

    def mv_file(self, path1, path2, **kwargs):
        """Move ``path1`` to ``path2`` via ``shutil.move``."""
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        shutil.move(path1, path2)

    def link(self, src, dst, **kwargs):
        """Create a hard link ``dst`` pointing at ``src`` (``os.link``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.link(src, dst, **kwargs)

    def symlink(self, src, dst, **kwargs):
        """Create a symbolic link ``dst`` pointing at ``src`` (``os.symlink``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.symlink(src, dst, **kwargs)

    def islink(self, path) -> bool:
        """Return True if ``path`` is a symbolic link."""
        return os.path.islink(self._strip_protocol(path))

    def rm_file(self, path):
        """Delete a single file via ``os.remove``."""
        os.remove(self._strip_protocol(path))

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete one path or a list of paths.

        Directories are only removed when ``recursive`` is true, and the
        current working directory is never removed; both cases raise
        ``ValueError``. ``maxdepth`` is accepted but not used.
        """
        if not isinstance(path, list):
            path = [path]

        for p in path:
            p = self._strip_protocol(p).rstrip("/")
            if self.isdir(p):
                if not recursive:
                    raise ValueError("Cannot delete directory, set recursive=True")
                if osp.abspath(p) == os.getcwd():
                    raise ValueError("Cannot delete current working directory")
                shutil.rmtree(p)
            else:
                os.remove(p)

    def unstrip_protocol(self, name):
        """Return ``name`` as a ``file://`` URL."""
        name = self._strip_protocol(name)  # normalise for local/win/...
        return f"file://{name}"

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        """Open ``path`` and return a ``LocalFileOpener``.

        With ``auto_mkdir`` set and a mode containing "w", the parent
        directory is created first. ``block_size`` is accepted but not used.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir and "w" in mode:
            self.makedirs(self._parent(path), exist_ok=True)
        return LocalFileOpener(path, mode, fs=self, **kwargs)

    def touch(self, path, truncate=True, **kwargs):
        """Create ``path`` if missing, otherwise update its timestamps.

        With ``truncate`` (the default) the file is then cut to zero length.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir:
            self.makedirs(self._parent(path), exist_ok=True)
        if self.exists(path):
            os.utime(path, None)
        else:
            open(path, "a").close()
        if truncate:
            os.truncate(path, 0)

    def created(self, path):
        """Return the ``created`` entry of ``info`` as a UTC-aware datetime."""
        info = self.info(path=path)
        return datetime.datetime.fromtimestamp(
            info["created"], tz=datetime.timezone.utc
        )

    def modified(self, path):
        """Return the ``mtime`` entry of ``info`` as a UTC-aware datetime."""
        info = self.info(path=path)
        return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)

    @classmethod
    def _parent(cls, path):
        """Return everything before the last "/" of the normalised path.

        Falls back to ``root_marker`` when the path contains no "/".
        """
        path = cls._strip_protocol(path).rstrip("/")
        if "/" in path:
            return path.rsplit("/", 1)[0]
        else:
            return cls.root_marker

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a leading ``file:`` / ``local:`` prefix and normalise the path.

        The result is passed through ``make_path_posix`` and has trailing
        slashes removed; an empty result becomes ``root_marker``.
        """
        path = stringify_path(path)
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        elif path.startswith("local://"):
            path = path[8:]
        elif path.startswith("local:"):
            path = path[6:]
        return make_path_posix(path).rstrip("/") or cls.root_marker

    def _isfilestore(self):
        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
        # the original motivation. But we are a posix-like file system.
        # See https://github.com/dask/dask/issues/5526
        return True

    def chmod(self, path, mode):
        """Change the permission bits of ``path`` via ``os.chmod``.

        The protocol prefix is stripped first, as in the other methods of
        this class, so ``file://`` / ``local://`` URLs are accepted.
        """
        path = self._strip_protocol(path)
        return os.chmod(path, mode)
def make_path_posix(path, sep=os.sep):
    """Make path generic

    Converts ``path`` to an absolute, forward-slash separated path.

    Parameters
    ----------
    path: str or list/set/tuple of str
        Path(s) to convert. A collection is converted element-wise and
        returned as the same collection type.
    sep: str
        Path separator the input is assumed to use; defaults to ``os.sep``.

    Returns
    -------
    str, or a collection of str matching the type of ``path``
    """
    if isinstance(path, (list, set, tuple)):
        # forward ``sep`` so every element is parsed with the separator the
        # caller asked for, not the platform default
        return type(path)(make_path_posix(p, sep) for p in path)
    if "~" in path:
        path = osp.expanduser(path)
    if sep == "/":
        # most common fast case for posix
        if path.startswith("/"):
            return path
        if path.startswith("./"):
            path = path[2:]
        return f"{os.getcwd()}/{path}"
    # from here on ``sep`` is never "/": that case always returned above
    if (sep not in path and "/" not in path) or (
        sep == "\\" and ":" not in path and not path.startswith("\\\\")
    ):
        # relative path like "path" or "rel\\path" (win) or rel/path"
        if os.sep == "\\":
            # abspath made some more '\\' separators
            return make_path_posix(osp.abspath(path))
        else:
            return f"{os.getcwd()}/{path}"
    if path.startswith("file://"):
        path = path[7:]
    if re.match("/[A-Za-z]:", path):
        # for windows file URI like "file:///C:/folder/file"
        # or "file:///C:\\dir\\file"
        path = path[1:].replace("\\", "/").replace("//", "/")
    if path.startswith("\\\\"):
        # special case for windows UNC/DFS-style paths, do nothing,
        # just flip the slashes around (case below does not work!)
        return path.replace("\\", "/")
    if re.match("[A-Za-z]:", path):
        # windows full path like "C:\\local\\path"
        return path.lstrip("\\").replace("\\", "/").replace("//", "/")
    if path.startswith("\\"):
        # windows network path like "\\server\\path"
        return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
    return path
def trailing_sep(path):
    """Tell whether ``path`` finishes with a path separator.

    Both ``os.sep`` and, where the platform defines one, ``os.altsep`` count
    as separators, so a forward slash is recognised even on Operating
    Systems that normally use a backslash.
    """
    # TODO: if all incoming paths were posix-compliant then separator would
    # always be a forward slash, simplifying this function.
    # See https://github.com/fsspec/filesystem_spec/pull/1250
    if os.altsep is None:
        separators = (os.sep,)
    else:
        separators = (os.sep, os.altsep)
    return path.endswith(separators)
class LocalFileOpener(io.IOBase):
    """File-like wrapper around a local file opened with the builtin ``open``.

    Most file methods delegate to the wrapped file object ``self.f``. When
    ``autocommit`` is false and the mode contains "w", data is written to a
    temporary file which ``commit`` moves to the final path and ``discard``
    deletes.
    """

    def __init__(
        self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
    ):
        """Store the settings and open the file immediately.

        Extra ``kwargs`` are accepted but not used.
        """
        logger.debug("open file: %s", path)
        self.path = path
        self.mode = mode
        self.fs = fs
        self.f = None
        self.autocommit = autocommit
        self.compression = get_compression(path, compression)
        self.blocksize = io.DEFAULT_BUFFER_SIZE
        self._open()

    def _open(self):
        """(Re)open the underlying file if it is not currently open."""
        if self.f is None or self.f.closed:
            if self.autocommit or "w" not in self.mode:
                self.f = open(self.path, mode=self.mode)
                if self.compression:
                    compress = compr[self.compression]
                    self.f = compress(self.f, mode=self.mode)
            else:
                # TODO: check if path is writable?
                # NOTE(review): the compression wrapper is not applied on
                # this deferred-commit path -- confirm whether intended.
                i, name = tempfile.mkstemp()
                os.close(i)  # we want normal open and normal buffered file
                self.temp = name
                self.f = open(name, mode=self.mode)
            if "w" not in self.mode:
                # seeking to the end yields the size; then rewind
                self.size = self.f.seek(0, 2)
                self.f.seek(0)
                self.f.size = self.size

    def _fetch_range(self, start, end):
        """Return the data between offsets ``start`` and ``end``.

        Raises ``ValueError`` if the mode does not contain "r".
        """
        # probably only used by cached FS
        if "r" not in self.mode:
            raise ValueError
        self._open()
        self.f.seek(start)
        return self.f.read(end - start)

    def __setstate__(self, state):
        """Restore attributes; read-mode files are reopened at the saved offset."""
        self.f = None
        loc = state.pop("loc", None)
        self.__dict__.update(state)
        if "r" in state["mode"]:
            self.f = None
            self._open()
            self.f.seek(loc)

    def __getstate__(self):
        """Return the picklable state (everything except the file handle).

        Read-mode files additionally record the current offset as ``loc``;
        a write-mode file that is still open raises ``ValueError``.
        """
        d = self.__dict__.copy()
        d.pop("f")
        if "r" in self.mode:
            d["loc"] = self.f.tell()
        else:
            if not self.f.closed:
                raise ValueError("Cannot serialise open write-mode local file")
        return d

    def commit(self):
        """Move the temporary file to the final path (non-autocommit only)."""
        if self.autocommit:
            raise RuntimeError("Can only commit if not already set to autocommit")
        shutil.move(self.temp, self.path)

    def discard(self):
        """Delete the temporary file (non-autocommit only)."""
        if self.autocommit:
            raise RuntimeError("Cannot discard if set to autocommit")
        os.remove(self.temp)

    def readable(self) -> bool:
        return True

    def writable(self) -> bool:
        return "r" not in self.mode

    def read(self, *args, **kwargs):
        return self.f.read(*args, **kwargs)

    def write(self, *args, **kwargs):
        return self.f.write(*args, **kwargs)

    def tell(self, *args, **kwargs):
        return self.f.tell(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self.f.seek(*args, **kwargs)

    def seekable(self, *args, **kwargs):
        return self.f.seekable(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self.f.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self.f.readlines(*args, **kwargs)

    def close(self):
        return self.f.close()

    def truncate(self, size=None) -> int:
        return self.f.truncate(size)

    @property
    def closed(self):
        return self.f.closed

    def fileno(self):
        """Return the file descriptor of the wrapped file object.

        Asks the wrapped object directly instead of going through its
        ``raw`` attribute, which only buffered binary files provide; this
        way text-mode files work as well.
        """
        return self.f.fileno()

    def flush(self) -> None:
        self.f.flush()

    def __iter__(self):
        return self.f.__iter__()

    def __getattr__(self, item):
        # anything not defined here is looked up on the wrapped file
        return getattr(self.f, item)

    def __enter__(self):
        self._incontext = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.f.__exit__(exc_type, exc_value, traceback)