Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/fsspec/implementations/local.py: 26%

291 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:40 +0000

1import datetime 

2import io 

3import logging 

4import os 

5import os.path as osp 

6import posixpath 

7import re 

8import shutil 

9import stat 

10import tempfile 

11 

12from fsspec import AbstractFileSystem 

13from fsspec.compression import compr 

14from fsspec.core import get_compression 

15from fsspec.utils import isfilelike, stringify_path 

16 

# Module-level logger; LocalFileOpener emits a debug record through it each
# time a file is opened.
logger = logging.getLogger("fsspec.local")

18 

19 

class LocalFileSystem(AbstractFileSystem):
    """Interface to files on local storage

    Every public method normalises its path argument(s) with
    ``_strip_protocol`` so that ``file://`` / ``local://`` prefixed paths,
    path-like objects and plain strings are all accepted.

    Parameters
    ----------
    auto_mkdir: bool
        Whether, when opening a file, the directory containing it should
        be created (if it doesn't already exist). This is assumed by pyarrow
        code.
    """

    root_marker = "/"
    protocol = "file", "local"
    local_file = True

    def __init__(self, auto_mkdir=False, **kwargs):
        super().__init__(**kwargs)
        self.auto_mkdir = auto_mkdir

    @property
    def fsid(self):
        """Identifier of this filesystem; always the string ``"local"``."""
        return "local"

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create the directory ``path``.

        Parameters
        ----------
        path: str or path-like
        create_parents: bool
            If True, delegate to ``makedirs`` so that missing intermediate
            directories are created too; otherwise call ``os.mkdir``.
        **kwargs:
            Forwarded to ``os.mkdir`` (only used when ``create_parents`` is
            False).

        Raises
        ------
        FileExistsError
            If ``path`` already exists.
        """
        path = self._strip_protocol(path)
        if self.exists(path):
            raise FileExistsError(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            os.mkdir(path, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` and any missing parents via ``os.makedirs``."""
        path = self._strip_protocol(path)
        os.makedirs(path, exist_ok=exist_ok)

    def rmdir(self, path):
        """Remove the directory ``path`` via ``os.rmdir``."""
        path = self._strip_protocol(path)
        os.rmdir(path)

    def ls(self, path, detail=False, **kwargs):
        """List the entries of directory ``path``.

        Parameters
        ----------
        path: str or path-like
        detail: bool
            If True, return one ``info`` dict per entry; otherwise return
            the entry paths (``path`` joined with each name using "/").
        """
        path = self._strip_protocol(path)
        if detail:
            # scandir hands DirEntry objects to info(), which can reuse the
            # stat data the directory scan already collected
            with os.scandir(path) as it:
                return [self.info(f) for f in it]
        else:
            return [posixpath.join(path, f) for f in os.listdir(path)]

    def info(self, path, **kwargs):
        """Return a dict describing ``path``.

        Parameters
        ----------
        path: str, path-like or os.DirEntry

        Returns
        -------
        dict with keys ``name``, ``size``, ``type`` ("directory", "file" or
        "other"), ``created`` (``st_ctime``), ``islink``, ``mode``, ``uid``,
        ``gid``, ``mtime``, ``ino``, ``nlink`` and, for symbolic links,
        ``destination``.
        """
        if isinstance(path, os.DirEntry):
            # scandir DirEntry
            out = path.stat(follow_symlinks=False)
            link = path.is_symlink()
            if path.is_dir(follow_symlinks=False):
                t = "directory"
            elif path.is_file(follow_symlinks=False):
                t = "file"
            else:
                t = "other"
            path = self._strip_protocol(path.path)
        else:
            # str or path-like
            path = self._strip_protocol(path)
            out = os.stat(path, follow_symlinks=False)
            link = stat.S_ISLNK(out.st_mode)
            if link:
                # classify the link by what it points at
                out = os.stat(path, follow_symlinks=True)
            if stat.S_ISDIR(out.st_mode):
                t = "directory"
            elif stat.S_ISREG(out.st_mode):
                t = "file"
            else:
                t = "other"
        result = {
            "name": path,
            "size": out.st_size,
            "type": t,
            "created": out.st_ctime,
            "islink": link,
        }
        for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
            result[field] = getattr(out, f"st_{field}")
        if result["islink"]:
            result["destination"] = os.readlink(path)
            try:
                # report the size of the link target; a target that cannot
                # be stat'ed is reported with size 0
                out2 = os.stat(path, follow_symlinks=True)
                result["size"] = out2.st_size
            except OSError:
                result["size"] = 0
        return result

    def lexists(self, path, **kwargs):
        """Return True if ``path`` exists, without following a final symlink.

        The path is normalised with ``_strip_protocol`` like in the other
        methods, so protocol-prefixed paths are handled.
        """
        return osp.lexists(self._strip_protocol(path))

    def cp_file(self, path1, path2, **kwargs):
        """Copy ``path1`` to ``path2``.

        A regular file is copied with ``shutil.copyfile``; if ``path1`` is a
        directory, only the directory ``path2`` is created (contents are not
        copied here). With ``auto_mkdir`` set, the parent of ``path2`` is
        created first.

        Raises
        ------
        FileNotFoundError
            If ``path1`` is neither a file nor a directory.
        """
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        if self.auto_mkdir:
            self.makedirs(self._parent(path2), exist_ok=True)
        if self.isfile(path1):
            shutil.copyfile(path1, path2)
        elif self.isdir(path1):
            self.mkdirs(path2, exist_ok=True)
        else:
            raise FileNotFoundError(path1)

    def get_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` into ``path2``.

        ``path2`` may be a file-like object, in which case the bytes of
        ``path1`` are written to it; otherwise this is ``cp_file``.
        ``callback`` is accepted but not used by this implementation.
        """
        if isfilelike(path2):
            # normalise so that "file://..." style sources open correctly
            with open(self._strip_protocol(path1), "rb") as f:
                shutil.copyfileobj(f, path2)
        else:
            return self.cp_file(path1, path2, **kwargs)

    def put_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` to ``path2``; identical to ``cp_file`` locally.

        ``callback`` is accepted but not used by this implementation.
        """
        return self.cp_file(path1, path2, **kwargs)

    def mv_file(self, path1, path2, **kwargs):
        """Move ``path1`` to ``path2`` using ``shutil.move``."""
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        shutil.move(path1, path2)

    def link(self, src, dst, **kwargs):
        """Create a hard link ``dst`` pointing at ``src`` (``os.link``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.link(src, dst, **kwargs)

    def symlink(self, src, dst, **kwargs):
        """Create a symbolic link ``dst`` pointing at ``src`` (``os.symlink``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.symlink(src, dst, **kwargs)

    def islink(self, path) -> bool:
        """Return True if ``path`` is a symbolic link."""
        return os.path.islink(self._strip_protocol(path))

    def rm_file(self, path):
        """Delete the single file ``path`` via ``os.remove``."""
        os.remove(self._strip_protocol(path))

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete one or more files or directories.

        Parameters
        ----------
        path: str, path-like or list of those
        recursive: bool
            Must be True to delete a directory; the whole tree is removed.
        maxdepth: int or None
            Accepted for signature compatibility; not used here.

        Raises
        ------
        ValueError
            If a directory is given without ``recursive=True``, or if the
            directory is the current working directory.
        """
        if not isinstance(path, list):
            path = [path]

        for p in path:
            p = self._strip_protocol(p).rstrip("/")
            if self.isdir(p):
                if not recursive:
                    raise ValueError("Cannot delete directory, set recursive=True")
                if osp.abspath(p) == os.getcwd():
                    raise ValueError("Cannot delete current working directory")
                shutil.rmtree(p)
            else:
                os.remove(p)

    def unstrip_protocol(self, name):
        """Return ``name`` as a ``file://`` URL string."""
        name = self._strip_protocol(name)  # normalise for local/win/...
        return f"file://{name}"

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        """Open ``path`` and return a ``LocalFileOpener``.

        With ``auto_mkdir`` set and a "w" mode, the parent directory is
        created first. ``block_size`` is accepted but not used here;
        ``kwargs`` are forwarded to ``LocalFileOpener``.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir and "w" in mode:
            self.makedirs(self._parent(path), exist_ok=True)
        return LocalFileOpener(path, mode, fs=self, **kwargs)

    def touch(self, path, truncate=True, **kwargs):
        """Create ``path`` if missing, or update its timestamps if present.

        Parameters
        ----------
        path: str or path-like
        truncate: bool
            If True, the file is truncated to zero length afterwards.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir:
            self.makedirs(self._parent(path), exist_ok=True)
        if self.exists(path):
            os.utime(path, None)
        else:
            open(path, "a").close()
        if truncate:
            os.truncate(path, 0)

    def created(self, path):
        """Return the ``created`` value of ``info`` (``st_ctime``) as a
        timezone-aware UTC datetime."""
        info = self.info(path=path)
        return datetime.datetime.fromtimestamp(
            info["created"], tz=datetime.timezone.utc
        )

    def modified(self, path):
        """Return the ``mtime`` value of ``info`` as a timezone-aware UTC
        datetime."""
        info = self.info(path=path)
        return datetime.datetime.fromtimestamp(info["mtime"], tz=datetime.timezone.utc)

    @classmethod
    def _parent(cls, path):
        """Return the text before the last "/" of the normalised ``path``,
        or ``root_marker`` when the path contains no "/"."""
        path = cls._strip_protocol(path).rstrip("/")
        if "/" in path:
            return path.rsplit("/", 1)[0]
        else:
            return cls.root_marker

    @classmethod
    def _strip_protocol(cls, path):
        """Normalise ``path`` to a posix-style string without protocol.

        A leading ``file://``, ``file:``, ``local://`` or ``local:`` is
        removed, the remainder goes through ``make_path_posix`` and trailing
        "/" characters are dropped; an empty result becomes ``root_marker``.
        """
        path = stringify_path(path)
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        elif path.startswith("local://"):
            path = path[8:]
        elif path.startswith("local:"):
            path = path[6:]
        return make_path_posix(path).rstrip("/") or cls.root_marker

    def _isfilestore(self):
        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
        # the original motivation. But we are a posix-like file system.
        # See https://github.com/dask/dask/issues/5526
        return True

    def chmod(self, path, mode):
        """Change the permission bits of ``path`` via ``os.chmod``.

        The path is normalised with ``_strip_protocol`` like in the other
        methods, so protocol-prefixed paths are handled.
        """
        path = self._strip_protocol(path)
        return os.chmod(path, mode)

233 

234 

def make_path_posix(path, sep=os.sep):
    """Make path generic

    Convert a local path to a form that uses forward slashes, resolving
    relative paths against the current working directory.

    Parameters
    ----------
    path: str, or list/set/tuple of str
        Path(s) to convert. A collection is converted element by element
        and returned as the same collection type.
    sep: str
        Path separator the input is written with; defaults to ``os.sep``.

    Returns
    -------
    str, or a collection of str matching the type of ``path``
    """
    if isinstance(path, (list, set, tuple)):
        # pass ``sep`` on so every element is read with the caller's
        # separator rather than silently falling back to ``os.sep``
        return type(path)(make_path_posix(p, sep) for p in path)
    if "~" in path:
        path = osp.expanduser(path)
    if sep == "/":
        # most common fast case for posix
        if path.startswith("/"):
            return path
        if path.startswith("./"):
            path = path[2:]
        return f"{os.getcwd()}/{path}"
    # from here on ``sep`` is never "/": that case always returned above
    if (sep not in path and "/" not in path) or (
        sep == "\\" and ":" not in path and not path.startswith("\\\\")
    ):
        # relative path like "path" or "rel\\path" (win) or rel/path"
        if os.sep == "\\":
            # abspath made some more '\\' separators
            return make_path_posix(osp.abspath(path))
        else:
            return f"{os.getcwd()}/{path}"
    if path.startswith("file://"):
        path = path[7:]
    if re.match("/[A-Za-z]:", path):
        # for windows file URI like "file:///C:/folder/file"
        # or "file:///C:\\dir\\file"
        path = path[1:].replace("\\", "/").replace("//", "/")
    if path.startswith("\\\\"):
        # special case for windows UNC/DFS-style paths, do nothing,
        # just flip the slashes around (case below does not work!)
        return path.replace("\\", "/")
    if re.match("[A-Za-z]:", path):
        # windows full path like "C:\\local\\path"
        return path.lstrip("\\").replace("\\", "/").replace("//", "/")
    if path.startswith("\\"):
        # windows network path like "\\server\\path"
        return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
    return path

276 

277 

def trailing_sep(path):
    """Tell whether ``path`` ends with a path separator.

    Both ``os.sep`` and, where the platform defines one, ``os.altsep`` count
    as separators, so a trailing forward slash is recognised even on
    Operating Systems that normally use a backslash.
    """
    # TODO: if all incoming paths were posix-compliant then separator would
    # always be a forward slash, simplifying this function.
    # See https://github.com/fsspec/filesystem_spec/pull/1250
    if os.altsep is None:
        separators = (os.sep,)
    else:
        separators = (os.sep, os.altsep)
    # str.endswith accepts a tuple and checks each candidate suffix
    return path.endswith(separators)

288 

289 

class LocalFileOpener(io.IOBase):
    """File-like wrapper around a local file opened with builtin ``open``.

    Most file operations are forwarded to the wrapped object ``self.f``.
    When ``autocommit`` is False and the mode contains "w", data is written
    to a temporary file which ``commit`` moves to ``path`` and ``discard``
    deletes.

    Parameters
    ----------
    path: str
        Location of the file.
    mode: str
        Mode string handed to ``open``.
    autocommit: bool
        If False (and writing), write to a temporary file until ``commit``.
    fs: object or None
        Stored on the instance as ``fs``.
    compression: str or None
        Passed with ``path`` to ``get_compression``; a truthy result selects
        a wrapper from ``compr`` applied on top of the opened file.
    **kwargs:
        Accepted and ignored.
    """

    def __init__(
        self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
    ):
        logger.debug("open file: %s", path)
        self.path = path
        self.mode = mode
        self.fs = fs
        self.f = None
        self.autocommit = autocommit
        self.compression = get_compression(path, compression)
        self.blocksize = io.DEFAULT_BUFFER_SIZE
        self._open()

    def _open(self):
        """(Re)open the underlying file if it is missing or closed."""
        if self.f is None or self.f.closed:
            if self.autocommit or "w" not in self.mode:
                self.f = open(self.path, mode=self.mode)
                if self.compression:
                    compress = compr[self.compression]
                    self.f = compress(self.f, mode=self.mode)
            else:
                # TODO: check if path is writable?
                i, name = tempfile.mkstemp()
                os.close(i)  # we want normal open and normal buffered file
                self.temp = name
                self.f = open(name, mode=self.mode)
            if "w" not in self.mode:
                # record the total size by seeking to the end, then rewind
                self.size = self.f.seek(0, 2)
                self.f.seek(0)
                self.f.size = self.size

    def _fetch_range(self, start, end):
        """Return the bytes from offset ``start`` up to ``end``.

        Raises
        ------
        ValueError
            If the mode does not contain "r".
        """
        # probably only used by cached FS
        if "r" not in self.mode:
            raise ValueError
        self._open()
        self.f.seek(start)
        return self.f.read(end - start)

    def __setstate__(self, state):
        """Restore from pickled state; read-mode files are reopened and
        positioned at the saved offset."""
        self.f = None
        loc = state.pop("loc", None)
        self.__dict__.update(state)
        if "r" in state["mode"]:
            self.f = None
            self._open()
            self.f.seek(loc)

    def __getstate__(self):
        """Return picklable state (without the open file object).

        Raises
        ------
        ValueError
            If the file is in a non-read mode and still open.
        """
        d = self.__dict__.copy()
        d.pop("f")
        if "r" in self.mode:
            d["loc"] = self.f.tell()
        else:
            if not self.f.closed:
                raise ValueError("Cannot serialise open write-mode local file")
        return d

    def commit(self):
        """Move the temporary file to its final ``path``.

        Raises
        ------
        RuntimeError
            If the file was opened with ``autocommit`` set.
        """
        if self.autocommit:
            raise RuntimeError("Can only commit if not already set to autocommit")
        shutil.move(self.temp, self.path)

    def discard(self):
        """Delete the temporary file instead of committing it.

        Raises
        ------
        RuntimeError
            If the file was opened with ``autocommit`` set.
        """
        if self.autocommit:
            raise RuntimeError("Cannot discard if set to autocommit")
        os.remove(self.temp)

    def readable(self) -> bool:
        """Always reports True, whatever the mode."""
        return True

    def writable(self) -> bool:
        """Return True unless the file was opened read-only.

        Update modes such as "r+b" contain "r" but do allow writing, so the
        "+" flag is checked as well.
        """
        return "r" not in self.mode or "+" in self.mode

    def read(self, *args, **kwargs):
        return self.f.read(*args, **kwargs)

    def write(self, *args, **kwargs):
        return self.f.write(*args, **kwargs)

    def tell(self, *args, **kwargs):
        return self.f.tell(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self.f.seek(*args, **kwargs)

    def seekable(self, *args, **kwargs):
        return self.f.seekable(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self.f.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self.f.readlines(*args, **kwargs)

    def close(self):
        """Close the underlying file; a no-op if none is open.

        ``f`` is None (or unset) when opening failed or after unpickling a
        write-mode instance, and ``close`` may still be called then, for
        example during object finalisation.
        """
        f = self.__dict__.get("f")
        if f is None:
            return None
        return f.close()

    def truncate(self, size=None) -> int:
        return self.f.truncate(size)

    @property
    def closed(self):
        """True if there is no underlying file or it has been closed."""
        f = self.__dict__.get("f")
        return True if f is None else f.closed

    def fileno(self):
        # ``raw`` is resolved on the wrapped file through __getattr__
        return self.raw.fileno()

    def flush(self) -> None:
        self.f.flush()

    def __iter__(self):
        return self.f.__iter__()

    def __getattr__(self, item):
        """Forward unknown attribute lookups to the wrapped file object."""
        if item == "f":
            # ``f`` itself is missing (instance created without __init__);
            # looking it up again below would recurse without end
            raise AttributeError(item)
        return getattr(self.f, item)

    def __enter__(self):
        self._incontext = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.f.__exit__(exc_type, exc_value, traceback)