Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/fsspec/implementations/local.py: 27%

290 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:56 +0000

1import datetime 

2import io 

3import logging 

4import os 

5import os.path as osp 

6import posixpath 

7import re 

8import shutil 

9import stat 

10import tempfile 

11 

12from fsspec import AbstractFileSystem 

13from fsspec.compression import compr 

14from fsspec.core import get_compression 

15from fsspec.utils import isfilelike, stringify_path 

16 

17logger = logging.getLogger("fsspec.local") 

18 

19 

class LocalFileSystem(AbstractFileSystem):
    """Interface to files on local storage

    Paths handed to the methods of this class may carry a ``file://`` or
    ``file:`` prefix; they are normalised through ``_strip_protocol`` before
    being passed to the operating system.

    Parameters
    ----------
    auto_mkdir: bool
        Whether, when opening a file, the directory containing it should
        be created (if it doesn't already exist). This is assumed by pyarrow
        code.
    """

    root_marker = "/"
    protocol = "file"
    local_file = True

    def __init__(self, auto_mkdir=False, **kwargs):
        super().__init__(**kwargs)
        self.auto_mkdir = auto_mkdir

    @property
    def fsid(self):
        """Constant identifier for this filesystem: ``"local"``."""
        return "local"

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory.

        Raises ``FileExistsError`` if ``path`` already exists. With
        ``create_parents`` the missing parents are created as well; otherwise
        ``os.mkdir`` is called directly and receives ``kwargs``.
        """
        path = self._strip_protocol(path)
        if self.exists(path):
            raise FileExistsError(path)
        if create_parents:
            self.makedirs(path, exist_ok=True)
        else:
            os.mkdir(path, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` and any missing parents via ``os.makedirs``."""
        path = self._strip_protocol(path)
        os.makedirs(path, exist_ok=exist_ok)

    def rmdir(self, path):
        """Remove the directory ``path`` via ``os.rmdir``."""
        path = self._strip_protocol(path)
        os.rmdir(path)

    def ls(self, path, detail=False, **kwargs):
        """List the entries of directory ``path``.

        Returns a list of ``info`` dictionaries when ``detail`` is true,
        otherwise a list of entry paths joined onto ``path`` with ``/``.
        """
        path = self._strip_protocol(path)
        if detail:
            with os.scandir(path) as it:
                return [self.info(f) for f in it]
        else:
            return [posixpath.join(path, f) for f in os.listdir(path)]

    def glob(self, path, **kwargs):
        """Normalise ``path`` and delegate to the base-class ``glob``."""
        path = self._strip_protocol(path)
        return super().glob(path, **kwargs)

    def info(self, path, **kwargs):
        """Return a dictionary describing ``path``.

        ``path`` may be a string / path-like object or an ``os.DirEntry``
        (as produced by ``os.scandir``). The result always holds the keys
        ``name``, ``size``, ``type`` (``"directory"``, ``"file"`` or
        ``"other"``), ``created``, ``islink``, ``mode``, ``uid``, ``gid``,
        ``mtime``, ``ino`` and ``nlink``; symbolic links additionally get
        ``destination``.
        """
        if isinstance(path, os.DirEntry):
            # scandir DirEntry: stat and classify the entry itself, not the
            # target of a link
            out = path.stat(follow_symlinks=False)
            link = path.is_symlink()
            if path.is_dir(follow_symlinks=False):
                t = "directory"
            elif path.is_file(follow_symlinks=False):
                t = "file"
            else:
                t = "other"
            path = self._strip_protocol(path.path)
        else:
            # str or path-like
            path = self._strip_protocol(path)
            out = os.stat(path, follow_symlinks=False)
            link = stat.S_ISLNK(out.st_mode)
            if link:
                # classify by the link target; raises if the target is missing
                out = os.stat(path, follow_symlinks=True)
            if stat.S_ISDIR(out.st_mode):
                t = "directory"
            elif stat.S_ISREG(out.st_mode):
                t = "file"
            else:
                t = "other"
        result = {
            "name": path,
            "size": out.st_size,
            "type": t,
            "created": out.st_ctime,
            "islink": link,
        }
        for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
            result[field] = getattr(out, "st_" + field)
        if result["islink"]:
            result["destination"] = os.readlink(path)
            try:
                # report the size of the target; a dangling link counts as 0
                out2 = os.stat(path, follow_symlinks=True)
                result["size"] = out2.st_size
            except OSError:
                result["size"] = 0
        return result

    def lexists(self, path, **kwargs):
        """Return True if ``path`` exists, without following a final symlink."""
        # Normalise first so that "file://" URLs behave like in the sibling
        # methods (e.g. ``islink``) instead of always reporting False.
        return osp.lexists(self._strip_protocol(path))

    def cp_file(self, path1, path2, **kwargs):
        """Copy the single file ``path1`` to ``path2``.

        If ``path1`` is a directory, only the directory ``path2`` is created.
        Raises ``FileNotFoundError`` when ``path1`` is neither.
        """
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        if self.auto_mkdir:
            self.makedirs(self._parent(path2), exist_ok=True)
        if self.isfile(path1):
            shutil.copyfile(path1, path2)
        elif self.isdir(path1):
            # use this class's own makedirs rather than the ``mkdirs``
            # spelling, which is not defined in this file
            self.makedirs(path2, exist_ok=True)
        else:
            raise FileNotFoundError(path1)

    def get_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` into ``path2``.

        ``path2`` may be an open file-like object, in which case the bytes of
        ``path1`` are written into it; otherwise this is ``cp_file``.
        ``callback`` is accepted but not used here.
        """
        if isfilelike(path2):
            # strip the protocol: the builtin open() cannot handle "file://"
            with open(self._strip_protocol(path1), "rb") as f:
                shutil.copyfileobj(f, path2)
        else:
            return self.cp_file(path1, path2, **kwargs)

    def put_file(self, path1, path2, callback=None, **kwargs):
        """Copy ``path1`` to ``path2`` via ``cp_file``; ``callback`` is unused."""
        return self.cp_file(path1, path2, **kwargs)

    def mv_file(self, path1, path2, **kwargs):
        """Move ``path1`` to ``path2`` with ``shutil.move``."""
        path1 = self._strip_protocol(path1).rstrip("/")
        path2 = self._strip_protocol(path2).rstrip("/")
        shutil.move(path1, path2)

    def link(self, src, dst, **kwargs):
        """Create a hard link ``dst`` pointing at ``src`` (``os.link``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.link(src, dst, **kwargs)

    def symlink(self, src, dst, **kwargs):
        """Create a symbolic link ``dst`` pointing at ``src`` (``os.symlink``)."""
        src = self._strip_protocol(src)
        dst = self._strip_protocol(dst)
        os.symlink(src, dst, **kwargs)

    def islink(self, path) -> bool:
        """Return True if ``path`` is a symbolic link."""
        return os.path.islink(self._strip_protocol(path))

    def rm_file(self, path):
        """Delete the single file ``path`` via ``os.remove``."""
        os.remove(self._strip_protocol(path))

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete one path or a list of paths.

        Directories are only removed when ``recursive`` is true, and the
        current working directory is never removed; both cases raise
        ``ValueError``. ``maxdepth`` is accepted but not used here.
        """
        if not isinstance(path, list):
            path = [path]

        for p in path:
            p = self._strip_protocol(p).rstrip("/")
            if self.isdir(p):
                if not recursive:
                    raise ValueError("Cannot delete directory, set recursive=True")
                if osp.abspath(p) == os.getcwd():
                    raise ValueError("Cannot delete current working directory")
                shutil.rmtree(p)
            else:
                os.remove(p)

    def unstrip_protocol(self, name):
        """Return ``name`` as a ``file://`` URL."""
        name = self._strip_protocol(name)  # normalise for local/win/...
        return f"file://{name}"

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        """Open ``path`` and return a ``LocalFileOpener``.

        With ``auto_mkdir`` set, the parent directory is created first for
        modes containing ``"w"``. ``block_size`` is accepted but not used.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir and "w" in mode:
            self.makedirs(self._parent(path), exist_ok=True)
        return LocalFileOpener(path, mode, fs=self, **kwargs)

    def touch(self, path, truncate=True, **kwargs):
        """Create ``path`` if missing, else update its timestamps.

        When ``truncate`` is true the file is cut to zero length afterwards,
        whether or not it existed before.
        """
        path = self._strip_protocol(path)
        if self.auto_mkdir:
            self.makedirs(self._parent(path), exist_ok=True)
        if self.exists(path):
            os.utime(path, None)
        else:
            open(path, "a").close()
        if truncate:
            os.truncate(path, 0)

    def created(self, path):
        """Return the ``created`` timestamp of ``path`` as a naive UTC datetime.

        NOTE(review): ``created`` is taken from ``st_ctime`` in ``info``; its
        meaning is platform dependent -- see the ``os.stat`` documentation.
        """
        info = self.info(path=path)
        return datetime.datetime.utcfromtimestamp(info["created"])

    def modified(self, path):
        """Return the modification time of ``path`` as a naive UTC datetime."""
        info = self.info(path=path)
        return datetime.datetime.utcfromtimestamp(info["mtime"])

    @classmethod
    def _parent(cls, path):
        """Return the normalised parent of ``path`` (``root_marker`` if none)."""
        path = cls._strip_protocol(path).rstrip("/")
        if "/" in path:
            return path.rsplit("/", 1)[0]
        else:
            return cls.root_marker

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a ``file://`` / ``file:`` prefix and normalise ``path``.

        The result goes through ``make_path_posix`` and has trailing slashes
        removed; an empty result becomes ``root_marker``.
        """
        path = stringify_path(path)
        if path.startswith("file://"):
            path = path[7:]
        elif path.startswith("file:"):
            path = path[5:]
        return make_path_posix(path).rstrip("/") or cls.root_marker

    def _isfilestore(self):
        # Inheriting from DaskFileSystem makes this False (S3, etc. were)
        # the original motivation. But we are a posix-like file system.
        # See https://github.com/dask/dask/issues/5526
        return True

    def chmod(self, path, mode):
        """Change the permission bits of ``path`` to ``mode`` (``os.chmod``)."""
        # normalise like every other method so "file://" URLs are accepted
        path = self._strip_protocol(path)
        return os.chmod(path, mode)

231 

232 

def make_path_posix(path, sep=os.sep):
    """Make path generic

    Convert ``path`` to an absolute path that uses forward slashes.

    Parameters
    ----------
    path: str or list/set/tuple of str
        Path(s) to convert. A collection is converted element-wise and a
        collection of the same type is returned.
    sep: str
        Separator convention the input follows; defaults to ``os.sep``.

    Returns
    -------
    The converted path (or collection of paths). Relative paths are anchored
    at the current working directory and a ``~`` triggers user expansion.
    """
    if isinstance(path, (list, set, tuple)):
        # forward ``sep`` so the elements follow the caller's convention
        return type(path)(make_path_posix(p, sep) for p in path)
    if "~" in path:
        path = osp.expanduser(path)
    if sep == "/":
        # most common fast case for posix
        if path.startswith("/"):
            return path
        if path.startswith("./"):
            path = path[2:]
        return os.getcwd() + "/" + path
    # from here on ``sep`` is never "/" (that case always returned above)
    if (sep not in path and "/" not in path) or (
        sep == "\\" and ":" not in path and not path.startswith("\\\\")
    ):
        # relative path like "path" or "rel\\path" (win) or rel/path"
        if os.sep == "\\":
            # abspath made some more '\\' separators
            return make_path_posix(osp.abspath(path))
        else:
            return os.getcwd() + "/" + path
    if path.startswith("file://"):
        path = path[7:]
    if re.match("/[A-Za-z]:", path):
        # for windows file URI like "file:///C:/folder/file"
        # or "file:///C:\\dir\\file"
        path = path[1:].replace("\\", "/").replace("//", "/")
    if path.startswith("\\\\"):
        # special case for windows UNC/DFS-style paths, do nothing,
        # just flip the slashes around (case below does not work!)
        return path.replace("\\", "/")
    if re.match("[A-Za-z]:", path):
        # windows full path like "C:\\local\\path"
        return path.lstrip("\\").replace("\\", "/").replace("//", "/")
    if path.startswith("\\"):
        # windows network path like "\\server\\path"
        return "/" + path.lstrip("\\").replace("\\", "/").replace("//", "/")
    return path

274 

275 

def trailing_sep(path):
    """Tell whether ``path`` finishes with a path separator.

    Both ``os.sep`` and, where the platform defines one, ``os.altsep`` count
    as separators, so a forward slash is recognised even on Operating Systems
    that normally use a backslash.
    """
    # TODO: if all incoming paths were posix-compliant then separator would
    # always be a forward slash, simplifying this function.
    # See https://github.com/fsspec/filesystem_spec/pull/1250
    if path.endswith(os.sep):
        return True
    alternate = os.altsep
    if alternate is None:
        return False
    return path.endswith(alternate)

286 

287 

def trailing_sep_maybe_asterisk(path):
    """Tell whether ``path`` finishes with a path separator, optionally
    followed by a single asterisk.

    Both ``os.sep`` and, where the platform defines one, ``os.altsep`` count
    as separators, so a forward slash is recognised even on Operating Systems
    that normally use a backslash.
    """
    # TODO: if all incoming paths were posix-compliant then separator would
    # always be a forward slash, simplifying this function.
    # See https://github.com/fsspec/filesystem_spec/pull/1250
    primary_endings = (os.sep, os.sep + "*")
    if path.endswith(primary_endings):
        return True
    alternate = os.altsep
    if alternate is None:
        return False
    return path.endswith((alternate, alternate + "*"))

301 

302 

class LocalFileOpener(io.IOBase):
    """File-like object wrapping a local file opened with the builtin ``open``.

    Most of the file API is forwarded to the wrapped object ``self.f``.

    Parameters
    ----------
    path: str
        Location of the file.
    mode: str
        Mode handed to ``open``.
    autocommit: bool
        If false and ``mode`` contains ``"w"``, data is written to a
        temporary file which only replaces ``path`` on ``commit()``.
    fs:
        Stored as ``self.fs``; not otherwise used by this class.
    compression:
        Passed to ``get_compression`` together with ``path``; a truthy result
        selects the wrapper from ``compr`` that is put around the opened file.
    """

    def __init__(
        self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
    ):
        logger.debug("open file: %s", path)
        self.path = path
        self.mode = mode
        self.fs = fs
        self.f = None
        self.autocommit = autocommit
        self.compression = get_compression(path, compression)
        self.blocksize = io.DEFAULT_BUFFER_SIZE
        self._open()

    def _open(self):
        """(Re)open the underlying file unless it is already open."""
        if self.f is None or self.f.closed:
            if self.autocommit or "w" not in self.mode:
                self.f = open(self.path, mode=self.mode)
                if self.compression:
                    compress = compr[self.compression]
                    self.f = compress(self.f, mode=self.mode)
            else:
                # TODO: check if path is writable?
                i, name = tempfile.mkstemp()
                os.close(i)  # we want normal open and normal buffered file
                self.temp = name
                self.f = open(name, mode=self.mode)
            if "w" not in self.mode:
                # record the size by seeking to the end, then rewind
                self.size = self.f.seek(0, 2)
                self.f.seek(0)
                self.f.size = self.size

    def _fetch_range(self, start, end):
        """Return the bytes in ``[start, end)``; only for modes with ``"r"``."""
        # probably only used by cached FS
        if "r" not in self.mode:
            raise ValueError
        self._open()
        self.f.seek(start)
        return self.f.read(end - start)

    def __setstate__(self, state):
        """Restore from pickled state; read-mode files are reopened and
        positioned at the saved offset."""
        self.f = None
        loc = state.pop("loc", None)
        self.__dict__.update(state)
        if "r" in state["mode"]:
            self.f = None
            self._open()
            self.f.seek(loc)

    def __getstate__(self):
        """Return picklable state: the open file is dropped and, in read
        mode, the current offset is stored as ``loc``.

        Raises ``ValueError`` for a write-mode file that is still open.
        """
        d = self.__dict__.copy()
        d.pop("f")
        if "r" in self.mode:
            d["loc"] = self.f.tell()
        else:
            if not self.f.closed:
                raise ValueError("Cannot serialise open write-mode local file")
        return d

    def commit(self):
        """Move the temporary file to its final ``path``."""
        if self.autocommit:
            raise RuntimeError("Can only commit if not already set to autocommit")
        shutil.move(self.temp, self.path)

    def discard(self):
        """Delete the temporary file without committing it."""
        if self.autocommit:
            raise RuntimeError("Cannot discard if set to autocommit")
        os.remove(self.temp)

    def readable(self) -> bool:
        return True

    def writable(self) -> bool:
        return "r" not in self.mode

    def read(self, *args, **kwargs):
        return self.f.read(*args, **kwargs)

    def write(self, *args, **kwargs):
        return self.f.write(*args, **kwargs)

    def tell(self, *args, **kwargs):
        return self.f.tell(*args, **kwargs)

    def seek(self, *args, **kwargs):
        return self.f.seek(*args, **kwargs)

    def seekable(self, *args, **kwargs):
        return self.f.seekable(*args, **kwargs)

    def readline(self, *args, **kwargs):
        return self.f.readline(*args, **kwargs)

    def readlines(self, *args, **kwargs):
        return self.f.readlines(*args, **kwargs)

    def close(self):
        return self.f.close()

    @property
    def closed(self):
        return self.f.closed

    def fileno(self):
        """Return the file descriptor of the wrapped file."""
        # Ask the wrapped object itself: only buffered binary files expose
        # ``.raw``, so going through it failed for text-mode or
        # compression-wrapped files.
        return self.f.fileno()

    def flush(self) -> None:
        self.f.flush()

    def __iter__(self):
        return self.f.__iter__()

    def __getattr__(self, item):
        """Forward unknown attributes to the wrapped file object."""
        if item == "f":
            # ``f`` itself is missing (instance not initialised yet); looking
            # it up again below would recurse without bound
            raise AttributeError(item)
        return getattr(self.f, item)

    def __enter__(self):
        self._incontext = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._incontext = False
        self.f.__exit__(exc_type, exc_value, traceback)