Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/fsspec/spec.py: 24%

885 statements  

1from __future__ import annotations 

2 

3import io 

4import json 

5import logging 

6import os 

7import threading 

8import warnings 

9import weakref 

10from errno import ESPIPE 

11from glob import has_magic 

12from hashlib import sha256 

13from typing import Any, ClassVar 

14 

15from .callbacks import DEFAULT_CALLBACK 

16from .config import apply_config, conf 

17from .dircache import DirCache 

18from .transaction import Transaction 

19from .utils import ( 

20 _unstrip_protocol, 

21 glob_translate, 

22 isfilelike, 

23 other_paths, 

24 read_block, 

25 stringify_path, 

26 tokenize, 

27) 

28 

29logger = logging.getLogger("fsspec") 

30 

31 

32def make_instance(cls, args, kwargs): 

33 return cls(*args, **kwargs) 

34 

35 

36class _Cached(type): 

37 """ 

38 Metaclass for caching file system instances. 

39 

40 Notes 

41 ----- 

42 Instances are cached according to 

43 

44 * The values of the class attributes listed in `_extra_tokenize_attributes` 

45 * The arguments passed to ``__init__``. 

46 

47 This creates an additional reference to the filesystem, which prevents the 

48 filesystem from being garbage collected when all *user* references go away. 

49 A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also* 

50 be made for a filesystem instance to be garbage collected. 

51 """ 

52 

53 def __init__(cls, *args, **kwargs): 

54 super().__init__(*args, **kwargs) 

55 # Note: we intentionally create a reference here, to avoid garbage 

56 # collecting instances when all other references are gone. To really 

57 # delete a FileSystem, the cache must be cleared. 

58 if conf.get("weakref_instance_cache"): # pragma: no cover 

59 # debug option for analysing fork/spawn conditions 

60 cls._cache = weakref.WeakValueDictionary() 

61 else: 

62 cls._cache = {} 

63 cls._pid = os.getpid() 

64 

65 def __call__(cls, *args, **kwargs): 

66 kwargs = apply_config(cls, kwargs) 

67 extra_tokens = tuple( 

68 getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes 

69 ) 

70 strip_tokenize_options = { 

71 k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs 

72 } 

73 token = tokenize( 

74 cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs 

75 ) 

76 skip = kwargs.pop("skip_instance_cache", False) 

77 if os.getpid() != cls._pid: 

78 cls._cache.clear() 

79 cls._pid = os.getpid() 

80 if not skip and cls.cachable and token in cls._cache: 

81 cls._latest = token 

82 return cls._cache[token] 

83 else: 

84 obj = super().__call__(*args, **kwargs, **strip_tokenize_options) 

85 # Setting _fs_token here causes some static linters to complain. 

86 obj._fs_token_ = token 

87 obj.storage_args = args 

88 obj.storage_options = kwargs 

89 if obj.async_impl and obj.mirror_sync_methods: 

90 from .asyn import mirror_sync_methods 

91 

92 mirror_sync_methods(obj) 

93 

94 if cls.cachable and not skip: 

95 cls._latest = token 

96 cls._cache[token] = obj 

97 return obj 

98 

99 
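# Illustrative sketch (added for clarity, not part of the original source):
# because _Cached tokenizes the constructor arguments, equal arguments yield
# the same cached instance unless skip_instance_cache=True is passed.
#
#     import fsspec
#     fs1 = fsspec.filesystem("memory")
#     fs2 = fsspec.filesystem("memory")
#     assert fs1 is fs2                 # same token -> same cached instance
#     fs3 = fsspec.filesystem("memory", skip_instance_cache=True)
#     assert fs3 is not fs1             # cache bypassed, fresh instance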

100class AbstractFileSystem(metaclass=_Cached): 

101 """ 

102 An abstract super-class for pythonic file-systems 

103 

104 Implementations are expected to be compatible with or, better, subclass 

105 from here. 

106 """ 

107 

108 cachable = True # this class can be cached, instances reused 

109 _cached = False 

110 blocksize = 2**22 

111 sep = "/" 

112 protocol: ClassVar[str | tuple[str, ...]] = "abstract" 

113 _latest = None 

114 async_impl = False 

115 mirror_sync_methods = False 

116 root_marker = "" # For some FSs, may require leading '/' or other character 

117 transaction_type = Transaction 

118 

119 #: Extra *class attributes* that should be considered when hashing. 

120 _extra_tokenize_attributes = () 

121 #: *storage options* that should not be considered when hashing. 

122 _strip_tokenize_options = () 

123 

124 # Set by _Cached metaclass 

125 storage_args: tuple[Any, ...] 

126 storage_options: dict[str, Any] 

127 

128 def __init__(self, *args, **storage_options): 

129 """Create and configure file-system instance 

130 

131 Instances may be cachable, so if similar enough arguments are seen 

132 a new instance is not required. The token attribute exists to allow 

133 implementations to cache instances if they wish. 

134 

135 A reasonable default should be provided if there are no arguments. 

136 

137 Subclasses should call this method. 

138 

139 Parameters 

140 ---------- 

141 use_listings_cache, listings_expiry_time, max_paths: 

142 passed to ``DirCache``, if the implementation supports 

143 directory listing caching. Pass use_listings_cache=False 

144 to disable such caching. 

145 skip_instance_cache: bool 

146 If this is a cachable implementation, pass True here to force 

147 creating a new instance even if a matching instance exists, and prevent 

148 storing this instance. 

149 asynchronous: bool 

150 loop: asyncio-compatible IOLoop or None 

151 """ 

152 if self._cached: 

153 # reusing instance, don't change 

154 return 

155 self._cached = True 

156 self._intrans = False 

157 self._transaction = None 

158 self._invalidated_caches_in_transaction = [] 

159 self.dircache = DirCache(**storage_options) 

160 

161 if storage_options.pop("add_docs", None): 

162 warnings.warn("add_docs is no longer supported.", FutureWarning) 

163 

164 if storage_options.pop("add_aliases", None): 

165 warnings.warn("add_aliases has been removed.", FutureWarning) 

166 # This is set in _Cached 

167 self._fs_token_ = None 

168 

169 @property 

170 def fsid(self): 

171 """Persistent filesystem id that can be used to compare filesystems 

172 across sessions. 

173 """ 

174 raise NotImplementedError 

175 

176 @property 

177 def _fs_token(self): 

178 return self._fs_token_ 

179 

180 def __dask_tokenize__(self): 

181 return self._fs_token 

182 

183 def __hash__(self): 

184 return int(self._fs_token, 16) 

185 

186 def __eq__(self, other): 

187 return isinstance(other, type(self)) and self._fs_token == other._fs_token 

188 

189 def __reduce__(self): 

190 return make_instance, (type(self), self.storage_args, self.storage_options) 

191 

192 @classmethod 

193 def _strip_protocol(cls, path): 

194 """Turn path from fully-qualified to file-system-specific 

195 

196 May require FS-specific handling, e.g., for relative paths or links. 

197 """ 

198 if isinstance(path, list): 

199 return [cls._strip_protocol(p) for p in path] 

200 path = stringify_path(path) 

201 protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol 

202 for protocol in protos: 

203 if path.startswith(protocol + "://"): 

204 path = path[len(protocol) + 3 :] 

205 elif path.startswith(protocol + "::"): 

206 path = path[len(protocol) + 2 :] 

207 path = path.rstrip("/") 

208 # use of root_marker to make minimum required path, e.g., "/" 

209 return path or cls.root_marker 

210 

211 def unstrip_protocol(self, name: str) -> str: 

212 """Format FS-specific path to generic, including protocol""" 

213 protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol 

214 for protocol in protos: 

215 if name.startswith(f"{protocol}://"): 

216 return name 

217 return f"{protos[0]}://{name}" 

218 
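# Illustrative example (added; uses the abstract defaults, where root_marker
# is ""): _strip_protocol and unstrip_protocol are near-inverses around the
# bare path.
#
#     AbstractFileSystem._strip_protocol("abstract://a/b")   # -> "a/b"
#     AbstractFileSystem().unstrip_protocol("a/b")           # -> "abstract://a/b"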

219 @staticmethod 

220 def _get_kwargs_from_urls(path): 

221 """If kwargs can be encoded in the paths, extract them here 

222 

223 This should happen before instantiation of the class; incoming paths 

224 then should be amended to strip the options in methods. 

225 

226 Examples may look like an sftp path "sftp://user@host:/my/path", where 

227 the user and host should become kwargs and later get stripped. 

228 """ 

229 # by default, nothing happens 

230 return {} 

231 

232 @classmethod 

233 def current(cls): 

234 """Return the most recently instantiated FileSystem 

235 

236 If no instance has been created, then create one with defaults 

237 """ 

238 if cls._latest in cls._cache: 

239 return cls._cache[cls._latest] 

240 return cls() 

241 

242 @property 

243 def transaction(self): 

244 """A context within which files are committed together upon exit 

245 

246 Requires the file class to implement `.commit()` and `.discard()` 

247 for the normal and exception cases. 

248 """ 

249 if self._transaction is None: 

250 self._transaction = self.transaction_type(self) 

251 return self._transaction 

252 

253 def start_transaction(self): 

254 """Begin write transaction for deferring files, non-context version""" 

255 self._intrans = True 

256 self._transaction = self.transaction_type(self) 

257 return self.transaction 

258 

259 def end_transaction(self): 

260 """Finish write transaction, non-context version""" 

261 self.transaction.complete() 

262 self._transaction = None 

263 # Caches invalidated during the transaction are only cleared once it completes.

264 for path in self._invalidated_caches_in_transaction: 

265 self.invalidate_cache(path) 

266 self._invalidated_caches_in_transaction.clear() 

267 
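# Usage sketch (added; assumes a backend whose file class implements
# .commit()/.discard(), e.g. the bundled memory filesystem): writes made
# inside the context are deferred and only committed on clean exit.
#
#     import fsspec
#     fs = fsspec.filesystem("memory")
#     with fs.transaction:
#         with fs.open("/staged.bin", "wb") as f:
#             f.write(b"data")          # committed when the block exits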

268 def invalidate_cache(self, path=None): 

269 """ 

270 Discard any cached directory information 

271 

272 Parameters 

273 ---------- 

274 path: string or None 

275 If None, clear all listings cached else listings at or under given 

276 path. 

277 """ 

278 # It is not necessary to implement an invalidation mechanism; there may be no cache.

279 # But if there is one, subclasses should call this parent-class method to

280 # ensure caches expire correctly after transactions.

281 # See the implementation of FTPFileSystem in ftp.py 

282 if self._intrans: 

283 self._invalidated_caches_in_transaction.append(path) 

284 

285 def mkdir(self, path, create_parents=True, **kwargs): 

286 """ 

287 Create directory entry at path 

288 

289 For systems that don't have true directories, may create an entry for

290 this instance only and not touch the real filesystem

291 

292 Parameters 

293 ---------- 

294 path: str 

295 location 

296 create_parents: bool 

297 if True, this is equivalent to ``makedirs`` 

298 kwargs: 

299 may be permissions, etc. 

300 """ 

301 pass # not necessary to implement, may not have directories 

302 

303 def makedirs(self, path, exist_ok=False): 

304 """Recursively make directories 

305 

306 Creates directory at path and any intervening required directories. 

307 Raises exception if, for instance, the path already exists but is a 

308 file. 

309 

310 Parameters 

311 ---------- 

312 path: str 

313 leaf directory name 

314 exist_ok: bool (False) 

315 If False, will error if the target already exists 

316 """ 

317 pass # not necessary to implement, may not have directories 

318 

319 def rmdir(self, path): 

320 """Remove a directory, if empty""" 

321 pass # not necessary to implement, may not have directories 

322 

323 def ls(self, path, detail=True, **kwargs): 

324 """List objects at path. 

325 

326 This should include subdirectories and files at that location. The 

327 difference between a file and a directory must be clear when details 

328 are requested. 

329 

330 The specific keys (or perhaps a FileInfo class, or similar) are TBD,

331 but must be consistent across implementations. 

332 Must include: 

333 

334 - full path to the entry (without protocol) 

335 - size of the entry, in bytes. If the value cannot be determined, will 

336 be ``None``. 

337 - type of entry, "file", "directory" or other 

338 

339 Additional information 

340 may be present, appropriate to the file-system, e.g., generation, 

341 checksum, etc. 

342 

343 May use refresh=True|False to allow use of self._ls_from_cache to 

344 check for a saved listing and avoid calling the backend. This would be 

345 common where listing may be expensive. 

346 

347 Parameters 

348 ---------- 

349 path: str 

350 detail: bool 

351 if True, gives a list of dictionaries, where each is the same as 

352 the result of ``info(path)``. If False, gives a list of paths 

353 (str). 

354 kwargs: may have additional backend-specific options, such as version 

355 information 

356 

357 Returns 

358 ------- 

359 List of strings if detail is False, or list of directory information 

360 dicts if detail is True. 

361 """ 

362 raise NotImplementedError 

363 

364 def _ls_from_cache(self, path): 

365 """Check cache for listing 

366 

367 Returns listing, if found (may be an empty list for a directory that exists

368 but contains nothing), None if not in cache. 

369 """ 

370 parent = self._parent(path) 

371 try: 

372 return self.dircache[path.rstrip("/")] 

373 except KeyError: 

374 pass 

375 try: 

376 files = [ 

377 f 

378 for f in self.dircache[parent] 

379 if f["name"] == path 

380 or (f["name"] == path.rstrip("/") and f["type"] == "directory") 

381 ] 

382 if len(files) == 0: 

383 # parent dir was listed but did not contain this file 

384 raise FileNotFoundError(path) 

385 return files 

386 except KeyError: 

387 pass 

388 

389 def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs): 

390 """Return all files under the given path. 

391 

392 List all files, recursing into subdirectories; output is iterator-style, 

393 like ``os.walk()``. For a simple list of files, ``find()`` is available. 

394 

395 When topdown is True, the caller can modify the dirnames list in-place (perhaps 

396 using del or slice assignment), and walk() will 

397 only recurse into the subdirectories whose names remain in dirnames; 

398 this can be used to prune the search, impose a specific order of visiting, 

399 or even to inform walk() about directories the caller creates or renames before 

400 it resumes walk() again. 

401 Modifying dirnames when topdown is False has no effect. (see os.walk) 

402 

403 Note that the "files" yielded will include anything that is not

404 a directory, such as links. 

405 

406 Parameters 

407 ---------- 

408 path: str 

409 Root to recurse into 

410 maxdepth: int 

411 Maximum recursion depth. None means limitless, but not recommended 

412 on link-based file-systems. 

413 topdown: bool (True) 

414 Whether to walk the directory tree from the top downwards or from 

415 the bottom upwards. 

416 on_error: "omit", "raise", a callable 

417 if "omit" (default), a path that raises an exception yields empty lists;

418 if "raise", the underlying exception is raised;

419 if callable, it will be called with a single OSError instance as an argument

420 kwargs: passed to ``ls`` 

421 """ 

422 if maxdepth is not None and maxdepth < 1: 

423 raise ValueError("maxdepth must be at least 1") 

424 

425 path = self._strip_protocol(path) 

426 full_dirs = {} 

427 dirs = {} 

428 files = {} 

429 

430 detail = kwargs.pop("detail", False) 

431 try: 

432 listing = self.ls(path, detail=True, **kwargs) 

433 except (FileNotFoundError, OSError) as e: 

434 if on_error == "raise": 

435 raise 

436 if callable(on_error): 

437 on_error(e) 

438 return 

439 

440 for info in listing: 

441 # each info name must be at least [path]/part , but here 

442 # we check also for names like [path]/part/ 

443 pathname = info["name"].rstrip("/") 

444 name = pathname.rsplit("/", 1)[-1] 

445 if info["type"] == "directory" and pathname != path: 

446 # do not include "self" path 

447 full_dirs[name] = pathname 

448 dirs[name] = info 

449 elif pathname == path: 

450 # file-like with same name as given path

451 files[""] = info 

452 else: 

453 files[name] = info 

454 

455 if not detail: 

456 dirs = list(dirs) 

457 files = list(files) 

458 

459 if topdown: 

460 # Yield before recursion if walking top down 

461 yield path, dirs, files 

462 

463 if maxdepth is not None: 

464 maxdepth -= 1 

465 if maxdepth < 1: 

466 if not topdown: 

467 yield path, dirs, files 

468 return 

469 

470 for d in dirs: 

471 yield from self.walk( 

472 full_dirs[d], 

473 maxdepth=maxdepth, 

474 detail=detail, 

475 topdown=topdown, 

476 **kwargs, 

477 ) 

478 

479 if not topdown: 

480 # Yield after recursion if walking bottom up 

481 yield path, dirs, files 

482 
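# Usage sketch (added; paths are hypothetical): as with os.walk, mutating
# `dirs` in place while walking top-down prunes the traversal.
#
#     for root, dirs, files in fs.walk("/data", maxdepth=3):
#         dirs[:] = [d for d in dirs if not d.startswith(".")]  # skip hidden dirs
#         for name in files:
#             print(f"{root}/{name}")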

483 def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): 

484 """List all files below path. 

485 

486 Like posix ``find`` command without conditions 

487 

488 Parameters 

489 ---------- 

490 path : str 

491 maxdepth: int or None 

492 If not None, the maximum number of levels to descend 

493 withdirs: bool 

494 Whether to include directory paths in the output. This is True 

495 when used by glob, but users usually only want files. 

496 kwargs are passed to ``ls``. 

497 """ 

498 # TODO: allow equivalent of -name parameter 

499 path = self._strip_protocol(path) 

500 out = {} 

501 

502 # Add the root directory if withdirs is requested 

503 # This is needed for posix glob compliance 

504 if withdirs and path != "" and self.isdir(path): 

505 out[path] = self.info(path) 

506 

507 for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs): 

508 if withdirs: 

509 files.update(dirs) 

510 out.update({info["name"]: info for name, info in files.items()}) 

511 if not out and self.isfile(path): 

512 # walk works on directories, but find should also return [path] 

513 # when path happens to be a file 

514 out[path] = {} 

515 names = sorted(out) 

516 if not detail: 

517 return names 

518 else: 

519 return {name: out[name] for name in names} 

520 
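# Usage sketch (added; paths are hypothetical):
#
#     fs.find("/data", maxdepth=2)                   # sorted list of file paths
#     fs.find("/data", withdirs=True, detail=True)   # {path: info_dict}, dirs included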

521 def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): 

522 """Space used by files and optionally directories within a path 

523 

524 Directory size does not include the size of its contents. 

525 

526 Parameters 

527 ---------- 

528 path: str 

529 total: bool 

530 Whether to sum all the file sizes 

531 maxdepth: int or None 

532 Maximum number of directory levels to descend, None for unlimited. 

533 withdirs: bool 

534 Whether to include directory paths in the output. 

535 kwargs: passed to ``find`` 

536 

537 Returns 

538 ------- 

539 Dict of {path: size} if total=False, or int otherwise, where numbers 

540 refer to bytes used. 

541 """ 

542 sizes = {} 

543 if withdirs and self.isdir(path): 

544 # Include top-level directory in output 

545 info = self.info(path) 

546 sizes[info["name"]] = info["size"] 

547 for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs): 

548 info = self.info(f) 

549 sizes[info["name"]] = info["size"] 

550 if total: 

551 return sum(sizes.values()) 

552 else: 

553 return sizes 

554 
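# Usage sketch (added; paths are hypothetical):
#
#     fs.du("/data")               # int: total bytes of all files under /data
#     fs.du("/data", total=False)  # {path: size} per entry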

555 def glob(self, path, maxdepth=None, **kwargs): 

556 """Find files by glob-matching. 

557 

558 Pattern matching capabilities for finding files that match the given pattern. 

559 

560 Parameters 

561 ---------- 

562 path: str 

563 The glob pattern to match against 

564 maxdepth: int or None 

565 Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found. 

566 Must be at least 1 if provided. 

567 kwargs: 

568 Additional arguments passed to ``find`` (e.g., detail=True) 

569 

570 Returns 

571 ------- 

572 List of matched paths, or dict of paths and their info if detail=True 

573 

574 Notes 

575 ----- 

576 Supported patterns: 

577 - '*': Matches any sequence of characters within a single directory level 

578 - ``'**'``: Matches any number of directory levels (must be an entire path component) 

579 - '?': Matches exactly one character 

580 - '[abc]': Matches any character in the set 

581 - '[a-z]': Matches any character in the range 

582 - '[!abc]': Matches any character NOT in the set 

583 

584 Special behaviors: 

585 - If the path ends with '/', only folders are returned 

586 - Consecutive '*' characters are compressed into a single '*' 

587 - Empty brackets '[]' never match anything 

588 - Negated empty brackets '[!]' match any single character 

589 - Special characters in character classes are escaped properly 

590 

591 Limitations: 

592 - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``) 

593 - No brace expansion ('{a,b}.txt') 

594 - No extended glob patterns ('+(pattern)', '!(pattern)') 

595 """ 

596 if maxdepth is not None and maxdepth < 1: 

597 raise ValueError("maxdepth must be at least 1") 

598 

599 import re 

600 

601 seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,) 

602 ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash 

603 path = self._strip_protocol(path) 

604 append_slash_to_dirname = ends_with_sep or path.endswith( 

605 tuple(sep + "**" for sep in seps) 

606 ) 

607 idx_star = path.find("*") if path.find("*") >= 0 else len(path) 

608 idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) 

609 idx_brace = path.find("[") if path.find("[") >= 0 else len(path) 

610 

611 min_idx = min(idx_star, idx_qmark, idx_brace) 

612 

613 detail = kwargs.pop("detail", False) 

614 

615 if not has_magic(path): 

616 if self.exists(path, **kwargs): 

617 if not detail: 

618 return [path] 

619 else: 

620 return {path: self.info(path, **kwargs)} 

621 else: 

622 if not detail: 

623 return [] # glob of non-existent returns empty 

624 else: 

625 return {} 

626 elif "/" in path[:min_idx]: 

627 min_idx = path[:min_idx].rindex("/") 

628 root = path[: min_idx + 1] 

629 depth = path[min_idx + 1 :].count("/") + 1 

630 else: 

631 root = "" 

632 depth = path[min_idx + 1 :].count("/") + 1 

633 

634 if "**" in path: 

635 if maxdepth is not None: 

636 idx_double_stars = path.find("**") 

637 depth_double_stars = path[idx_double_stars:].count("/") + 1 

638 depth = depth - depth_double_stars + maxdepth 

639 else: 

640 depth = None 

641 

642 allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs) 

643 

644 pattern = glob_translate(path + ("/" if ends_with_sep else "")) 

645 pattern = re.compile(pattern) 

646 

647 out = { 

648 p: info 

649 for p, info in sorted(allpaths.items()) 

650 if pattern.match( 

651 p + "/" 

652 if append_slash_to_dirname and info["type"] == "directory" 

653 else p 

654 ) 

655 } 

656 

657 if detail: 

658 return out 

659 else: 

660 return list(out) 

661 
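# Pattern sketch (added; paths are hypothetical):
#
#     fs.glob("/data/*.csv")            # files directly under /data
#     fs.glob("/data/**/*.csv")         # matches at any depth below /data
#     fs.glob("/data/**", maxdepth=2)   # bound the depth of the '**' component
#     fs.glob("/data/file[0-9].csv")    # character-class match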

662 def exists(self, path, **kwargs): 

663 """Is there a file at the given path""" 

664 try: 

665 self.info(path, **kwargs) 

666 return True 

667 except: # noqa: E722 

668 # any exception allowed bar FileNotFoundError? 

669 return False 

670 

671 def lexists(self, path, **kwargs): 

672 """If there is a file at the given path (including 

673 broken links)""" 

674 return self.exists(path) 

675 

676 def info(self, path, **kwargs): 

677 """Give details of entry at path 

678 

679 Returns a single dictionary, with exactly the same information as ``ls`` 

680 would with ``detail=True``. 

681 

682 The default implementation calls ls and could be overridden by a 

683 shortcut. kwargs are passed on to ``ls()``.

684 

685 Some file systems might not be able to measure the file's size, in 

686 which case, the returned dict will include ``'size': None``. 

687 

688 Returns 

689 ------- 

690 dict with keys: name (full path in the FS), size (in bytes), type (file, 

691 directory, or something else) and other FS-specific keys. 

692 """ 

693 path = self._strip_protocol(path) 

694 out = self.ls(self._parent(path), detail=True, **kwargs) 

695 out = [o for o in out if o["name"].rstrip("/") == path] 

696 if out: 

697 return out[0] 

698 out = self.ls(path, detail=True, **kwargs) 

699 path = path.rstrip("/") 

700 out1 = [o for o in out if o["name"].rstrip("/") == path] 

701 if len(out1) == 1: 

702 if "size" not in out1[0]: 

703 out1[0]["size"] = None 

704 return out1[0] 

705 elif len(out1) > 1 or out: 

706 return {"name": path, "size": 0, "type": "directory"} 

707 else: 

708 raise FileNotFoundError(path) 

709 

710 def checksum(self, path): 

711 """Unique value for current version of file 

712 

713 If the checksum is the same from one moment to another, the contents 

714 are guaranteed to be the same. If the checksum changes, the contents 

715 *might* have changed. 

716 

717 This should normally be overridden; the default will probably capture

718 creation/modification timestamp (which would be good) or maybe 

719 access timestamp (which would be bad) 

720 """ 

721 return int(tokenize(self.info(path)), 16) 

722 

723 def size(self, path): 

724 """Size in bytes of file""" 

725 return self.info(path).get("size", None) 

726 

727 def sizes(self, paths): 

728 """Size in bytes of each file in a list of paths""" 

729 return [self.size(p) for p in paths] 

730 

731 def isdir(self, path): 

732 """Is this entry directory-like?""" 

733 try: 

734 return self.info(path)["type"] == "directory" 

735 except OSError: 

736 return False 

737 

738 def isfile(self, path): 

739 """Is this entry file-like?""" 

740 try: 

741 return self.info(path)["type"] == "file" 

742 except: # noqa: E722 

743 return False 

744 

745 def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs): 

746 """Get the contents of the file as a string. 

747 

748 Parameters 

749 ---------- 

750 path: str 

751 URL of file on this filesystem

752 encoding, errors, newline: same as `open`. 

753 """ 

754 with self.open( 

755 path, 

756 mode="r", 

757 encoding=encoding, 

758 errors=errors, 

759 newline=newline, 

760 **kwargs, 

761 ) as f: 

762 return f.read() 

763 

764 def write_text( 

765 self, path, value, encoding=None, errors=None, newline=None, **kwargs 

766 ): 

767 """Write the text to the given file. 

768 

769 An existing file will be overwritten. 

770 

771 Parameters 

772 ---------- 

773 path: str 

774 URL of file on this filesystem

775 value: str 

776 Text to write. 

777 encoding, errors, newline: same as `open`. 

778 """ 

779 with self.open( 

780 path, 

781 mode="w", 

782 encoding=encoding, 

783 errors=errors, 

784 newline=newline, 

785 **kwargs, 

786 ) as f: 

787 return f.write(value) 

788 

789 def cat_file(self, path, start=None, end=None, **kwargs): 

790 """Get the content of a file 

791 

792 Parameters 

793 ---------- 

794 path: URL of file on this filesystem

795 start, end: int 

796 Bytes limits of the read. If negative, backwards from end, 

797 like usual python slices. Either can be None for start or 

798 end of file, respectively 

799 kwargs: passed to ``open()``. 

800 """ 

801 # explicitly set buffering off? 

802 with self.open(path, "rb", **kwargs) as f: 

803 if start is not None: 

804 if start >= 0: 

805 f.seek(start) 

806 else: 

807 f.seek(max(0, f.size + start)) 

808 if end is not None: 

809 if end < 0: 

810 end = f.size + end 

811 return f.read(end - f.tell()) 

812 return f.read() 

813 
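# Usage sketch (added; path is hypothetical): negative offsets count back
# from the end of the file, as with Python slices.
#
#     fs.cat_file("/data/blob.bin", start=0, end=16)   # first 16 bytes
#     fs.cat_file("/data/blob.bin", start=-16)         # last 16 bytes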

814 def pipe_file(self, path, value, mode="overwrite", **kwargs): 

815 """Set the bytes of given file""" 

816 if mode == "create" and self.exists(path): 

817 # non-atomic but simple way; or could use "xb" in open(), which is likely 

818 # not as well supported 

819 raise FileExistsError 

820 with self.open(path, "wb", **kwargs) as f: 

821 f.write(value) 

822 

823 def pipe(self, path, value=None, **kwargs): 

824 """Put value into path 

825 

826 (counterpart to ``cat``) 

827 

828 Parameters 

829 ---------- 

830 path: string or dict(str, bytes) 

831 If a string, a single remote location to put ``value`` bytes; if a dict, 

832 a mapping of {path: bytesvalue}. 

833 value: bytes, optional 

834 If using a single path, these are the bytes to put there. Ignored if 

835 ``path`` is a dict 

836 """ 

837 if isinstance(path, str): 

838 self.pipe_file(self._strip_protocol(path), value, **kwargs) 

839 elif isinstance(path, dict): 

840 for k, v in path.items(): 

841 self.pipe_file(self._strip_protocol(k), v, **kwargs) 

842 else: 

843 raise ValueError("path must be str or dict") 

844 
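# Usage sketch (added; paths are hypothetical):
#
#     fs.pipe("/a/one.bin", b"abc")                            # single target
#     fs.pipe({"/a/one.bin": b"abc", "/a/two.bin": b"def"})    # several at once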

845 def cat_ranges( 

846 self, paths, starts, ends, max_gap=None, on_error="return", **kwargs 

847 ): 

848 """Get the contents of byte ranges from one or more files 

849 

850 Parameters 

851 ---------- 

852 paths: list 

853 A list of filepaths on this filesystem

854 starts, ends: int or list 

855 Bytes limits of the read. If using a single int, the same value will be 

856 used to read all the specified files. 

857 """ 

858 if max_gap is not None: 

859 raise NotImplementedError 

860 if not isinstance(paths, list): 

861 raise TypeError 

862 if not isinstance(starts, list): 

863 starts = [starts] * len(paths) 

864 if not isinstance(ends, list): 

865 ends = [ends] * len(paths) 

866 if len(starts) != len(paths) or len(ends) != len(paths): 

867 raise ValueError 

868 out = [] 

869 for p, s, e in zip(paths, starts, ends): 

870 try: 

871 out.append(self.cat_file(p, s, e)) 

872 except Exception as e: 

873 if on_error == "return": 

874 out.append(e) 

875 else: 

876 raise 

877 return out 

878 

879 def cat(self, path, recursive=False, on_error="raise", **kwargs): 

880 """Fetch (potentially multiple) paths' contents 

881 

882 Parameters 

883 ---------- 

884 recursive: bool 

885 If True, assume the path(s) are directories, and get all the 

886 contained files 

887 on_error : "raise", "omit", "return" 

888 If raise, an underlying exception will be raised (converted to KeyError 

889 if the type is in self.missing_exceptions); if omit, keys with exception 

890 will simply not be included in the output; if "return", all keys are 

891 included in the output, but the value will be bytes or an exception 

892 instance. 

893 kwargs: passed to cat_file 

894 

895 Returns 

896 ------- 

897 dict of {path: contents} if there are multiple paths 

898 or the path has been otherwise expanded 

899 """ 

900 paths = self.expand_path(path, recursive=recursive, **kwargs) 

901 if ( 

902 len(paths) > 1 

903 or isinstance(path, list) 

904 or paths[0] != self._strip_protocol(path) 

905 ): 

906 out = {} 

907 for path in paths: 

908 try: 

909 out[path] = self.cat_file(path, **kwargs) 

910 except Exception as e: 

911 if on_error == "raise": 

912 raise 

913 if on_error == "return": 

914 out[path] = e 

915 return out 

916 else: 

917 return self.cat_file(paths[0], **kwargs) 

918 
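# Usage sketch (added; paths are hypothetical): a single concrete path returns
# bytes; lists, globs, or recursion return a {path: bytes} mapping.
#
#     fs.cat("/a/one.bin")     # -> b"..."
#     fs.cat("/a/*.bin")       # -> {"/a/one.bin": b"...", "/a/two.bin": b"..."}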

919 def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs): 

920 """Copy single remote file to local""" 

921 from .implementations.local import LocalFileSystem 

922 

923 if isfilelike(lpath): 

924 outfile = lpath 

925 elif self.isdir(rpath): 

926 os.makedirs(lpath, exist_ok=True) 

927 return None 

928 

929 fs = LocalFileSystem(auto_mkdir=True) 

930 fs.makedirs(fs._parent(lpath), exist_ok=True) 

931 

932 with self.open(rpath, "rb", **kwargs) as f1: 

933 if outfile is None: 

934 outfile = open(lpath, "wb") 

935 

936 try: 

937 callback.set_size(getattr(f1, "size", None)) 

938 data = True 

939 while data: 

940 data = f1.read(self.blocksize) 

941 segment_len = outfile.write(data) 

942 if segment_len is None: 

943 segment_len = len(data) 

944 callback.relative_update(segment_len) 

945 finally: 

946 if not isfilelike(lpath): 

947 outfile.close() 

948 

949 def get( 

950 self, 

951 rpath, 

952 lpath, 

953 recursive=False, 

954 callback=DEFAULT_CALLBACK, 

955 maxdepth=None, 

956 **kwargs, 

957 ): 

958 """Copy file(s) to local. 

959 

960 Copies a specific file or tree of files (if recursive=True). If lpath 

961 ends with a "/", it will be assumed to be a directory, and target files 

962 will go within. Can submit a list of paths, which may be glob-patterns 

963 and will be expanded. 

964 

965 Calls get_file for each source. 

966 """ 

967 if isinstance(lpath, list) and isinstance(rpath, list): 

968 # No need to expand paths when both source and destination 

969 # are provided as lists 

970 rpaths = rpath 

971 lpaths = lpath 

972 else: 

973 from .implementations.local import ( 

974 LocalFileSystem, 

975 make_path_posix, 

976 trailing_sep, 

977 ) 

978 

979 source_is_str = isinstance(rpath, str) 

980 rpaths = self.expand_path( 

981 rpath, recursive=recursive, maxdepth=maxdepth, **kwargs 

982 ) 

983 if source_is_str and (not recursive or maxdepth is not None): 

984 # Non-recursive glob does not copy directories 

985 rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))] 

986 if not rpaths: 

987 return 

988 

989 if isinstance(lpath, str): 

990 lpath = make_path_posix(lpath) 

991 

992 source_is_file = len(rpaths) == 1 

993 dest_is_dir = isinstance(lpath, str) and ( 

994 trailing_sep(lpath) or LocalFileSystem().isdir(lpath) 

995 ) 

996 

997 exists = source_is_str and ( 

998 (has_magic(rpath) and source_is_file) 

999 or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath)) 

1000 ) 

1001 lpaths = other_paths( 

1002 rpaths, 

1003 lpath, 

1004 exists=exists, 

1005 flatten=not source_is_str, 

1006 ) 

1007 

1008 callback.set_size(len(lpaths)) 

1009 for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): 

1010 with callback.branched(rpath, lpath) as child: 

1011 self.get_file(rpath, lpath, callback=child, **kwargs) 

1012 
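# Usage sketch (added; paths are hypothetical; put() mirrors this in the
# other direction): a trailing "/" marks the destination as a directory.
#
#     fs.get("/remote/data/file.bin", "./downloads/")           # file into dir
#     fs.get("/remote/data/", "./downloads/", recursive=True)   # whole tree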

1013 def put_file( 

1014 self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs 

1015 ): 

1016 """Copy single file to remote""" 

1017 if mode == "create" and self.exists(rpath): 

1018 raise FileExistsError 

1019 if os.path.isdir(lpath): 

1020 self.makedirs(rpath, exist_ok=True) 

1021 return None 

1022 

1023 with open(lpath, "rb") as f1: 

1024 size = f1.seek(0, 2) 

1025 callback.set_size(size) 

1026 f1.seek(0) 

1027 

1028 self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True) 

1029 with self.open(rpath, "wb", **kwargs) as f2: 

1030 while f1.tell() < size: 

1031 data = f1.read(self.blocksize) 

1032 segment_len = f2.write(data) 

1033 if segment_len is None: 

1034 segment_len = len(data) 

1035 callback.relative_update(segment_len) 

1036 

1037 def put( 

1038 self, 

1039 lpath, 

1040 rpath, 

1041 recursive=False, 

1042 callback=DEFAULT_CALLBACK, 

1043 maxdepth=None, 

1044 **kwargs, 

1045 ): 

1046 """Copy file(s) from local. 

1047 

1048 Copies a specific file or tree of files (if recursive=True). If rpath 

1049 ends with a "/", it will be assumed to be a directory, and target files 

1050 will go within. 

1051 

1052 Calls put_file for each source. 

1053 """ 

1054 if isinstance(lpath, list) and isinstance(rpath, list): 

1055 # No need to expand paths when both source and destination 

1056 # are provided as lists 

1057 rpaths = rpath 

1058 lpaths = lpath 

1059 else: 

1060 from .implementations.local import ( 

1061 LocalFileSystem, 

1062 make_path_posix, 

1063 trailing_sep, 

1064 ) 

1065 

1066 source_is_str = isinstance(lpath, str) 

1067 if source_is_str: 

1068 lpath = make_path_posix(lpath) 

1069 fs = LocalFileSystem() 

1070 lpaths = fs.expand_path( 

1071 lpath, recursive=recursive, maxdepth=maxdepth, **kwargs 

1072 ) 

1073 if source_is_str and (not recursive or maxdepth is not None): 

1074 # Non-recursive glob does not copy directories 

1075 lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))] 

1076 if not lpaths: 

1077 return 

1078 

1079 source_is_file = len(lpaths) == 1 

1080 dest_is_dir = isinstance(rpath, str) and ( 

1081 trailing_sep(rpath) or self.isdir(rpath) 

1082 ) 

1083 

1084 rpath = ( 

1085 self._strip_protocol(rpath) 

1086 if isinstance(rpath, str) 

1087 else [self._strip_protocol(p) for p in rpath] 

1088 ) 

1089 exists = source_is_str and ( 

1090 (has_magic(lpath) and source_is_file) 

1091 or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) 

1092 ) 

1093 rpaths = other_paths( 

1094 lpaths, 

1095 rpath, 

1096 exists=exists, 

1097 flatten=not source_is_str, 

1098 ) 

1099 

1100 callback.set_size(len(rpaths)) 

1101 for lpath, rpath in callback.wrap(zip(lpaths, rpaths)): 

1102 with callback.branched(lpath, rpath) as child: 

1103 self.put_file(lpath, rpath, callback=child, **kwargs) 

1104 

1105 def head(self, path, size=1024): 

1106 """Get the first ``size`` bytes from file""" 

1107 with self.open(path, "rb") as f: 

1108 return f.read(size) 

1109 

1110 def tail(self, path, size=1024): 

1111 """Get the last ``size`` bytes from file""" 

1112 with self.open(path, "rb") as f: 

1113 f.seek(max(-size, -f.size), 2) 

1114 return f.read() 

1115 

1116 def cp_file(self, path1, path2, **kwargs): 

1117 raise NotImplementedError 

1118 

1119 def copy( 

1120 self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs 

1121 ): 

1122 """Copy within two locations in the filesystem 

1123 

1124 on_error : "raise", "ignore" 

1125 If "raise", any not-found exceptions will be raised; if "ignore",

1126 not-found exceptions will cause the path to be skipped; defaults to

1127 "raise" unless recursive is True, in which case the default is "ignore"

1128 """ 

1129 if on_error is None and recursive: 

1130 on_error = "ignore" 

1131 elif on_error is None: 

1132 on_error = "raise" 

1133 

1134 if isinstance(path1, list) and isinstance(path2, list): 

1135 # No need to expand paths when both source and destination 

1136 # are provided as lists 

1137 paths1 = path1 

1138 paths2 = path2 

1139 else: 

1140 from .implementations.local import trailing_sep 

1141 

1142 source_is_str = isinstance(path1, str) 

1143 paths1 = self.expand_path( 

1144 path1, recursive=recursive, maxdepth=maxdepth, **kwargs 

1145 ) 

1146 if source_is_str and (not recursive or maxdepth is not None): 

1147 # Non-recursive glob does not copy directories 

1148 paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))] 

1149 if not paths1: 

1150 return 

1151 

1152 source_is_file = len(paths1) == 1 

1153 dest_is_dir = isinstance(path2, str) and ( 

1154 trailing_sep(path2) or self.isdir(path2) 

1155 ) 

1156 

1157 exists = source_is_str and ( 

1158 (has_magic(path1) and source_is_file) 

1159 or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) 

1160 ) 

1161 paths2 = other_paths( 

1162 paths1, 

1163 path2, 

1164 exists=exists, 

1165 flatten=not source_is_str, 

1166 ) 

1167 

1168 for p1, p2 in zip(paths1, paths2): 

1169 try: 

1170 self.cp_file(p1, p2, **kwargs) 

1171 except FileNotFoundError: 

1172 if on_error == "raise": 

1173 raise 

1174 

1175 def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): 

1176 """Turn one or more globs or directories into a list of all matching paths 

1177 to files or directories. 

1178 

1179 kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls`` 

1180 """ 

1181 

1182 if maxdepth is not None and maxdepth < 1: 

1183 raise ValueError("maxdepth must be at least 1") 

1184 

1185 if isinstance(path, (str, os.PathLike)): 

1186 out = self.expand_path([path], recursive, maxdepth, **kwargs) 

1187 else: 

1188 out = set() 

1189 path = [self._strip_protocol(p) for p in path] 

1190 for p in path: 

1191 if has_magic(p): 

1192 bit = set(self.glob(p, maxdepth=maxdepth, **kwargs)) 

1193 out |= bit 

1194 if recursive: 

1195 # glob call above expanded one depth so if maxdepth is defined 

1196 # then decrement it in expand_path call below. If it is zero 

1197 # after decrementing then avoid expand_path call. 

1198 if maxdepth is not None and maxdepth <= 1: 

1199 continue 

1200 out |= set( 

1201 self.expand_path( 

1202 list(bit), 

1203 recursive=recursive, 

1204 maxdepth=maxdepth - 1 if maxdepth is not None else None, 

1205 **kwargs, 

1206 ) 

1207 ) 

1208 continue 

1209 elif recursive: 

1210 rec = set( 

1211 self.find( 

1212 p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs 

1213 ) 

1214 ) 

1215 out |= rec 

1216 if p not in out and (recursive is False or self.exists(p)): 

1217 # should only check once, for the root 

1218 out.add(p) 

1219 if not out: 

1220 raise FileNotFoundError(path) 

1221 return sorted(out) 

1222 
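# Usage sketch (added; paths are hypothetical):
#
#     fs.expand_path("/data/*.csv")            # glob expansion, sorted
#     fs.expand_path("/data", recursive=True)  # /data plus all descendants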

1223 def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs): 

1224 """Move file(s) from one location to another""" 

1225 if path1 == path2: 

1226 logger.debug("%s mv: The paths are the same, so no files were moved.", self) 

1227 else: 

1228 # explicitly raise exception to prevent data corruption 

1229 self.copy( 

1230 path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise"

1231 ) 

1232 self.rm(path1, recursive=recursive) 

1233 

1234 def rm_file(self, path): 

1235 """Delete a file""" 

1236 self._rm(path) 

1237 

1238 def _rm(self, path): 

1239 """Delete one file""" 

1240 # this is the old name for the method, prefer rm_file 

1241 raise NotImplementedError 

1242 

1243 def rm(self, path, recursive=False, maxdepth=None): 

1244 """Delete files. 

1245 

1246 Parameters 

1247 ---------- 

1248 path: str or list of str 

1249 File(s) to delete. 

1250 recursive: bool 

1251 If file(s) are directories, recursively delete contents and then 

1252 also remove the directory 

1253 maxdepth: int or None 

1254 Depth to pass to walk for finding files to delete, if recursive. 

1255 If None, there will be no limit and infinite recursion may be 

1256 possible. 

1257 """ 

1258 path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth) 

1259 for p in reversed(path): 

1260 self.rm_file(p) 

1261 

1262 @classmethod 

1263 def _parent(cls, path): 

1264 path = cls._strip_protocol(path) 

1265 if "/" in path: 

1266 parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker) 

1267 return cls.root_marker + parent 

1268 else: 

1269 return cls.root_marker 

1270 

1271 def _open( 

1272 self, 

1273 path, 

1274 mode="rb", 

1275 block_size=None, 

1276 autocommit=True, 

1277 cache_options=None, 

1278 **kwargs, 

1279 ): 

1280 """Return raw bytes-mode file-like from the file-system""" 

1281 return AbstractBufferedFile( 

1282 self, 

1283 path, 

1284 mode, 

1285 block_size, 

1286 autocommit, 

1287 cache_options=cache_options, 

1288 **kwargs, 

1289 ) 

1290 

1291 def open( 

1292 self, 

1293 path, 

1294 mode="rb", 

1295 block_size=None, 

1296 cache_options=None, 

1297 compression=None, 

1298 **kwargs, 

1299 ): 

1300 """ 

1301 Return a file-like object from the filesystem 

1302 

1303 The resultant instance must function correctly in a context ``with`` 

1304 block. 

1305 

1306 Parameters 

1307 ---------- 

1308 path: str 

1309 Target file 

1310 mode: str like 'rb', 'w' 

1311 See builtin ``open()`` 

1312 Mode "x" (exclusive write) may be implemented by the backend. Even if 

1313 it is, whether it is checked up front or on commit, and whether it is 

1314 atomic is implementation-dependent. 

1315 block_size: int 

1316 Some indication of buffering - this is a value in bytes 

1317 cache_options : dict, optional 

1318 Extra arguments to pass through to the cache. 

1319 compression: string or None 

1320 If given, open file using compression codec. Can either be a compression 

1321 name (a key in ``fsspec.compression.compr``) or "infer" to guess the 

1322 compression from the filename suffix. 

1323 encoding, errors, newline: passed on to TextIOWrapper for text mode 

1324 """ 

1325 import io 

1326 

1327 path = self._strip_protocol(path) 

1328 if "b" not in mode: 

1329 mode = mode.replace("t", "") + "b" 

1330 

1331 text_kwargs = { 

1332 k: kwargs.pop(k) 

1333 for k in ["encoding", "errors", "newline"] 

1334 if k in kwargs 

1335 } 

1336 return io.TextIOWrapper( 

1337 self.open( 

1338 path, 

1339 mode, 

1340 block_size=block_size, 

1341 cache_options=cache_options, 

1342 compression=compression, 

1343 **kwargs, 

1344 ), 

1345 **text_kwargs, 

1346 ) 

1347 else: 

1348 ac = kwargs.pop("autocommit", not self._intrans) 

1349 f = self._open( 

1350 path, 

1351 mode=mode, 

1352 block_size=block_size, 

1353 autocommit=ac, 

1354 cache_options=cache_options, 

1355 **kwargs, 

1356 ) 

1357 if compression is not None: 

1358 from fsspec.compression import compr 

1359 from fsspec.core import get_compression 

1360 

1361 compression = get_compression(path, compression) 

1362 compress = compr[compression] 

1363 f = compress(f, mode=mode[0]) 

1364 

1365 if not ac and "r" not in mode: 

1366 self.transaction.files.append(f) 

1367 return f 

1368 
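# Usage sketch (added; path is hypothetical): text modes are wrapped in a
# TextIOWrapper around the binary file, and compression="infer" guesses the
# codec from the filename suffix.
#
#     with fs.open("/data/table.csv.gz", "rt", compression="infer") as f:
#         header = f.readline()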

1369 def touch(self, path, truncate=True, **kwargs): 

1370 """Create empty file, or update timestamp 

1371 

1372 Parameters 

1373 ---------- 

1374 path: str 

1375 file location 

1376 truncate: bool 

1377 If True, always set file size to 0; if False, update timestamp and 

1378 leave file unchanged, if backend allows this 

1379 """ 

1380 if truncate or not self.exists(path): 

1381 with self.open(path, "wb", **kwargs): 

1382 pass 

1383 else: 

1384 raise NotImplementedError # update timestamp, if possible 

1385 

1386 def ukey(self, path): 

1387 """Hash of file properties, to tell if it has changed""" 

1388 return sha256(str(self.info(path)).encode()).hexdigest() 

1389 

1390 def read_block(self, fn, offset, length, delimiter=None): 

1391 """Read a block of bytes from 

1392 

1393 Starting at ``offset`` of the file, read ``length`` bytes. If 

1394 ``delimiter`` is set then we ensure that the read starts and stops at 

1395 delimiter boundaries that follow the locations ``offset`` and ``offset 

1396 + length``. If ``offset`` is zero then we start at zero. The 

1397 bytestring returned WILL include the end delimiter string. 

1398 

1399 If offset+length is beyond the eof, reads to eof. 

1400 

1401 Parameters 

1402 ---------- 

1403 fn: string 

1404 Path to filename 

1405 offset: int 

1406 Byte offset to start read 

1407 length: int 

1408 Number of bytes to read. If None, read to end. 

1409 delimiter: bytes (optional) 

1410 Ensure reading starts and stops at delimiter bytestring 

1411 

1412 Examples 

1413 -------- 

1414 >>> fs.read_block('data/file.csv', 0, 13) # doctest: +SKIP 

1415 b'Alice, 100\\nBo' 

1416 >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n') # doctest: +SKIP 

1417 b'Alice, 100\\nBob, 200\\n' 

1418 

1419 Use ``length=None`` to read to the end of the file. 

1420 >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n') # doctest: +SKIP 

1421 b'Alice, 100\\nBob, 200\\nCharlie, 300' 

1422 

1423 See Also 

1424 -------- 

1425 :func:`fsspec.utils.read_block` 

1426 """ 

1427 with self.open(fn, "rb") as f: 

1428 size = f.size 

1429 if length is None: 

1430 length = size 

1431 if size is not None and offset + length > size: 

1432 length = size - offset 

1433 return read_block(f, offset, length, delimiter) 

1434 

1435 def to_json(self, *, include_password: bool = True) -> str: 

1436 """ 

1437 JSON representation of this filesystem instance. 

1438 

1439 Parameters 

1440 ---------- 

1441 include_password: bool, default True 

1442 Whether to include the password (if any) in the output. 

1443 

1444 Returns 

1445 ------- 

1446 JSON string with keys ``cls`` (the python location of this class), 

1447 ``protocol`` (text name of this class's protocol, first one in case of

1448 multiple), ``args`` (positional args, usually empty), and all other 

1449 keyword arguments as their own keys. 

1450 

1451 Warnings 

1452 -------- 

1453 Serialized filesystems may contain sensitive information which has been

1454 passed to the constructor, such as passwords and tokens. Make sure you 

1455 store and send them in a secure environment! 

1456 """ 

1457 from .json import FilesystemJSONEncoder 

1458 

1459 return json.dumps( 

1460 self, 

1461 cls=type( 

1462 "_FilesystemJSONEncoder", 

1463 (FilesystemJSONEncoder,), 

1464 {"include_password": include_password}, 

1465 ), 

1466 ) 

1467 

1468 @staticmethod 

1469 def from_json(blob: str) -> AbstractFileSystem: 

1470 """ 

1471 Recreate a filesystem instance from JSON representation. 

1472 

1473 See ``.to_json()`` for the expected structure of the input. 

1474 

1475 Parameters 

1476 ---------- 

1477 blob: str 

1478 

1479 Returns 

1480 ------- 

1481 file system instance, not necessarily of this particular class. 

1482 

1483 Warnings 

1484 -------- 

1485 This can import arbitrary modules (as determined by the ``cls`` key). 

1486 Make sure you haven't installed any modules that may execute malicious code 

1487 at import time. 

1488 """ 

1489 from .json import FilesystemJSONDecoder 

1490 

1491 return json.loads(blob, cls=FilesystemJSONDecoder) 

1492 

1493 def to_dict(self, *, include_password: bool = True) -> dict[str, Any]: 

1494 """ 

1495 JSON-serializable dictionary representation of this filesystem instance. 

1496 

1497 Parameters 

1498 ---------- 

1499 include_password: bool, default True 

1500 Whether to include the password (if any) in the output. 

1501 

1502 Returns 

1503 ------- 

1504 Dictionary with keys ``cls`` (the python location of this class), 

1505 ``protocol`` (text name of this class's protocol, first one in case of

1506 multiple), ``args`` (positional args, usually empty), and all other 

1507 keyword arguments as their own keys. 

1508 

1509 Warnings 

1510 -------- 

1511 Serialized filesystems may contain sensitive information which has been

1512 passed to the constructor, such as passwords and tokens. Make sure you 

1513 store and send them in a secure environment! 

1514 """ 

1515 from .json import FilesystemJSONEncoder 

1516 

1517 json_encoder = FilesystemJSONEncoder() 

1518 

1519 cls = type(self) 

1520 proto = self.protocol 

1521 

1522 storage_options = dict(self.storage_options) 

1523 if not include_password: 

1524 storage_options.pop("password", None) 

1525 

1526 return dict( 

1527 cls=f"{cls.__module__}:{cls.__name__}", 

1528 protocol=proto[0] if isinstance(proto, (tuple, list)) else proto, 

1529 args=json_encoder.make_serializable(self.storage_args), 

1530 **json_encoder.make_serializable(storage_options), 

1531 ) 

1532 

1533 @staticmethod 

1534 def from_dict(dct: dict[str, Any]) -> AbstractFileSystem: 

1535 """ 

1536 Recreate a filesystem instance from dictionary representation. 

1537 

1538 See ``.to_dict()`` for the expected structure of the input. 

1539 

1540 Parameters 

1541 ---------- 

1542 dct: Dict[str, Any] 

1543 

1544 Returns 

1545 ------- 

1546 file system instance, not necessarily of this particular class. 

1547 

1548 Warnings 

1549 -------- 

1550 This can import arbitrary modules (as determined by the ``cls`` key). 

1551 Make sure you haven't installed any modules that may execute malicious code 

1552 at import time. 

1553 """ 

1554 from .json import FilesystemJSONDecoder 

1555 

1556 json_decoder = FilesystemJSONDecoder() 

1557 

1558 dct = dict(dct) # Defensive copy 

1559 

1560 cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct) 

1561 if cls is None: 

1562 raise ValueError("Not a serialized AbstractFileSystem") 

1563 

1564 dct.pop("cls", None) 

1565 dct.pop("protocol", None) 

1566 

1567 return cls( 

1568 *json_decoder.unmake_serializable(dct.pop("args", ())), 

1569 **json_decoder.unmake_serializable(dct), 

1570 ) 

1571 

1572 def _get_pyarrow_filesystem(self): 

1573 """ 

1574 Make a version of the FS instance which will be acceptable to pyarrow 

1575 """ 

1576 # all instances already also derive from pyarrow 

1577 return self 

1578 

1579 def get_mapper(self, root="", check=False, create=False, missing_exceptions=None): 

1580 """Create key/value store based on this file-system 

1581 

1582 Makes a MutableMapping interface to the FS at the given root path. 

1583 See ``fsspec.mapping.FSMap`` for further details. 

1584 """ 

1585 from .mapping import FSMap 

1586 

1587 return FSMap( 

1588 root, 

1589 self, 

1590 check=check, 

1591 create=create, 

1592 missing_exceptions=missing_exceptions, 

1593 ) 

1594 

1595 @classmethod 

1596 def clear_instance_cache(cls): 

1597 """ 

1598 Clear the cache of filesystem instances. 

1599 

1600 Notes 

1601 ----- 

1602 Unless overridden by setting the ``cachable`` class attribute to False, 

1603 the filesystem class stores a reference to newly created instances. This 

1604 prevents Python's normal rules around garbage collection from working, 

1605 since the instance's refcount will not drop to zero until

1606 ``clear_instance_cache`` is called. 

1607 """ 

1608 cls._cache.clear() 

1609 

1610 def created(self, path): 

1611 """Return the created timestamp of a file as a datetime.datetime""" 

1612 raise NotImplementedError 

1613 

1614 def modified(self, path): 

1615 """Return the modified timestamp of a file as a datetime.datetime""" 

1616 raise NotImplementedError 

1617 

1618 def tree( 

1619 self, 

1620 path: str = "/", 

1621 recursion_limit: int = 2, 

1622 max_display: int = 25, 

1623 display_size: bool = False, 

1624 prefix: str = "", 

1625 is_last: bool = True, 

1626 first: bool = True, 

1627 indent_size: int = 4, 

1628 ) -> str: 

1629 """ 

1630 Return a tree-like structure of the filesystem starting from the given path as a string. 

1631 

1632 Parameters 

1633 ---------- 

1634 path: Root path to start traversal from 

1635 recursion_limit: Maximum depth of directory traversal 

1636 max_display: Maximum number of items to display per directory 

1637 display_size: Whether to display file sizes 

1638 prefix: Current line prefix for visual tree structure 

1639 is_last: Whether current item is last in its level 

1640 first: Whether this is the first call (displays root path) 

1641 indent_size: Number of spaces per indent level

1642 

1643 Returns 

1644 ------- 

1645 str: A string representing the tree structure. 

1646 

1647 Example 

1648 ------- 

1649 >>> from fsspec import filesystem 

1650 

1651 >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password') 

1652 >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10) 

1653 >>> print(tree) 

1654 """ 

1655 

1656 def format_bytes(n: int) -> str: 

1657 """Format bytes as text.""" 

1658 for prefix, k in ( 

1659 ("P", 2**50), 

1660 ("T", 2**40), 

1661 ("G", 2**30), 

1662 ("M", 2**20), 

1663 ("k", 2**10), 

1664 ): 

1665 if n >= 0.9 * k: 

1666 return f"{n / k:.2f} {prefix}b" 

1667 return f"{n}B" 

1668 

1669 result = [] 

1670 

1671 if first: 

1672 result.append(path) 

1673 

1674 if recursion_limit: 

1675 indent = " " * indent_size 

1676 contents = self.ls(path, detail=True) 

1677 contents.sort( 

1678 key=lambda x: (x.get("type") != "directory", x.get("name", "")) 

1679 ) 

1680 

1681 if max_display is not None and len(contents) > max_display: 

1682 displayed_contents = contents[:max_display] 

1683 remaining_count = len(contents) - max_display 

1684 else: 

1685 displayed_contents = contents 

1686 remaining_count = 0 

1687 

1688 for i, item in enumerate(displayed_contents): 

1689 is_last_item = (i == len(displayed_contents) - 1) and ( 

1690 remaining_count == 0 

1691 ) 

1692 

1693 branch = ( 

1694 "└" + ("─" * (indent_size - 2)) 

1695 if is_last_item 

1696 else "├" + ("─" * (indent_size - 2)) 

1697 ) 

1698 branch += " " 

1699 new_prefix = prefix + ( 

1700 indent if is_last_item else "│" + " " * (indent_size - 1) 

1701 ) 

1702 

1703 name = os.path.basename(item.get("name", "")) 

1704 

1705 if display_size and item.get("type") == "directory": 

1706 sub_contents = self.ls(item.get("name", ""), detail=True) 

1707 num_files = sum( 

1708 1 for sub_item in sub_contents if sub_item.get("type") == "file" 

1709 ) 

1710 num_folders = sum( 

1711 1 

1712 for sub_item in sub_contents 

1713 if sub_item.get("type") == "directory" 

1714 ) 

1715 

1716 if num_files == 0 and num_folders == 0: 

1717 size = " (empty folder)" 

1718 elif num_files == 0: 

1719 size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})" 

1720 elif num_folders == 0: 

1721 size = f" ({num_files} file{'s' if num_files > 1 else ''})" 

1722 else: 

1723 size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})" 

1724 elif display_size and item.get("type") == "file": 

1725 size = f" ({format_bytes(item.get('size', 0))})" 

1726 else: 

1727 size = "" 

1728 

1729 result.append(f"{prefix}{branch}{name}{size}") 

1730 

1731 if item.get("type") == "directory" and recursion_limit > 0: 

1732 result.append( 

1733 self.tree( 

1734 path=item.get("name", ""), 

1735 recursion_limit=recursion_limit - 1, 

1736 max_display=max_display, 

1737 display_size=display_size, 

1738 prefix=new_prefix, 

1739 is_last=is_last_item, 

1740 first=False, 

1741 indent_size=indent_size, 

1742 ) 

1743 ) 

1744 

1745 if remaining_count > 0: 

1746 more_message = f"{remaining_count} more item(s) not displayed." 

1747 result.append( 

1748 f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}" 

1749 ) 

1750 

1751 return "\n".join(_ for _ in result if _) 
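    # A small sketch of ``tree`` against the bundled in-memory backend;
    # the paths and sizes below are illustrative only:
    #
    #   >>> import fsspec
    #   >>> fs = fsspec.filesystem("memory")
    #   >>> fs.pipe_file("/demo/a.bin", b"x" * 2048)
    #   >>> fs.pipe_file("/demo/sub/b.bin", b"y" * 100)
    #   >>> print(fs.tree("/demo", display_size=True, recursion_limit=2))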

1752 

1753 # ------------------------------------------------------------------------ 

1754 # Aliases 

1755 

1756 def read_bytes(self, path, start=None, end=None, **kwargs): 

1757 """Alias of `AbstractFileSystem.cat_file`.""" 

1758 return self.cat_file(path, start=start, end=end, **kwargs) 

1759 

1760 def write_bytes(self, path, value, **kwargs): 

1761 """Alias of `AbstractFileSystem.pipe_file`.""" 

1762 self.pipe_file(path, value, **kwargs) 

1763 

1764 def makedir(self, path, create_parents=True, **kwargs): 

1765 """Alias of `AbstractFileSystem.mkdir`.""" 

1766 return self.mkdir(path, create_parents=create_parents, **kwargs) 

1767 

1768 def mkdirs(self, path, exist_ok=False): 

1769 """Alias of `AbstractFileSystem.makedirs`.""" 

1770 return self.makedirs(path, exist_ok=exist_ok) 

1771 

1772 def listdir(self, path, detail=True, **kwargs): 

1773 """Alias of `AbstractFileSystem.ls`.""" 

1774 return self.ls(path, detail=detail, **kwargs) 

1775 

1776 def cp(self, path1, path2, **kwargs): 

1777 """Alias of `AbstractFileSystem.copy`.""" 

1778 return self.copy(path1, path2, **kwargs) 

1779 

1780 def move(self, path1, path2, **kwargs): 

1781 """Alias of `AbstractFileSystem.mv`.""" 

1782 return self.mv(path1, path2, **kwargs) 

1783 

1784 def stat(self, path, **kwargs): 

1785 """Alias of `AbstractFileSystem.info`.""" 

1786 return self.info(path, **kwargs) 

1787 

1788 def disk_usage(self, path, total=True, maxdepth=None, **kwargs): 

1789 """Alias of `AbstractFileSystem.du`.""" 

1790 return self.du(path, total=total, maxdepth=maxdepth, **kwargs) 

1791 

1792 def rename(self, path1, path2, **kwargs): 

1793 """Alias of `AbstractFileSystem.mv`.""" 

1794 return self.mv(path1, path2, **kwargs) 

1795 

1796 def delete(self, path, recursive=False, maxdepth=None): 

1797 """Alias of `AbstractFileSystem.rm`.""" 

1798 return self.rm(path, recursive=recursive, maxdepth=maxdepth) 

1799 

1800 def upload(self, lpath, rpath, recursive=False, **kwargs): 

1801 """Alias of `AbstractFileSystem.put`.""" 

1802 return self.put(lpath, rpath, recursive=recursive, **kwargs) 

1803 

1804 def download(self, rpath, lpath, recursive=False, **kwargs): 

1805 """Alias of `AbstractFileSystem.get`.""" 

1806 return self.get(rpath, lpath, recursive=recursive, **kwargs) 

1807 

1808 def sign(self, path, expiration=100, **kwargs): 

1809 """Create a signed URL representing the given path 

1810 

1811 Some implementations allow temporary URLs to be generated, as a 

1812 way of delegating credentials. 

1813 

1814 Parameters 

1815 ---------- 

1816 path : str 

1817 The path on the filesystem 

1818 expiration : int 

1819 Number of seconds to enable the URL for (if supported) 

1820 

1821 Returns 

1822 ------- 

1823 URL : str 

1824 The signed URL 

1825 

1826 Raises 

1827 ------ 

1828 NotImplementedError : if method is not implemented for a filesystem 

1829 """ 

1830 raise NotImplementedError("Sign is not implemented for this filesystem") 
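    # How a subclass might provide this (hypothetical backend; the host
    # name and query format below are invented for illustration):
    #
    #   class DemoSignFS(AbstractFileSystem):
    #       def sign(self, path, expiration=100, **kwargs):
    #           import time
    #           expiry = int(time.time()) + expiration
    #           return f"https://signing.example/{path}?expires={expiry}"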

1831 

1832 def _isfilestore(self): 

1833 # Originally inherited from pyarrow DaskFileSystem. Keeping this 

1834 # here for backwards compatibility as long as pyarrow uses its 

1835 # legacy fsspec-compatible filesystems and thus accepts fsspec 

1836 # filesystems as well. 

1837 return False 

1838 

1839 

1840class AbstractBufferedFile(io.IOBase): 

1841 """Convenient class to derive from to provide buffering 

1842 

1843 In the case that the backend does not provide a pythonic file-like object 

1844 already, this class contains much of the logic to build one. The only 

1845 methods that need to be overridden are ``_upload_chunk``, 

1846 ``_initiate_upload`` and ``_fetch_range`` (a minimal sketch follows the class below). 

1847 """ 

1848 

1849 DEFAULT_BLOCK_SIZE = 5 * 2**20 

1850 _details = None 

1851 

1852 def __init__( 

1853 self, 

1854 fs, 

1855 path, 

1856 mode="rb", 

1857 block_size="default", 

1858 autocommit=True, 

1859 cache_type="readahead", 

1860 cache_options=None, 

1861 size=None, 

1862 **kwargs, 

1863 ): 

1864 """ 

1865 Template for files with buffered reading and writing 

1866 

1867 Parameters 

1868 ---------- 

1869 fs: instance of FileSystem 

1870 path: str 

1871 location in file-system 

1872 mode: str 

1873 Normal file modes. Currently only 'rb', 'wb', 'xb' or 'ab'. Some file 

1874 systems may be read-only, and some may not support append. 

1875 block_size: int 

1876 Buffer size for reading or writing, 'default' for class default 

1877 autocommit: bool 

1878 Whether to write to final destination; may only impact what 

1879 happens when the file is being closed. 

1880 cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead" 

1881 Caching policy in read mode. See the definitions in ``core``. 

1882 cache_options : dict 

1883 Additional options passed to the constructor for the cache specified 

1884 by `cache_type`. 

1885 size: int 

1886 If given and in read mode, avoids having to look up the file size 

1887 kwargs: 

1888 Gets stored as self.kwargs 

1889 """ 

1890 from .core import caches 

1891 

1892 self.path = path 

1893 self.fs = fs 

1894 self.mode = mode 

1895 self.blocksize = ( 

1896 self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size 

1897 ) 

1898 self.loc = 0 

1899 self.autocommit = autocommit 

1900 self.end = None 

1901 self.start = None 

1902 self.closed = False 

1903 

1904 if cache_options is None: 

1905 cache_options = {} 

1906 

1907 if "trim" in kwargs: 

1908 warnings.warn( 

1909 "Passing 'trim' to control the cache behavior has been deprecated. " 

1910 "Specify it within the 'cache_options' argument instead.", 

1911 FutureWarning, 

1912 ) 

1913 cache_options["trim"] = kwargs.pop("trim") 

1914 

1915 self.kwargs = kwargs 

1916 

1917 if mode not in {"ab", "rb", "wb", "xb"}: 

1918 raise NotImplementedError("File mode not supported") 

1919 if mode == "rb": 

1920 if size is not None: 

1921 self.size = size 

1922 else: 

1923 self.size = self.details["size"] 

1924 self.cache = caches[cache_type]( 

1925 self.blocksize, self._fetch_range, self.size, **cache_options 

1926 ) 

1927 else: 

1928 self.buffer = io.BytesIO() 

1929 self.offset = None 

1930 self.forced = False 

1931 self.location = None 
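    # Typical construction goes through ``fs.open`` rather than calling this
    # class directly; a hedged sketch (hypothetical remote backend):
    #
    #   f = remote_fs.open("bucket/key", mode="rb",
    #                      block_size=2**20, cache_type="bytes")
    #   # extra options such as cache_options={"trim": True} reach the cache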

1932 

1933 @property 

1934 def details(self): 

1935 if self._details is None: 

1936 self._details = self.fs.info(self.path) 

1937 return self._details 

1938 

1939 @details.setter 

1940 def details(self, value): 

1941 self._details = value 

1942 self.size = value["size"] 

1943 

1944 @property 

1945 def full_name(self): 

1946 return _unstrip_protocol(self.path, self.fs) 

1947 

1948 @property 

1949 def closed(self): 

1950 # get around this attr being read-only in IOBase 

1951 # use getattr here, since this can be called during del 

1952 return getattr(self, "_closed", True) 

1953 

1954 @closed.setter 

1955 def closed(self, c): 

1956 self._closed = c 

1957 

1958 def __hash__(self): 

1959 if "w" in self.mode: 

1960 return id(self) 

1961 else: 

1962 return int(tokenize(self.details), 16) 

1963 

1964 def __eq__(self, other): 

1965 """Files are equal if they have the same checksum, only in read mode""" 

1966 if self is other: 

1967 return True 

1968 return ( 

1969 isinstance(other, type(self)) 

1970 and self.mode == "rb" 

1971 and other.mode == "rb" 

1972 and hash(self) == hash(other) 

1973 ) 

1974 

1975 def commit(self): 

1976 """Move from temp to final destination""" 

1977 

1978 def discard(self): 

1979 """Throw away temporary file""" 

1980 

1981 def info(self): 

1982 """File information about this path""" 

1983 if self.readable(): 

1984 return self.details 

1985 else: 

1986 raise ValueError("Info not available while writing") 

1987 

1988 def tell(self): 

1989 """Current file location""" 

1990 return self.loc 

1991 

1992 def seek(self, loc, whence=0): 

1993 """Set current file location 

1994 

1995 Parameters 

1996 ---------- 

1997 loc: int 

1998 byte location 

1999 whence: {0, 1, 2} 

2000 from start of file, current location or end of file, resp. 

2001 """ 

2002 loc = int(loc) 

2003 if self.mode != "rb": 

2004 raise OSError(ESPIPE, "Seek only available in read mode") 

2005 if whence == 0: 

2006 nloc = loc 

2007 elif whence == 1: 

2008 nloc = self.loc + loc 

2009 elif whence == 2: 

2010 nloc = self.size + loc 

2011 else: 

2012 raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)") 

2013 if nloc < 0: 

2014 raise ValueError("Seek before start of file") 

2015 self.loc = nloc 

2016 return self.loc 
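    # Whence semantics at a glance (read mode only; values illustrative):
    #
    #   f.seek(10)        # absolute: position 10
    #   f.seek(5, 1)      # relative: 5 bytes past the current location
    #   f.seek(-4, 2)     # from end: 4 bytes before the end of the file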

2017 

2018 def write(self, data): 

2019 """ 

2020 Write data to buffer. 

2021 

2022 Buffer only sent on flush() or if buffer is greater than 

2023 or equal to blocksize. 

2024 

2025 Parameters 

2026 ---------- 

2027 data: bytes 

2028 Set of bytes to be written. 

2029 """ 

2030 if not self.writable(): 

2031 raise ValueError("File not in write mode") 

2032 if self.closed: 

2033 raise ValueError("I/O operation on closed file.") 

2034 if self.forced: 

2035 raise ValueError("This file has been force-flushed, can only close") 

2036 out = self.buffer.write(data) 

2037 self.loc += out 

2038 if self.buffer.tell() >= self.blocksize: 

2039 self.flush() 

2040 return out 

2041 

2042 def flush(self, force=False): 

2043 """ 

2044 Write buffered data to backend store. 

2045 

2046 Writes the current buffer, if it is larger than the block-size, or if 

2047 the file is being closed. 

2048 

2049 Parameters 

2050 ---------- 

2051 force: bool 

2052 When closing, write the last block even if it is smaller than 

2053 blocks are allowed to be. Disallows further writing to this file. 

2054 """ 

2055 

2056 if self.closed: 

2057 raise ValueError("Flush on closed file") 

2058 if force and self.forced: 

2059 raise ValueError("Force flush cannot be called more than once") 

2060 if force: 

2061 self.forced = True 

2062 

2063 if self.readable(): 

2064 # flushing is a no-op in read mode 

2065 return 

2066 

2067 if not force and self.buffer.tell() < self.blocksize: 

2068 # Defer write on small block 

2069 return 

2070 

2071 if self.offset is None: 

2072 # Initialize a multipart upload 

2073 self.offset = 0 

2074 try: 

2075 self._initiate_upload() 

2076 except BaseException: 

2077 self.closed = True 

2078 raise 

2079 

2080 if self._upload_chunk(final=force) is not False: 

2081 self.offset += self.buffer.seek(0, 2) 

2082 self.buffer = io.BytesIO() 
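    # The deferral rules above, traced on a hypothetical remote backend
    # (any subclass that uses this buffering machinery):
    #
    #   f = remote_fs.open("bucket/key", "wb")  # blocksize: 5 MiB default
    #   f.write(b"a" * 1024)       # buffered: 1 kiB < blocksize, no upload
    #   f.flush()                  # still deferred: small buffer, force=False
    #   f.write(b"b" * 6 * 2**20)  # buffer >= blocksize: chunk is uploaded
    #   f.close()                  # force-flushes the remainder, finalizes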

2083 

2084 def _upload_chunk(self, final=False): 

2085 """Write one part of a multi-block file upload 

2086 

2087 Parameters 

2088 ---------- 

2089 final: bool 

2090 This is the last block, so should complete file, if 

2091 self.autocommit is True. 

2092 """ 

2093 # may not yet have been initialized; may need to call _initiate_upload first 

2094 

2095 def _initiate_upload(self): 

2096 """Create remote file/upload""" 

2097 pass 

2098 

2099 def _fetch_range(self, start, end): 

2100 """Get the specified set of bytes from remote""" 

2101 return self.fs.cat_file(self.path, start=start, end=end) 

2102 

2103 def read(self, length=-1): 

2104 """ 

2105 Return data from cache, or fetch pieces as necessary 

2106 

2107 Parameters 

2108 ---------- 

2109 length: int (-1) 

2110 Number of bytes to read; if <0, all remaining bytes. 

2111 """ 

2112 length = -1 if length is None else int(length) 

2113 if self.mode != "rb": 

2114 raise ValueError("File not in read mode") 

2115 if length < 0: 

2116 length = self.size - self.loc 

2117 if self.closed: 

2118 raise ValueError("I/O operation on closed file.") 

2119 if length == 0: 

2120 # don't even bother calling fetch 

2121 return b"" 

2122 out = self.cache._fetch(self.loc, self.loc + length) 

2123 

2124 logger.debug( 

2125 "%s read: %i - %i %s", 

2126 self, 

2127 self.loc, 

2128 self.loc + length, 

2129 self.cache._log_stats(), 

2130 ) 

2131 self.loc += len(out) 

2132 return out 
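    # Length semantics: a negative or None length reads to EOF, e.g.
    #
    #   head = f.read(4)   # exactly the next 4 bytes (or fewer at EOF)
    #   rest = f.read()    # everything from the current location onward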

2133 

2134 def readinto(self, b): 

2135 """mirrors builtin file's readinto method 

2136 

2137 https://docs.python.org/3/library/io.html#io.RawIOBase.readinto 

2138 """ 

2139 out = memoryview(b).cast("B") 

2140 data = self.read(out.nbytes) 

2141 out[: len(data)] = data 

2142 return len(data) 

2143 

2144 def readuntil(self, char=b"\n", blocks=None): 

2145 """Return data between current position and first occurrence of char 

2146 

2147 char is included in the output, except if the end of the file is 

2148 encountered first. 

2149 

2150 Parameters 

2151 ---------- 

2152 char: bytes 

2153 Thing to find 

2154 blocks: None or int 

2155 How much to read in each go. Defaults to file blocksize - which may 

2156 mean a new read on every call. 

2157 """ 

2158 out = [] 

2159 while True: 

2160 start = self.tell() 

2161 part = self.read(blocks or self.blocksize) 

2162 if len(part) == 0: 

2163 break 

2164 found = part.find(char) 

2165 if found > -1: 

2166 out.append(part[: found + len(char)]) 

2167 self.seek(start + found + len(char)) 

2168 break 

2169 out.append(part) 

2170 return b"".join(out) 

2171 

2172 def readline(self): 

2173 """Read until and including the first occurrence of newline character 

2174 

2175 Note that, because of character encoding, this is not necessarily a 

2176 true line ending. 

2177 """ 

2178 return self.readuntil(b"\n") 
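    # e.g. with the content b"a,b\n1,2\n" and the cursor at the start:
    #
    #   f.readline()       # -> b"a,b\n" (the delimiter is included)
    #   f.readuntil(b",")  # -> b"1,"    (stops just after the first comma)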

2179 

2180 def __next__(self): 

2181 out = self.readline() 

2182 if out: 

2183 return out 

2184 raise StopIteration 

2185 

2186 def __iter__(self): 

2187 return self 

2188 

2189 def readlines(self): 

2190 """Return all data, split by the newline character, including the newline character""" 

2191 data = self.read() 

2192 lines = data.split(b"\n") 

2193 out = [line + b"\n" for line in lines[:-1]] 

2194 if data.endswith(b"\n"): 

2195 return out 

2196 else: 

2197 return out + [lines[-1]] 

2198 # (an alternative implementation would be ``return list(self)``) 

2199 

2200 def readinto1(self, b): 

2201 return self.readinto(b) 

2202 

2203 def close(self): 

2204 """Close file 

2205 

2206 Finalizes writes, discards cache 

2207 """ 

2208 if getattr(self, "_unclosable", False): 

2209 return 

2210 if self.closed: 

2211 return 

2212 try: 

2213 if self.mode == "rb": 

2214 self.cache = None 

2215 else: 

2216 if not self.forced: 

2217 self.flush(force=True) 

2218 

2219 if self.fs is not None: 

2220 self.fs.invalidate_cache(self.path) 

2221 self.fs.invalidate_cache(self.fs._parent(self.path)) 

2222 finally: 

2223 self.closed = True 

2224 

2225 def readable(self): 

2226 """Whether opened for reading""" 

2227 return "r" in self.mode and not self.closed 

2228 

2229 def seekable(self): 

2230 """Whether is seekable (only in read mode)""" 

2231 return self.readable() 

2232 

2233 def writable(self): 

2234 """Whether opened for writing""" 

2235 return self.mode in {"wb", "ab", "xb"} and not self.closed 

2236 

2237 def __reduce__(self): 

2238 if self.mode != "rb": 

2239 raise RuntimeError("Pickling a writeable file is not supported") 

2240 

2241 return reopen, ( 

2242 self.fs, 

2243 self.path, 

2244 self.mode, 

2245 self.blocksize, 

2246 self.loc, 

2247 self.size, 

2248 self.autocommit, 

2249 self.cache.name if self.cache else "none", 

2250 self.kwargs, 

2251 ) 

2252 

2253 def __del__(self): 

2254 if not self.closed: 

2255 self.close() 

2256 

2257 def __str__(self): 

2258 return f"<File-like object {type(self.fs).__name__}, {self.path}>" 

2259 

2260 __repr__ = __str__ 

2261 

2262 def __enter__(self): 

2263 return self 

2264 

2265 def __exit__(self, *args): 

2266 self.close() 
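
# A minimal subclass sketch, referenced from the class docstring above.
# Entirely hypothetical: bytes live in a plain dict (``self.fs.store``)
# standing in for a real backend; only the three required hooks appear.

class _DictBackedFile(AbstractBufferedFile):
    """Illustrative only: not part of the fsspec API."""

    def _initiate_upload(self):
        # called once, just before the first chunk is written
        self._parts = []

    def _upload_chunk(self, final=False):
        # ``flush`` hands us everything buffered since the last chunk
        self._parts.append(self.buffer.getvalue())
        if final:
            self.fs.store[self.path] = b"".join(self._parts)
        return True

    def _fetch_range(self, start, end):
        # serve reads straight out of the backing dict
        return self.fs.store[self.path][start:end]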

2267 

2268 

2269def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs): 

2270 file = fs.open( 

2271 path, 

2272 mode=mode, 

2273 block_size=blocksize, 

2274 autocommit=autocommit, 

2275 cache_type=cache_type, 

2276 size=size, 

2277 **kwargs, 

2278 ) 

2279 if loc > 0: 

2280 file.seek(loc) 

2281 return file
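
# How ``__reduce__`` and ``reopen`` cooperate, traced on a hypothetical
# remote backend (read mode only; writeable files refuse to pickle):
#
#   import pickle
#   f = remote_fs.open("bucket/key", "rb")
#   f.seek(3)
#   g = pickle.loads(pickle.dumps(f))   # unpickling calls reopen(...)
#   assert g.tell() == 3                # reopen restores loc via seek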