Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/fsspec/spec.py: 23%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

886 statements  

1from __future__ import annotations 

2 

3import io 

4import json 

5import logging 

6import os 

7import threading 

8import warnings 

9import weakref 

10from errno import ESPIPE 

11from glob import has_magic 

12from hashlib import sha256 

13from typing import Any, ClassVar 

14 

15from .callbacks import DEFAULT_CALLBACK 

16from .config import apply_config, conf 

17from .dircache import DirCache 

18from .transaction import Transaction 

19from .utils import ( 

20 _unstrip_protocol, 

21 glob_translate, 

22 isfilelike, 

23 other_paths, 

24 read_block, 

25 stringify_path, 

26 tokenize, 

27) 

28 

29logger = logging.getLogger("fsspec") 

30 

31 

def make_instance(cls, args, kwargs):
    """Instantiate ``cls`` from a positional-args tuple and a kwargs dict.

    Returned by ``AbstractFileSystem.__reduce__`` as the reconstruction
    callable, together with ``(type, storage_args, storage_options)``.
    """
    instance = cls(*args, **kwargs)
    return instance

34 

35 

class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    The token additionally includes the class itself, the process id recorded
    on the class and the identifier of the calling thread.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        """Give every class created by this metaclass its own cache and pid."""
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        # remembered so that __call__ can detect running in a different process
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        """Return a cached instance for these arguments, or build a new one.

        ``skip_instance_cache=True`` forces creation of a new instance and
        prevents it from being stored in the cache.
        """
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        # Removed from kwargs *before* tokenizing so they do not affect the
        # token; they are handed back to the constructor below but are not
        # part of the recorded ``storage_options``.
        strip_tokenize_options = {
            k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
        }
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        # popped only after tokenizing, so the flag itself is part of the token
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            # running in another process than the one that filled the cache:
            # drop all cached instances and adopt the new pid
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                # imported lazily, only when an instance asks for mirroring
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj

98 

99 

class AbstractFileSystem(metaclass=_Cached):
    """
    An abstract super-class for pythonic file-systems

    Implementations are expected to be compatible with or, better, subclass
    from here.
    """

    cachable = True  # this class can be cached, instances reused
    # Flipped to True by ``__init__`` so that re-initialising a reused
    # instance is a no-op
    _cached = False
    # Chunk size, in bytes, used by ``get_file`` when streaming a remote file
    blocksize = 2**22
    sep = "/"
    # Protocol name(s) recognised as URL prefixes by ``_strip_protocol``
    protocol: ClassVar[str | tuple[str, ...]] = "abstract"
    # Token of the most recently created or returned instance (set by _Cached)
    _latest = None
    # _Cached.__call__ mirrors sync methods only when both flags are truthy
    async_impl = False
    mirror_sync_methods = False
    root_marker = ""  # For some FSs, may require leading '/' or other character
    # Called with the filesystem instance to build its transaction object
    transaction_type = Transaction

    #: Extra *class attributes* that should be considered when hashing.
    _extra_tokenize_attributes = ()
    #: *storage options* that should not be considered when hashing.
    _strip_tokenize_options = ()

    # Set by _Cached metaclass
    storage_args: tuple[Any, ...]
    storage_options: dict[str, Any]

127 

def __init__(self, *args, **storage_options):
    """Create and configure file-system instance

    Instances may be cachable, so if similar enough arguments are seen
    a new instance is not required. The token attribute exists to allow
    implementations to cache instances if they wish.

    A reasonable default should be provided if there are no arguments.

    Subclasses should call this method.

    Parameters
    ----------
    use_listings_cache, listings_expiry_time, max_paths:
        passed to ``DirCache``, if the implementation supports
        directory listing caching. Pass use_listings_cache=False
        to disable such caching.
    skip_instance_cache: bool
        If this is a cachable implementation, pass True here to force
        creating a new instance even if a matching instance exists, and prevent
        storing this instance.
    asynchronous: bool
    loop: asyncio-compatible IOLoop or None
    """
    if self._cached:
        # instance came out of the cache already initialised: leave it alone
        return
    self._cached = True
    self._intrans = False
    self._transaction = None
    self._invalidated_caches_in_transaction = []
    self.dircache = DirCache(**storage_options)

    # options that are still accepted but only trigger a warning
    retired_options = (
        ("add_docs", "add_docs is no longer supported."),
        ("add_aliases", "add_aliases has been removed."),
    )
    for option, message in retired_options:
        if storage_options.pop(option, None):
            warnings.warn(message, FutureWarning)
    # This is set in _Cached
    self._fs_token_ = None

168 

@property
def fsid(self):
    """Persistent identifier for comparing filesystems across sessions.

    Not provided by this base class: accessing it raises
    ``NotImplementedError``.
    """
    raise NotImplementedError

175 

176 @property 

177 def _fs_token(self): 

178 return self._fs_token_ 

179 

180 def __dask_tokenize__(self): 

181 return self._fs_token 

182 

183 def __hash__(self): 

184 return int(self._fs_token, 16) 

185 

186 def __eq__(self, other): 

187 return isinstance(other, type(self)) and self._fs_token == other._fs_token 

188 

def __reduce__(self):
    """Pickle support: rebuild through ``make_instance``.

    The reconstruction arguments are the concrete type plus the
    ``storage_args`` / ``storage_options`` recorded on the instance.
    """
    rebuild_args = (type(self), self.storage_args, self.storage_options)
    return make_instance, rebuild_args

191 

@classmethod
def _strip_protocol(cls, path):
    """Turn path from fully-qualified to file-system-specific

    May require FS-specific handling, e.g., for relative paths or links.

    A list of paths is processed element-wise. For each protocol of the
    class, a leading ``<protocol>://`` or ``<protocol>::`` is removed;
    trailing slashes are then stripped, and ``root_marker`` is returned if
    nothing is left.
    """
    if isinstance(path, list):
        return [cls._strip_protocol(item) for item in path]
    path = stringify_path(path)
    if isinstance(cls.protocol, str):
        known_protocols = (cls.protocol,)
    else:
        known_protocols = cls.protocol
    for proto in known_protocols:
        # "://" takes precedence over "::"; at most one is removed per protocol
        for marker in ("://", "::"):
            prefix = proto + marker
            if path.startswith(prefix):
                path = path[len(prefix) :]
                break
    path = path.rstrip("/")
    # use of root_marker to make minimum required path, e.g., "/"
    return path or cls.root_marker

210 

def unstrip_protocol(self, name: str) -> str:
    """Format FS-specific path to generic, including protocol

    A name that already starts with ``<protocol>://`` for any of this
    filesystem's protocols is returned unchanged; otherwise the first
    protocol is prepended.
    """
    if isinstance(self.protocol, str):
        protos = (self.protocol,)
    else:
        protos = self.protocol
    if any(name.startswith(f"{proto}://") for proto in protos):
        return name
    return f"{protos[0]}://{name}"

218 

219 @staticmethod 

220 def _get_kwargs_from_urls(path): 

221 """If kwargs can be encoded in the paths, extract them here 

222 

223 This should happen before instantiation of the class; incoming paths 

224 then should be amended to strip the options in methods. 

225 

226 Examples may look like an sftp path "sftp://user@host:/my/path", where 

227 the user and host should become kwargs and later get stripped. 

228 """ 

229 # by default, nothing happens 

230 return {} 

231 

@classmethod
def current(cls):
    """Return the most recently instantiated FileSystem

    If no instance has been created, then create one with defaults
    """
    latest = cls._latest
    if latest not in cls._cache:
        return cls()
    return cls._cache[latest]

241 

@property
def transaction(self):
    """A context within which files are committed together upon exit

    Requires the file class to implement `.commit()` and `.discard()`
    for the normal and exception cases.

    The transaction object is created from ``transaction_type`` on first
    access and then reused.
    """
    existing = self._transaction
    if existing is not None:
        return existing
    self._transaction = self.transaction_type(self)
    return self._transaction

252 

def start_transaction(self):
    """Begin write transaction for deferring files, non-context version

    Marks the filesystem as being inside a transaction, installs a new
    transaction object built from ``transaction_type`` and returns
    ``self.transaction``.
    """
    self._intrans = True
    fresh = self.transaction_type(self)
    self._transaction = fresh
    return self.transaction

258 

def end_transaction(self):
    """Finish write transaction, non-context version

    Completes the current transaction, drops it, and then replays every
    cache invalidation that was queued while the transaction was open.
    """
    self.transaction.complete()
    self._transaction = None
    # The invalid cache must be cleared after the transaction is completed.
    for stale_path in self._invalidated_caches_in_transaction:
        self.invalidate_cache(stale_path)
    self._invalidated_caches_in_transaction.clear()

267 

def invalidate_cache(self, path=None):
    """
    Discard any cached directory information

    Parameters
    ----------
    path: string or None
        If None, clear all listings cached else listings at or under given
        path.
    """
    # Not necessary to implement invalidation mechanism, may have no cache.
    # But if have, you should call this method of parent class from your
    # subclass to ensure expiring caches after transactions correctly.
    # See the implementation of FTPFileSystem in ftp.py
    if not self._intrans:
        return
    # inside a transaction: remember the path so end_transaction can replay it
    self._invalidated_caches_in_transaction.append(path)

284 

def mkdir(self, path, create_parents=True, **kwargs):
    """
    Create directory entry at path

    For systems that don't have true directories, this may only affect
    this instance and not touch the real filesystem

    Parameters
    ----------
    path: str
        location
    create_parents: bool
        if True, this is equivalent to ``makedirs``
    kwargs:
        may be permissions, etc.
    """
    # not necessary to implement, may not have directories
    return None

302 

def makedirs(self, path, exist_ok=False):
    """Recursively make directories

    Creates directory at path and any intervening required directories.
    Raises exception if, for instance, the path already exists but is a
    file.

    Parameters
    ----------
    path: str
        leaf directory name
    exist_ok: bool (False)
        If False, will error if the target already exists
    """
    # not necessary to implement, may not have directories
    return None

318 

def rmdir(self, path):
    """Remove a directory, if empty"""
    # not necessary to implement, may not have directories
    return None

322 

def ls(self, path, detail=True, **kwargs):
    """List objects at path.

    This should include subdirectories and files at that location. The
    difference between a file and a directory must be clear when details
    are requested.

    The specific keys, or perhaps a FileInfo class, or similar, is TBD,
    but must be consistent across implementations.
    Must include:

    - full path to the entry (without protocol)
    - size of the entry, in bytes. If the value cannot be determined, will
      be ``None``.
    - type of entry, "file", "directory" or other

    Additional information
    may be present, appropriate to the file-system, e.g., generation,
    checksum, etc.

    May use refresh=True|False to allow use of self._ls_from_cache to
    check for a saved listing and avoid calling the backend. This would be
    common where listing may be expensive.

    Parameters
    ----------
    path: str
    detail: bool
        if True, gives a list of dictionaries, where each is the same as
        the result of ``info(path)``. If False, gives a list of paths
        (str).
    kwargs: may have additional backend-specific options, such as version
        information

    Returns
    -------
    List of strings if detail is False, or list of directory information
    dicts if detail is True.

    Raises
    ------
    NotImplementedError
        Always, in this base class; implementations must override.
    """
    raise NotImplementedError

363 

364 def _ls_from_cache(self, path): 

365 """Check cache for listing 

366 

367 Returns listing, if found (may be empty list for a directly that exists 

368 but contains nothing), None if not in cache. 

369 """ 

370 parent = self._parent(path) 

371 try: 

372 return self.dircache[path.rstrip("/")] 

373 except KeyError: 

374 pass 

375 try: 

376 files = [ 

377 f 

378 for f in self.dircache[parent] 

379 if f["name"] == path 

380 or (f["name"] == path.rstrip("/") and f["type"] == "directory") 

381 ] 

382 if len(files) == 0: 

383 # parent dir was listed but did not contain this file 

384 raise FileNotFoundError(path) 

385 return files 

386 except KeyError: 

387 pass 

388 

def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
    """Return all files under the given path.

    List all files, recursing into subdirectories; output is iterator-style,
    like ``os.walk()``. For a simple list of files, ``find()`` is available.

    When topdown is True, the caller can modify the dirnames list in-place (perhaps
    using del or slice assignment), and walk() will
    only recurse into the subdirectories whose names remain in dirnames;
    this can be used to prune the search, impose a specific order of visiting,
    or even to inform walk() about directories the caller creates or renames before
    it resumes walk() again.
    Modifying dirnames when topdown is False has no effect. (see os.walk)

    Note that the "files" outputted will include anything that is not
    a directory, such as links.

    Parameters
    ----------
    path: str
        Root to recurse into
    maxdepth: int
        Maximum recursion depth. None means limitless, but not recommended
        on link-based file-systems.
    topdown: bool (True)
        Whether to walk the directory tree from the top downwards or from
        the bottom upwards.
    on_error: "omit", "raise", a callable
        if omit (default), path with exception will simply be empty;
        If raise, an underlying exception will be raised;
        if callable, it will be called with a single OSError instance as argument.
        The same policy is applied to every directory visited, not only
        to the root.
    kwargs: passed to ``ls``

    Yields
    ------
    ``(path, dirs, files)`` tuples. ``dirs`` and ``files`` are lists of
    names, or dicts of ``{name: info}`` when ``detail=True`` is passed.

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and is smaller than 1.
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    path = self._strip_protocol(path)
    full_dirs = {}
    dirs = {}
    files = {}

    detail = kwargs.pop("detail", False)
    try:
        listing = self.ls(path, detail=True, **kwargs)
    except OSError as e:  # FileNotFoundError is a subclass of OSError
        if on_error == "raise":
            raise
        if callable(on_error):
            on_error(e)
        return

    for info in listing:
        # each info name must be at least [path]/part , but here
        # we check also for names like [path]/part/
        pathname = info["name"].rstrip("/")
        name = pathname.rsplit("/", 1)[-1]
        if info["type"] == "directory" and pathname != path:
            # do not include "self" path
            full_dirs[name] = pathname
            dirs[name] = info
        elif pathname == path:
            # file-like with same name as given path
            files[""] = info
        else:
            files[name] = info

    if not detail:
        dirs = list(dirs)
        files = list(files)

    if topdown:
        # Yield before recursion if walking top down
        yield path, dirs, files

    if maxdepth is not None:
        maxdepth -= 1
        if maxdepth < 1:
            if not topdown:
                yield path, dirs, files
            return

    for d in dirs:
        yield from self.walk(
            full_dirs[d],
            maxdepth=maxdepth,
            detail=detail,
            topdown=topdown,
            # propagate the error policy; otherwise sub-directories silently
            # fall back to the default "omit"
            on_error=on_error,
            **kwargs,
        )

    if not topdown:
        # Yield after recursion if walking bottom up
        yield path, dirs, files

482 

def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
    """List all files below path.

    Like posix ``find`` command without conditions

    Parameters
    ----------
    path : str
    maxdepth: int or None
        If not None, the maximum number of levels to descend
    withdirs: bool
        Whether to include directory paths in the output. This is True
        when used by glob, but users usually only want files.
    detail: bool
        If True, return a dict of ``{path: info}`` instead of a list of paths.
    kwargs are passed to ``ls``.

    Returns
    -------
    Sorted list of paths, or a dict keyed by path in sorted order if
    ``detail`` is True.
    """
    # TODO: allow equivalent of -name parameter
    path = self._strip_protocol(path)
    found = {}

    # Add the root directory if withdirs is requested
    # This is needed for posix glob compliance
    if withdirs and path != "" and self.isdir(path):
        found[path] = self.info(path)

    for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
        if withdirs:
            files.update(dirs)
        for entry in files.values():
            found[entry["name"]] = entry
    if not found and self.isfile(path):
        # walk works on directories, but find should also return [path]
        # when path happens to be a file
        found[path] = {}
    ordered = sorted(found)
    if detail:
        return {name: found[name] for name in ordered}
    return ordered

520 

def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
    """Space used by files and optionally directories within a path

    Directory size does not include the size of its contents.

    Parameters
    ----------
    path: str
    total: bool
        Whether to sum all the file sizes
    maxdepth: int or None
        Maximum number of directory levels to descend, None for unlimited.
    withdirs: bool
        Whether to include directory paths in the output.
    kwargs: passed to ``find``

    Returns
    -------
    Dict of {path: size} if total=False, or int otherwise, where numbers
    refer to bytes used.
    """
    sizes = {}

    def record(target):
        # look the entry up again so the key is the name reported by info()
        entry = self.info(target)
        sizes[entry["name"]] = entry["size"]

    if withdirs and self.isdir(path):
        # Include top-level directory in output
        record(path)
    for found in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
        record(found)
    return sum(sizes.values()) if total else sizes

554 

def glob(self, path, maxdepth=None, **kwargs):
    """Find files by glob-matching.

    Pattern matching capabilities for finding files that match the given pattern.

    Parameters
    ----------
    path: str
        The glob pattern to match against
    maxdepth: int or None
        Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
        Must be at least 1 if provided.
    kwargs:
        ``detail`` (default False) and ``withdirs`` (default True) are
        consumed here; the remaining arguments are passed to ``exists`` and
        ``info`` (pattern without magic characters) or to ``find``.

    Returns
    -------
    List of matched paths, or dict of paths and their info if detail=True

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and is smaller than 1.

    Notes
    -----
    Supported patterns:
    - '*': Matches any sequence of characters within a single directory level
    - ``'**'``: Matches any number of directory levels (must be an entire path component)
    - '?': Matches exactly one character
    - '[abc]': Matches any character in the set
    - '[a-z]': Matches any character in the range
    - '[!abc]': Matches any character NOT in the set

    Special behaviors:
    - If the path ends with '/', only folders are returned
    - Consecutive '*' characters are compressed into a single '*'
    - Empty brackets '[]' never match anything
    - Negated empty brackets '[!]' match any single character
    - Special characters in character classes are escaped properly

    Limitations:
    - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
    - No brace expansion ('{a,b}.txt')
    - No extended glob patterns ('+(pattern)', '!(pattern)')
    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    import re

    seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
    # must be recorded before stripping, because
    ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
    path = self._strip_protocol(path)
    append_slash_to_dirname = ends_with_sep or path.endswith(
        tuple(sep + "**" for sep in seps)
    )
    # position of the first occurrence of each magic character, or
    # len(path) when the character is absent
    idx_star = path.find("*") if path.find("*") >= 0 else len(path)
    idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
    # note: despite the name, this locates an opening square bracket
    idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

    min_idx = min(idx_star, idx_qmark, idx_brace)

    detail = kwargs.pop("detail", False)
    withdirs = kwargs.pop("withdirs", True)

    if not has_magic(path):
        # plain path: the result is the path itself if it exists, else empty
        if self.exists(path, **kwargs):
            if not detail:
                return [path]
            else:
                return {path: self.info(path, **kwargs)}
        else:
            if not detail:
                return []  # glob of non-existent returns empty
            else:
                return {}
    elif "/" in path[:min_idx]:
        # search root is everything up to and including the last "/" that
        # precedes the first magic character
        min_idx = path[:min_idx].rindex("/")
        root = path[: min_idx + 1]
        depth = path[min_idx + 1 :].count("/") + 1
    else:
        root = ""
        depth = path[min_idx + 1 :].count("/") + 1

    if "**" in path:
        if maxdepth is not None:
            # replace the levels counted from the first "**" onwards by maxdepth
            idx_double_stars = path.find("**")
            depth_double_stars = path[idx_double_stars:].count("/") + 1
            depth = depth - depth_double_stars + maxdepth
        else:
            # unbounded recursion
            depth = None

    allpaths = self.find(
        root, maxdepth=depth, withdirs=withdirs, detail=True, **kwargs
    )

    pattern = glob_translate(path + ("/" if ends_with_sep else ""))
    pattern = re.compile(pattern)

    # directories are matched with a trailing "/" appended when the pattern
    # ended with a separator or with "<sep>**"
    out = {
        p: info
        for p, info in sorted(allpaths.items())
        if pattern.match(
            p + "/"
            if append_slash_to_dirname and info["type"] == "directory"
            else p
        )
    }

    if detail:
        return out
    else:
        return list(out)

664 

def exists(self, path, **kwargs):
    """Is there a file at the given path

    Parameters
    ----------
    path: str
    kwargs: passed to ``info``

    Returns
    -------
    True if ``info`` succeeds, False if it raises any ``Exception``.
    """
    try:
        self.info(path, **kwargs)
        return True
    except Exception:
        # any exception allowed bar FileNotFoundError?
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not reported as "missing".
        return False

673 

def lexists(self, path, **kwargs):
    """If there is a file at the given path (including
    broken links)

    This base implementation delegates to ``exists``, forwarding ``kwargs``.
    """
    # kwargs were previously accepted but dropped; pass them through
    return self.exists(path, **kwargs)

678 

def info(self, path, **kwargs):
    """Give details of entry at path

    Returns a single dictionary, with exactly the same information as ``ls``
    would with ``detail=True``.

    The default implementation calls ls and could be overridden by a
    shortcut. kwargs are passed on to ``ls()``.

    Some file systems might not be able to measure the file's size, in
    which case, the returned dict will include ``'size': None``.

    Returns
    -------
    dict with keys: name (full path in the FS), size (in bytes), type (file,
    directory, or something else) and other FS-specific keys.

    Raises
    ------
    FileNotFoundError
        If neither the parent listing nor the listing of ``path`` itself
        yields anything for ``path``.
    """
    path = self._strip_protocol(path)

    # first attempt: find the entry in the listing of its parent
    siblings = self.ls(self._parent(path), detail=True, **kwargs)
    in_parent = [o for o in siblings if o["name"].rstrip("/") == path]
    if in_parent:
        return in_parent[0]

    # second attempt: list the path itself
    own_listing = self.ls(path, detail=True, **kwargs)
    path = path.rstrip("/")
    exact = [o for o in own_listing if o["name"].rstrip("/") == path]
    if len(exact) == 1:
        entry = exact[0]
        entry.setdefault("size", None)
        return entry
    if len(exact) > 1 or own_listing:
        # the listing has contents, so treat the path as a directory
        return {"name": path, "size": 0, "type": "directory"}
    raise FileNotFoundError(path)

712 

def checksum(self, path):
    """Unique value for current version of file

    If the checksum is the same from one moment to another, the contents
    are guaranteed to be the same. If the checksum changes, the contents
    *might* have changed.

    This should normally be overridden; default will probably capture
    creation/modification timestamp (which would be good) or maybe
    access timestamp (which would be bad)

    The default is the token of ``info(path)``, parsed as a hexadecimal
    integer.
    """
    details = self.info(path)
    hex_digest = tokenize(details)
    return int(hex_digest, 16)

725 

def size(self, path):
    """Size in bytes of file

    Returns None when ``info(path)`` has no ``"size"`` key.
    """
    details = self.info(path)
    return details.get("size")

729 

def sizes(self, paths):
    """Size in bytes of each file in a list of paths

    Sizes are returned in the same order as ``paths``.
    """
    return list(map(self.size, paths))

733 

def isdir(self, path):
    """Is this entry directory-like?

    Returns False when ``info`` raises ``OSError``.
    """
    try:
        details = self.info(path)
        return details["type"] == "directory"
    except OSError:
        return False

740 

def isfile(self, path):
    """Is this entry file-like?

    Returns False when ``info`` raises any ``Exception`` or the entry's type
    is not ``"file"``.
    """
    try:
        return self.info(path)["type"] == "file"
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return False

747 

def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
    """Get the contents of the file as a string.

    Parameters
    ----------
    path: str
        URL of file on this filesystem
    encoding, errors, newline: same as `open`.
    kwargs: passed to ``open()``.
    """
    text_file = self.open(
        path,
        mode="r",
        encoding=encoding,
        errors=errors,
        newline=newline,
        **kwargs,
    )
    with text_file as handle:
        return handle.read()

766 

def write_text(
    self, path, value, encoding=None, errors=None, newline=None, **kwargs
):
    """Write the text to the given file.

    An existing file will be overwritten.

    Parameters
    ----------
    path: str
        URL of file on this filesystem
    value: str
        Text to write.
    encoding, errors, newline: same as `open`.
    kwargs: passed to ``open()``.

    Returns
    -------
    Whatever the file object's ``write`` returns.
    """
    text_file = self.open(
        path,
        mode="w",
        encoding=encoding,
        errors=errors,
        newline=newline,
        **kwargs,
    )
    with text_file as handle:
        return handle.write(value)

791 

def cat_file(self, path, start=None, end=None, **kwargs):
    """Get the content of a file

    Parameters
    ----------
    path: URL of file on this filesystem
    start, end: int
        Bytes limits of the read. If negative, backwards from end,
        like usual python slices. Either can be None for start or
        end of file, respectively
    kwargs: passed to ``open()``.

    Returns
    -------
    The requested bytes; empty if ``end`` lies at or before the start of
    the read.
    """
    # explicitly set buffering off?
    with self.open(path, "rb", **kwargs) as f:
        if start is not None:
            if start >= 0:
                f.seek(start)
            else:
                f.seek(max(0, f.size + start))
        if end is not None:
            if end < 0:
                end = f.size + end
            # A negative length would make read() return everything up to
            # EOF; clamp so an empty range yields empty bytes, as a slice does.
            return f.read(max(0, end - f.tell()))
        return f.read()

816 

def pipe_file(self, path, value, mode="overwrite", **kwargs):
    """Set the bytes of given file

    Parameters
    ----------
    path: str
    value: bytes
    mode: str
        With "create", raise ``FileExistsError`` if ``path`` already exists.
    kwargs: passed to ``open()``.
    """
    must_be_new = mode == "create"
    if must_be_new and self.exists(path):
        # non-atomic but simple way; or could use "xb" in open(), which is likely
        # not as well supported
        raise FileExistsError
    with self.open(path, "wb", **kwargs) as sink:
        sink.write(value)

825 

def pipe(self, path, value=None, **kwargs):
    """Put value into path

    (counterpart to ``cat``)

    Parameters
    ----------
    path: string or dict(str, bytes)
        If a string, a single remote location to put ``value`` bytes; if a dict,
        a mapping of {path: bytesvalue}.
    value: bytes, optional
        If using a single path, these are the bytes to put there. Ignored if
        ``path`` is a dict
    kwargs: passed to ``pipe_file``

    Raises
    ------
    ValueError
        If ``path`` is neither a str nor a dict.
    """
    if isinstance(path, str):
        pairs = [(path, value)]
    elif isinstance(path, dict):
        pairs = path.items()
    else:
        raise ValueError("path must be str or dict")
    for target, payload in pairs:
        self.pipe_file(self._strip_protocol(target), payload, **kwargs)

847 

def cat_ranges(
    self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
):
    """Get the contents of byte ranges from one or more files

    Parameters
    ----------
    paths: list
        A list of filepaths on this filesystem
    starts, ends: int or list
        Bytes limits of the read. If using a single int, the same value will be
        used to read all the specified files.
    max_gap: None
        Not supported by this implementation; any other value raises
        ``NotImplementedError``.
    on_error: "return" or other
        If "return" (default), an exception raised while reading a range is
        placed in the output at that position; otherwise it is re-raised.
    kwargs: passed to ``cat_file``

    Returns
    -------
    list with one item (bytes or exception instance) per path

    Raises
    ------
    TypeError
        If ``paths`` is not a list.
    ValueError
        If ``starts`` or ``ends`` is a list whose length differs from ``paths``.
    """
    if max_gap is not None:
        raise NotImplementedError
    if not isinstance(paths, list):
        raise TypeError
    if not isinstance(starts, list):
        starts = [starts] * len(paths)
    if not isinstance(ends, list):
        ends = [ends] * len(paths)
    if len(starts) != len(paths) or len(ends) != len(paths):
        raise ValueError
    out = []
    for p, s, e in zip(paths, starts, ends):
        try:
            out.append(self.cat_file(p, s, e, **kwargs))
        except Exception as exc:  # named ``exc`` so the range end ``e`` is not shadowed
            if on_error == "return":
                out.append(exc)
            else:
                raise
    return out

881 

def cat(self, path, recursive=False, on_error="raise", **kwargs):
    """Fetch (potentially multiple) paths' contents

    Parameters
    ----------
    recursive: bool
        If True, assume the path(s) are directories, and get all the
        contained files
    on_error : "raise", "omit", "return"
        If raise, an underlying exception will be raised (converted to KeyError
        if the type is in self.missing_exceptions); if omit, keys with exception
        will simply not be included in the output; if "return", all keys are
        included in the output, but the value will be bytes or an exception
        instance.
    kwargs: passed to cat_file

    Returns
    -------
    dict of {path: contents} if there are multiple paths
    or the path has been otherwise expanded
    """
    paths = self.expand_path(path, recursive=recursive, **kwargs)
    wants_mapping = (
        len(paths) > 1
        or isinstance(path, list)
        or paths[0] != self._strip_protocol(path)
    )
    if not wants_mapping:
        # a single, unexpanded path: return its bytes directly
        return self.cat_file(paths[0], **kwargs)

    contents = {}
    for target in paths:
        try:
            contents[target] = self.cat_file(target, **kwargs)
        except Exception as exc:
            if on_error == "raise":
                raise
            if on_error == "return":
                contents[target] = exc
    return contents

921 

def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
    """Copy single remote file to local

    Parameters
    ----------
    rpath: str
        Remote path to read from.
    lpath: str or file-like
        Local destination. A file-like object is written to directly and is
        left open afterwards.
    callback:
        Receives ``set_size`` with the remote file's ``size`` attribute (or
        None) and a ``relative_update`` after each chunk read.
    outfile: file-like, optional
        Already-open destination to write to instead of opening ``lpath``.
    kwargs: passed to ``open()`` for the remote file.

    If ``rpath`` is a directory, only the local directory ``lpath`` is
    created and None is returned.
    """
    from .implementations.local import LocalFileSystem

    if isfilelike(lpath):
        outfile = lpath
    elif self.isdir(rpath):
        os.makedirs(lpath, exist_ok=True)
        return None

    # make sure the local parent directory exists before writing
    fs = LocalFileSystem(auto_mkdir=True)
    fs.makedirs(fs._parent(lpath), exist_ok=True)

    with self.open(rpath, "rb", **kwargs) as f1:
        if outfile is None:
            outfile = open(lpath, "wb")

        try:
            callback.set_size(getattr(f1, "size", None))
            data = True
            # copy in chunks of ``self.blocksize`` until a read returns nothing
            while data:
                data = f1.read(self.blocksize)
                segment_len = outfile.write(data)
                if segment_len is None:
                    # write() gave no count back; fall back to the chunk length
                    segment_len = len(data)
                callback.relative_update(segment_len)
        finally:
            # NOTE(review): this closes ``outfile`` whenever ``lpath`` is not
            # file-like, which includes a caller-supplied ``outfile`` - confirm
            # that is intended.
            if not isfilelike(lpath):
                outfile.close()

951 

def get(
    self,
    rpath,
    lpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) to local.

    Copies a specific file or tree of files (if recursive=True). If lpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within. Can submit a list of paths, which may be glob-patterns
    and will be expanded.

    Calls get_file for each source.

    Parameters
    ----------
    rpath: str or list
        Remote source path(s).
    lpath: str or list
        Local destination(s). If both ``rpath`` and ``lpath`` are lists they
        are paired up as given, without any expansion.
    recursive: bool
        Passed to ``expand_path``.
    callback:
        Receives ``set_size`` with the number of files; a branched child
        callback is passed to each ``get_file`` call.
    maxdepth: int or None
        Passed to ``expand_path``.
    kwargs: passed to ``expand_path`` and ``get_file``.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # No need to expand paths when both source and destination
        # are provided as lists
        rpaths = rpath
        lpaths = lpath
    else:
        from .implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(rpath, str)
        rpaths = self.expand_path(
            rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
            if not rpaths:
                # nothing left to copy after filtering out directories
                return

        if isinstance(lpath, str):
            lpath = make_path_posix(lpath)

        # a single expanded source is treated as one file
        source_is_file = len(rpaths) == 1
        dest_is_dir = isinstance(lpath, str) and (
            trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
        )

        # value for the ``exists`` argument of ``other_paths`` below
        exists = source_is_str and (
            (has_magic(rpath) and source_is_file)
            or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
        )
        lpaths = other_paths(
            rpaths,
            lpath,
            exists=exists,
            flatten=not source_is_str,
        )

    callback.set_size(len(lpaths))
    for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
        with callback.branched(rpath, lpath) as child:
            self.get_file(rpath, lpath, callback=child, **kwargs)

1015 

def put_file(
    self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
):
    """Copy single file to remote

    Parameters
    ----------
    lpath: str
        Local file (or directory) to upload.
    rpath: str or path-like
        Remote destination.
    callback: Callback
        Receives the local file size and one relative update per chunk.
    mode: str
        With "create", refuse to write if ``rpath`` already exists.
    kwargs:
        Passed to ``self.open`` for the remote file.

    Raises
    ------
    FileExistsError
        If ``mode == "create"`` and ``rpath`` already exists.

    Notes
    -----
    If ``lpath`` is a directory, only the remote directory is created.
    """
    if mode == "create" and self.exists(rpath):
        # name the offending path so the failure is actionable
        raise FileExistsError(rpath)
    if os.path.isdir(lpath):
        self.makedirs(rpath, exist_ok=True)
        return None

    with open(lpath, "rb") as f1:
        # seek to the end to learn the size, then rewind
        size = f1.seek(0, 2)
        callback.set_size(size)
        f1.seek(0)

        self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
        with self.open(rpath, "wb", **kwargs) as f2:
            while f1.tell() < size:
                data = f1.read(self.blocksize)
                if not data:
                    # the file shrank after its size was measured; without
                    # this the loop would never terminate
                    break
                segment_len = f2.write(data)
                if segment_len is None:
                    segment_len = len(data)
                callback.relative_update(segment_len)

1039 

def put(
    self,
    lpath,
    rpath,
    recursive=False,
    callback=DEFAULT_CALLBACK,
    maxdepth=None,
    **kwargs,
):
    """Copy file(s) from local.

    Uploads one file, or a whole tree when ``recursive=True``. An ``rpath``
    ending in "/" is taken to be a directory that the files are placed
    inside.

    ``put_file`` is called once per source path.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # Explicit source and destination lists are used as given,
        # without any expansion.
        rpaths, lpaths = rpath, lpath
    else:
        from .implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(lpath, str)
        if source_is_str:
            lpath = make_path_posix(lpath)
        fs = LocalFileSystem()
        lpaths = fs.expand_path(
            lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # A non-recursive glob never copies directories
            kept = []
            for candidate in lpaths:
                if trailing_sep(candidate) or fs.isdir(candidate):
                    continue
                kept.append(candidate)
            lpaths = kept
            if not lpaths:
                return

        source_is_file = len(lpaths) == 1
        dest_is_dir = isinstance(rpath, str) and (
            trailing_sep(rpath) or self.isdir(rpath)
        )

        if isinstance(rpath, str):
            rpath = self._strip_protocol(rpath)
        else:
            rpath = [self._strip_protocol(p) for p in rpath]

        if source_is_str:
            globbed = has_magic(lpath)
            exists = (globbed and source_is_file) or (
                not globbed and dest_is_dir and not trailing_sep(lpath)
            )
        else:
            exists = False
        rpaths = other_paths(
            lpaths,
            rpath,
            exists=exists,
            flatten=not source_is_str,
        )

    callback.set_size(len(rpaths))
    for local_source, remote_target in callback.wrap(zip(lpaths, rpaths)):
        with callback.branched(local_source, remote_target) as child:
            self.put_file(local_source, remote_target, callback=child, **kwargs)

1107 

def head(self, path, size=1024):
    """Return up to ``size`` bytes read from the start of the file"""
    with self.open(path, "rb") as handle:
        leading = handle.read(size)
        return leading

1112 

def tail(self, path, size=1024):
    """Return up to ``size`` bytes read from the end of the file"""
    with self.open(path, "rb") as handle:
        # never step back further than the start of the file
        step_back = max(-size, -handle.size)
        handle.seek(step_back, 2)
        return handle.read()

1118 

def cp_file(self, path1, path2, **kwargs):
    """Copy a single file within the filesystem; not implemented here"""
    raise NotImplementedError

1121 

def copy(
    self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
):
    """Copy within two locations in the filesystem

    on_error : "raise", "ignore"
        With "raise", a not-found error from any single copy propagates;
        with "ignore" that path is skipped. When not given, the default is
        "ignore" for recursive copies and "raise" otherwise.
    """
    if on_error is None:
        on_error = "ignore" if recursive else "raise"

    if isinstance(path1, list) and isinstance(path2, list):
        # Explicit source and destination lists are used as given,
        # without any expansion.
        paths1, paths2 = path1, path2
    else:
        from .implementations.local import trailing_sep

        source_is_str = isinstance(path1, str)
        paths1 = self.expand_path(
            path1, recursive=recursive, maxdepth=maxdepth, **kwargs
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # A non-recursive glob never copies directories
            kept = []
            for candidate in paths1:
                if trailing_sep(candidate) or self.isdir(candidate):
                    continue
                kept.append(candidate)
            paths1 = kept
            if not paths1:
                return

        source_is_file = len(paths1) == 1
        dest_is_dir = isinstance(path2, str) and (
            trailing_sep(path2) or self.isdir(path2)
        )

        if source_is_str:
            globbed = has_magic(path1)
            exists = (globbed and source_is_file) or (
                not globbed and dest_is_dir and not trailing_sep(path1)
            )
        else:
            exists = False
        paths2 = other_paths(
            paths1,
            path2,
            exists=exists,
            flatten=not source_is_str,
        )

    for source, target in zip(paths1, paths2):
        try:
            self.cp_file(source, target, **kwargs)
        except FileNotFoundError:
            if on_error == "raise":
                raise

1177 

def expand_path(
    self, path, recursive=False, maxdepth=None, assume_literal=False, **kwargs
):
    """Turn one or more globs or directories into a list of all matching paths
    to files or directories.

    Parameters
    ----------
    path: str, path-like or iterable of str
        Path(s) to expand; a single path is treated as a one-element list.
    recursive: bool
        Also include everything found beneath each path.
    maxdepth: int or None
        Depth limit handed to ``glob``/``find``; must be at least 1 if given.
    assume_literal: bool
        If True, glob characters in the paths are not interpreted.
    kwargs:
        Passed to ``glob`` or ``find``, which may in turn call ``ls``.

    Returns
    -------
    Sorted list of paths.

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and smaller than 1.
    FileNotFoundError
        If nothing matched.
    """

    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    if isinstance(path, (str, os.PathLike)):
        # forward ``assume_literal`` too, so a single literal path that
        # contains glob characters is not globbed by the delegated call
        out = self.expand_path(
            [path],
            recursive=recursive,
            maxdepth=maxdepth,
            assume_literal=assume_literal,
            **kwargs,
        )
    else:
        out = set()
        path = [self._strip_protocol(p) for p in path]
        for p in path:
            if not assume_literal and has_magic(p):
                bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                out |= bit
                if recursive:
                    # glob call above expanded one depth so if maxdepth is defined
                    # then decrement it in expand_path call below. If it is zero
                    # after decrementing then avoid expand_path call.
                    if maxdepth is not None and maxdepth <= 1:
                        continue
                    out |= set(
                        self.expand_path(
                            list(bit),
                            recursive=recursive,
                            maxdepth=maxdepth - 1 if maxdepth is not None else None,
                            assume_literal=True,
                            **kwargs,
                        )
                    )
                continue
            elif recursive:
                rec = set(
                    self.find(
                        p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                    )
                )
                out |= rec
            if p not in out and (recursive is False or self.exists(p)):
                # should only check once, for the root
                out.add(p)
    if not out:
        raise FileNotFoundError(path)
    return sorted(out)

1228 

def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
    """Move file(s) from one location to another, as a copy followed by a delete"""
    if path1 == path2:
        logger.debug("%s mv: The paths are the same, so no files were moved.", self)
        return

    # The copy must raise on failure: deleting the source after a
    # silently skipped copy would lose data.
    self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise")
    self.rm(path1, recursive=recursive)

1239 

def rm_file(self, path):
    """Delete a single file by delegating to the legacy ``_rm`` hook"""
    self._rm(path)

1243 

1244 def _rm(self, path): 

1245 """Delete one file""" 

1246 # this is the old name for the method, prefer rm_file 

1247 raise NotImplementedError 

1248 

def rm(self, path, recursive=False, maxdepth=None):
    """Delete files.

    Parameters
    ----------
    path: str or list of str
        File(s) to delete.
    recursive: bool
        For directories, delete the contents recursively and then the
        directory itself.
    maxdepth: int or None
        Depth limit used when looking for files to delete recursively.
        With None the depth is unlimited.
    """
    targets = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
    # walk the expanded listing back to front
    for target in reversed(targets):
        self.rm_file(target)

1267 

1268 @classmethod 

1269 def _parent(cls, path): 

1270 path = cls._strip_protocol(path) 

1271 if "/" in path: 

1272 parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker) 

1273 return cls.root_marker + parent 

1274 else: 

1275 return cls.root_marker 

1276 

def _open(
    self,
    path,
    mode="rb",
    block_size=None,
    autocommit=True,
    cache_options=None,
    **kwargs,
):
    """Build the raw, bytes-mode file-like object for ``path``"""
    raw_file = AbstractBufferedFile(
        self,
        path,
        mode,
        block_size,
        autocommit,
        cache_options=cache_options,
        **kwargs,
    )
    return raw_file

1296 

def open(
    self,
    path,
    mode="rb",
    block_size=None,
    cache_options=None,
    compression=None,
    **kwargs,
):
    """
    Return a file-like object from the filesystem

    The resultant instance must function correctly in a context ``with``
    block.

    Parameters
    ----------
    path: str
        Target file
    mode: str like 'rb', 'w'
        See builtin ``open()``
        Mode "x" (exclusive write) may be implemented by the backend. Even if
        it is, whether it is checked up front or on commit, and whether it is
        atomic is implementation-dependent.
    block_size: int
        Some indication of buffering - this is a value in bytes
    cache_options : dict, optional
        Extra arguments to pass through to the cache.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding, errors, newline: passed on to TextIOWrapper for text mode
    """
    path = self._strip_protocol(path)
    if "b" not in mode:
        # text mode: open the binary file and wrap it
        mode = mode.replace("t", "") + "b"

        text_kwargs = {
            k: kwargs.pop(k)
            for k in ["encoding", "errors", "newline"]
            if k in kwargs
        }
        return io.TextIOWrapper(
            self.open(
                path,
                mode,
                block_size=block_size,
                cache_options=cache_options,
                compression=compression,
                **kwargs,
            ),
            **text_kwargs,
        )

    compress = None
    if compression is not None:
        from fsspec.compression import compr
        from fsspec.core import get_compression

        # Resolve the codec before opening anything, so an invalid
        # ``compression`` value cannot leave an open file behind.
        compress = compr[get_compression(path, compression)]

    ac = kwargs.pop("autocommit", not self._intrans)
    f = self._open(
        path,
        mode=mode,
        block_size=block_size,
        autocommit=ac,
        cache_options=cache_options,
        **kwargs,
    )
    if compress is not None:
        f = compress(f, mode=mode[0])

    if not ac and "r" not in mode:
        # deferred writes are tracked by the active transaction
        self.transaction.files.append(f)
    return f

1374 

def touch(self, path, truncate=True, **kwargs):
    """Create empty file, or update timestamp

    Parameters
    ----------
    path: str
        file location
    truncate: bool
        If True, the file is always (re)created empty. If False and the
        file already exists, a timestamp update is requested, which this
        implementation does not provide.
    """
    if not truncate and self.exists(path):
        raise NotImplementedError  # update timestamp, if possible

    # opening for write and closing immediately leaves an empty file
    with self.open(path, "wb", **kwargs):
        pass

1391 

def ukey(self, path):
    """SHA-256 hex digest of the file's ``info``, for detecting changes"""
    described = str(self.info(path))
    return sha256(described.encode()).hexdigest()

1395 

def read_block(self, fn, offset, length, delimiter=None):
    """Read a block of bytes from a file

    Starting at ``offset`` of the file, read ``length`` bytes. If
    ``delimiter`` is set then we ensure that the read starts and stops at
    delimiter boundaries that follow the locations ``offset`` and ``offset
    + length``. If ``offset`` is zero then we start at zero. The
    bytestring returned WILL include the end delimiter string.

    If offset+length is beyond the eof, reads to eof.

    Parameters
    ----------
    fn: string
        Path to filename
    offset: int
        Byte offset to start read
    length: int
        Number of bytes to read. If None, read to end.
    delimiter: bytes (optional)
        Ensure reading starts and stops at delimiter bytestring

    Examples
    --------
    >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
    b'Alice, 100\\nBo'
    >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\n'

    Use ``length=None`` to read to the end of the file.
    >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
    b'Alice, 100\\nBob, 200\\nCharlie, 300'

    See Also
    --------
    :func:`fsspec.utils.read_block`
    """
    with self.open(fn, "rb") as handle:
        total = handle.size
        if length is None:
            length = total
        # clip the request so that it stops at the end of the file
        if total is not None and offset + length > total:
            length = total - offset
        return read_block(handle, offset, length, delimiter)

1440 

def to_json(self, *, include_password: bool = True) -> str:
    """
    JSON representation of this filesystem instance.

    Parameters
    ----------
    include_password: bool, default True
        Whether to include the password (if any) in the output.

    Returns
    -------
    JSON string with keys ``cls`` (the python location of this class),
    protocol (text name of this class's protocol, first one in case of
    multiple), ``args`` (positional args, usually empty), and all other
    keyword arguments as their own keys.

    Warnings
    --------
    Serialized filesystems may contain sensitive information which have been
    passed to the constructor, such as passwords and tokens. Make sure you
    store and send them in a secure environment!
    """
    from .json import FilesystemJSONEncoder

    # ``json.dumps`` takes an encoder class, so the password choice is
    # carried by a one-off subclass with the flag as a class attribute.
    encoder_cls = type(
        "_FilesystemJSONEncoder",
        (FilesystemJSONEncoder,),
        {"include_password": include_password},
    )
    return json.dumps(self, cls=encoder_cls)

1473 

@staticmethod
def from_json(blob: str) -> AbstractFileSystem:
    """
    Recreate a filesystem instance from JSON representation.

    The expected structure of the input is described in ``.to_json()``.

    Parameters
    ----------
    blob: str

    Returns
    -------
    file system instance, not necessarily of this particular class.

    Warnings
    --------
    This can import arbitrary modules (as determined by the ``cls`` key).
    Make sure you haven't installed any modules that may execute malicious code
    at import time.
    """
    from .json import FilesystemJSONDecoder

    instance = json.loads(blob, cls=FilesystemJSONDecoder)
    return instance

1498 

def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
    """
    JSON-serializable dictionary representation of this filesystem instance.

    Parameters
    ----------
    include_password: bool, default True
        Whether to include the password (if any) in the output.

    Returns
    -------
    Dictionary with keys ``cls`` (the python location of this class),
    protocol (text name of this class's protocol, first one in case of
    multiple), ``args`` (positional args, usually empty), and all other
    keyword arguments as their own keys.

    Warnings
    --------
    Serialized filesystems may contain sensitive information which have been
    passed to the constructor, such as passwords and tokens. Make sure you
    store and send them in a secure environment!
    """
    from .json import FilesystemJSONEncoder

    encoder = FilesystemJSONEncoder()
    klass = type(self)

    # with several protocol names, the first one is reported
    protocol = self.protocol
    if isinstance(protocol, (tuple, list)):
        protocol = protocol[0]

    # copy, so that dropping the password leaves the instance untouched
    options = dict(self.storage_options)
    if not include_password:
        options.pop("password", None)

    serial_args = encoder.make_serializable(self.storage_args)
    serial_options = encoder.make_serializable(options)
    return dict(
        cls=f"{klass.__module__}:{klass.__name__}",
        protocol=protocol,
        args=serial_args,
        **serial_options,
    )

1538 

@staticmethod
def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
    """
    Recreate a filesystem instance from dictionary representation.

    The expected structure of the input is described in ``.to_dict()``.

    Parameters
    ----------
    dct: Dict[str, Any]

    Returns
    -------
    file system instance, not necessarily of this particular class.

    Raises
    ------
    ValueError
        If no filesystem class can be resolved from ``dct``.

    Warnings
    --------
    This can import arbitrary modules (as determined by the ``cls`` key).
    Make sure you haven't installed any modules that may execute malicious code
    at import time.
    """
    from .json import FilesystemJSONDecoder

    decoder = FilesystemJSONDecoder()

    payload = dict(dct)  # Defensive copy

    fs_cls = FilesystemJSONDecoder.try_resolve_fs_cls(payload)
    if fs_cls is None:
        raise ValueError("Not a serialized AbstractFileSystem")

    # bookkeeping keys are not constructor arguments
    for key in ("cls", "protocol"):
        payload.pop(key, None)

    # "args" is removed first so it is not also passed as a keyword
    positional = decoder.unmake_serializable(payload.pop("args", ()))
    keywords = decoder.unmake_serializable(payload)
    return fs_cls(*positional, **keywords)

1577 

1578 def _get_pyarrow_filesystem(self): 

1579 """ 

1580 Make a version of the FS instance which will be acceptable to pyarrow 

1581 """ 

1582 # all instances already also derive from pyarrow 

1583 return self 

1584 

def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
    """Create key/value store based on this file-system

    Returns an ``FSMap`` rooted at ``root`` on this filesystem; all options
    are forwarded unchanged. See ``fsspec.mapping.FSMap`` for details.
    """
    from .mapping import FSMap

    options = {
        "check": check,
        "create": create,
        "missing_exceptions": missing_exceptions,
    }
    return FSMap(root, self, **options)

1600 

@classmethod
def clear_instance_cache(cls):
    """
    Clear the cache of filesystem instances.

    Notes
    -----
    Unless the ``cachable`` class attribute is set to False, the class keeps
    a reference to every instance it creates. Those instances therefore
    cannot be garbage collected until this method has been called.
    """
    instance_cache = cls._cache
    instance_cache.clear()

1615 

def created(self, path):
    """Creation time of a file as a datetime.datetime; not implemented here"""
    raise NotImplementedError

1619 

def modified(self, path):
    """Modification time of a file as a datetime.datetime; not implemented here"""
    raise NotImplementedError

1623 

def tree(
    self,
    path: str = "/",
    recursion_limit: int = 2,
    max_display: int = 25,
    display_size: bool = False,
    prefix: str = "",
    is_last: bool = True,
    first: bool = True,
    indent_size: int = 4,
) -> str:
    """
    Return a tree-like structure of the filesystem starting from the given path as a string.

    Parameters
    ----------
        path: Root path to start traversal from
        recursion_limit: Maximum depth of directory traversal
        max_display: Maximum number of items to display per directory
        display_size: Whether to display file sizes
        prefix: Current line prefix for visual tree structure
        is_last: Whether current item is last in its level (accepted and
            passed down to recursive calls; not otherwise used)
        first: Whether this is the first call (displays root path)
        indent_size: Number of spaces by indent

    Returns
    -------
        str: A string representing the tree structure.

    Example
    -------
        >>> from fsspec import filesystem

        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
        >>> print(tree)
    """

    def format_bytes(n: int) -> str:
        """Format bytes as text."""
        for unit, k in (
            ("P", 2**50),
            ("T", 2**40),
            ("G", 2**30),
            ("M", 2**20),
            ("k", 2**10),
        ):
            if n >= 0.9 * k:
                return f"{n / k:.2f} {unit}b"
        return f"{n}B"

    result = []

    if first:
        result.append(path)

    if recursion_limit:
        indent = " " * indent_size
        # Sort into a new list: the list returned by ``ls`` may be shared
        # (for example a cached listing), so it must not be reordered in
        # place. Directories come first, then names alphabetically.
        contents = sorted(
            self.ls(path, detail=True),
            key=lambda x: (x.get("type") != "directory", x.get("name", "")),
        )

        if max_display is not None and len(contents) > max_display:
            displayed_contents = contents[:max_display]
            remaining_count = len(contents) - max_display
        else:
            displayed_contents = contents
            remaining_count = 0

        for i, item in enumerate(displayed_contents):
            # the "more items" line, when present, is the real last entry
            is_last_item = (i == len(displayed_contents) - 1) and (
                remaining_count == 0
            )

            branch = (
                "└" + ("─" * (indent_size - 2))
                if is_last_item
                else "├" + ("─" * (indent_size - 2))
            )
            branch += " "
            new_prefix = prefix + (
                indent if is_last_item else "│" + " " * (indent_size - 1)
            )

            name = os.path.basename(item.get("name", ""))

            if display_size and item.get("type") == "directory":
                sub_contents = self.ls(item.get("name", ""), detail=True)
                num_files = sum(
                    1 for sub_item in sub_contents if sub_item.get("type") == "file"
                )
                num_folders = sum(
                    1
                    for sub_item in sub_contents
                    if sub_item.get("type") == "directory"
                )

                if num_files == 0 and num_folders == 0:
                    size = " (empty folder)"
                elif num_files == 0:
                    size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
                elif num_folders == 0:
                    size = f" ({num_files} file{'s' if num_files > 1 else ''})"
                else:
                    size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
            elif display_size and item.get("type") == "file":
                # a listing may report ``size`` as None; show it as zero
                size = f" ({format_bytes(item.get('size') or 0)})"
            else:
                size = ""

            result.append(f"{prefix}{branch}{name}{size}")

            if item.get("type") == "directory" and recursion_limit > 0:
                result.append(
                    self.tree(
                        path=item.get("name", ""),
                        recursion_limit=recursion_limit - 1,
                        max_display=max_display,
                        display_size=display_size,
                        prefix=new_prefix,
                        is_last=is_last_item,
                        first=False,
                        indent_size=indent_size,
                    )
                )

        if remaining_count > 0:
            more_message = f"{remaining_count} more item(s) not displayed."
            result.append(
                f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
            )

    # recursive calls on empty directories contribute empty strings
    return "\n".join(_ for _ in result if _)

1758 

1759 # ------------------------------------------------------------------------ 

1760 # Aliases 

1761 

def read_bytes(self, path, start=None, end=None, **kwargs):
    """Alias of `AbstractFileSystem.cat_file`; all arguments are forwarded."""
    data = self.cat_file(path, start=start, end=end, **kwargs)
    return data

1765 

def write_bytes(self, path, value, **kwargs):
    """Alias of `AbstractFileSystem.pipe_file`; returns nothing."""
    pipe = self.pipe_file
    pipe(path, value, **kwargs)

1769 

def makedir(self, path, create_parents=True, **kwargs):
    """Alias of `AbstractFileSystem.mkdir`; all arguments are forwarded."""
    outcome = self.mkdir(path, create_parents=create_parents, **kwargs)
    return outcome

1773 

def mkdirs(self, path, exist_ok=False):
    """Alias of `AbstractFileSystem.makedirs`; all arguments are forwarded."""
    outcome = self.makedirs(path, exist_ok=exist_ok)
    return outcome

1777 

def listdir(self, path, detail=True, **kwargs):
    """Alias of `AbstractFileSystem.ls`; all arguments are forwarded."""
    entries = self.ls(path, detail=detail, **kwargs)
    return entries

1781 

def cp(self, path1, path2, **kwargs):
    """Alias of `AbstractFileSystem.copy`; all arguments are forwarded."""
    outcome = self.copy(path1, path2, **kwargs)
    return outcome

1785 

def move(self, path1, path2, **kwargs):
    """Alias of `AbstractFileSystem.mv`; all arguments are forwarded."""
    outcome = self.mv(path1, path2, **kwargs)
    return outcome

1789 

def stat(self, path, **kwargs):
    """Alias of `AbstractFileSystem.info`; all arguments are forwarded."""
    details = self.info(path, **kwargs)
    return details

1793 

def disk_usage(self, path, total=True, maxdepth=None, **kwargs):
    """Alias of `AbstractFileSystem.du`; all arguments are forwarded."""
    usage = self.du(path, total=total, maxdepth=maxdepth, **kwargs)
    return usage

1797 

def rename(self, path1, path2, **kwargs):
    """Alias of `AbstractFileSystem.mv`; all arguments are forwarded."""
    outcome = self.mv(path1, path2, **kwargs)
    return outcome

1801 

def delete(self, path, recursive=False, maxdepth=None):
    """Alias of `AbstractFileSystem.rm`; all arguments are forwarded."""
    outcome = self.rm(path, recursive=recursive, maxdepth=maxdepth)
    return outcome

1805 

def upload(self, lpath, rpath, recursive=False, **kwargs):
    """Alias of `AbstractFileSystem.put`; all arguments are forwarded."""
    outcome = self.put(lpath, rpath, recursive=recursive, **kwargs)
    return outcome

1809 

def download(self, rpath, lpath, recursive=False, **kwargs):
    """Alias of `AbstractFileSystem.get`; all arguments are forwarded."""
    outcome = self.get(rpath, lpath, recursive=recursive, **kwargs)
    return outcome

1813 

def sign(self, path, expiration=100, **kwargs):
    """Create a signed URL representing the given path

    Some implementations can generate temporary URLs as a way of
    delegating credentials. This one always raises.

    Parameters
    ----------
    path : str
         The path on the filesystem
    expiration : int
        Number of seconds to enable the URL for (if supported)

    Returns
    -------
    URL : str
        The signed URL

    Raises
    ------
    NotImplementedError : if method is not implemented for a filesystem
    """
    message = "Sign is not implemented for this filesystem"
    raise NotImplementedError(message)

1837 

1838 def _isfilestore(self): 

1839 # Originally inherited from pyarrow DaskFileSystem. Keeping this 

1840 # here for backwards compatibility as long as pyarrow uses its 

1841 # legacy fsspec-compatible filesystems and thus accepts fsspec 

1842 # filesystems as well 

1843 return False 

1844 

1845 

1846class AbstractBufferedFile(io.IOBase): 

1847 """Convenient class to derive from to provide buffering 

1848 

1849 In the case that the backend does not provide a pythonic file-like object 

1850 already, this class contains much of the logic to build one. The only 

1851 methods that need to be overridden are ``_upload_chunk``, 

1852 ``_initiate_upload`` and ``_fetch_range``. 

1853 """ 

1854 

1855 DEFAULT_BLOCK_SIZE = 5 * 2**20 

1856 _details = None 

1857 

def __init__(
    self,
    fs,
    path,
    mode="rb",
    block_size="default",
    autocommit=True,
    cache_type="readahead",
    cache_options=None,
    size=None,
    **kwargs,
):
    """
    Template for files with buffered reading and writing

    Parameters
    ----------
    fs: instance of FileSystem
    path: str
        location in file-system
    mode: str
        Normal file modes. One of 'rb', 'wb', 'ab' or 'xb'. Some file
        systems may be read-only, and some may not support append.
    block_size: int
        Buffer size for reading or writing, 'default' for class default
    autocommit: bool
        Whether to write to final destination; may only impact what
        happens when file is being closed.
    cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead"
        Caching policy in read mode. See the definitions in ``core``.
    cache_options : dict
        Additional options passed to the constructor for the cache specified
        by `cache_type`. The dict passed in is not modified.
    size: int
        If given and in read mode, suppressed having to look up the file size
    kwargs:
        Gets stored as self.kwargs

    Raises
    ------
    NotImplementedError
        If ``mode`` is not one of the supported modes.
    """
    from .core import caches

    self.path = path
    self.fs = fs
    self.mode = mode
    self.blocksize = (
        self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size
    )
    self.loc = 0
    self.autocommit = autocommit
    self.end = None
    self.start = None
    self.closed = False

    # Work on a private copy: the deprecated "trim" handling below writes
    # into this dict and must not leak into the caller's object.
    cache_options = {} if cache_options is None else dict(cache_options)

    if "trim" in kwargs:
        warnings.warn(
            "Passing 'trim' to control the cache behavior has been deprecated. "
            "Specify it within the 'cache_options' argument instead.",
            FutureWarning,
            stacklevel=2,  # attribute the warning to the caller
        )
        cache_options["trim"] = kwargs.pop("trim")

    self.kwargs = kwargs

    if mode not in {"ab", "rb", "wb", "xb"}:
        raise NotImplementedError("File mode not supported")
    if mode == "rb":
        if size is not None:
            self.size = size
        else:
            # triggers an ``info`` lookup through the ``details`` property
            self.size = self.details["size"]
        self.cache = caches[cache_type](
            self.blocksize, self._fetch_range, self.size, **cache_options
        )
    else:
        # write modes accumulate data in memory
        self.buffer = io.BytesIO()
        self.offset = None
        self.forced = False
        self.location = None

1938 

@property
def details(self):
    """File info dict, fetched from the filesystem on first access"""
    cached = self._details
    if cached is None:
        cached = self.fs.info(self.path)
        self._details = cached
    return cached

@details.setter
def details(self, value):
    # storing new details also refreshes the recorded size
    self._details = value
    self.size = value["size"]

1949 

@property
def full_name(self):
    """Path of this file as returned by ``_unstrip_protocol`` for its filesystem"""
    qualified = _unstrip_protocol(self.path, self.fs)
    return qualified

1953 

@property
def closed(self):
    """Whether the file is closed; True if the flag was never set"""
    # The attribute is read-only in IOBase, hence the private backing
    # field. getattr with a default is used because this can be called
    # during deletion, before ``_closed`` exists.
    state = getattr(self, "_closed", True)
    return state

@closed.setter
def closed(self, c):
    self._closed = c

1963 

1964 def __hash__(self): 

1965 if "w" in self.mode: 

1966 return id(self) 

1967 else: 

1968 return int(tokenize(self.details), 16) 

1969 

1970 def __eq__(self, other): 

1971 """Files are equal if they have the same checksum, only in read mode""" 

1972 if self is other: 

1973 return True 

1974 return ( 

1975 isinstance(other, type(self)) 

1976 and self.mode == "rb" 

1977 and other.mode == "rb" 

1978 and hash(self) == hash(other) 

1979 ) 

1980 

def commit(self):
    """Move the file from its temporary location to the final destination.

    The base implementation does nothing.
    """

1983 

def discard(self):
    """Throw away the temporary file.

    The base implementation does nothing.
    """

1986 

def info(self):
    """Return the file information for this path.

    Raises
    ------
    ValueError
        If the file is not readable (``self.readable()`` is false).
    """
    if not self.readable():
        raise ValueError("Info not available while writing")
    return self.details

1993 

def tell(self):
    """Return the current position in the file (``self.loc``)."""
    position = self.loc
    return position

1997 

def seek(self, loc, whence=0):
    """Move the current file position.

    Parameters
    ----------
    loc: int
        Byte offset, interpreted according to ``whence``.
    whence: {0, 1, 2}
        Reference point: start of file, current position, or end of file.

    Returns
    -------
    The new absolute position.

    Raises
    ------
    OSError
        With errno ``ESPIPE`` if the file is not in ``"rb"`` mode.
    ValueError
        If ``whence`` is not 0, 1 or 2, or the target position is negative.
    """
    offset = int(loc)
    if self.mode != "rb":
        raise OSError(ESPIPE, "Seek only available in read mode")
    if whence not in (0, 1, 2):
        raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
    # ``self.size`` is only consulted for end-relative seeks
    base = 0 if whence == 0 else (self.loc if whence == 1 else self.size)
    target = base + offset
    if target < 0:
        raise ValueError("Seek before start of file")
    self.loc = target
    return target

2023 

def write(self, data):
    """
    Append data to the in-memory write buffer.

    The buffer is handed to ``flush()`` as soon as it holds at least
    ``self.blocksize`` bytes; otherwise it is kept until a later flush.

    Parameters
    ----------
    data: bytes
        Set of bytes to be written.

    Returns
    -------
    Number of bytes written to the buffer.

    Raises
    ------
    ValueError
        If the file is not writable, is closed, or has been force-flushed.
    """
    if not self.writable():
        raise ValueError("File not in write mode")
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    if self.forced:
        raise ValueError("This file has been force-flushed, can only close")
    written = self.buffer.write(data)
    self.loc += written
    buffer_full = self.buffer.tell() >= self.blocksize
    if buffer_full:
        self.flush()
    return written

2047 

def flush(self, force=False):
    """
    Write buffered data to backend store.

    Writes the current buffer, if it is larger than the block-size, or if
    the file is being closed.

    Parameters
    ----------
    force: bool
        When closing, write the last block even if it is smaller than
        blocks are allowed to be. Disallows further writing to this file.

    Raises
    ------
    ValueError
        If the file is closed, or if a forced flush is requested a
        second time on a write-mode file.
    """
    if self.closed:
        raise ValueError("Flush on closed file")

    if self.readable():
        # Flushing a read-mode file is a no-op. This check must precede any
        # use of ``self.forced``: that attribute is only initialised for
        # write modes, so ``flush(force=True)`` on a read-mode file would
        # otherwise fail with AttributeError.
        return

    if force and self.forced:
        raise ValueError("Force flush cannot be called more than once")
    if force:
        self.forced = True

    if not force and self.buffer.tell() < self.blocksize:
        # Defer write on small block
        return

    if self.offset is None:
        # First real write: initialize the (multipart) upload
        self.offset = 0
        try:
            self._initiate_upload()
        except BaseException:
            # Mark the file closed so nothing further is attempted on it,
            # then let the original error propagate unchanged.
            self.closed = True
            raise

    # A return value of exactly ``False`` means the chunk was not consumed,
    # so the buffer is kept for a later attempt.
    if self._upload_chunk(final=force) is not False:
        self.offset += self.buffer.seek(0, 2)
        self.buffer = io.BytesIO()

2089 

2090 def _upload_chunk(self, final=False): 

2091 """Write one part of a multi-block file upload 

2092 

2093 Parameters 

2094 ========== 

2095 final: bool 

2096 This is the last block, so should complete file, if 

2097 self.autocommit is True. 

2098 """ 

2099 # may not yet have been initialized, may need to call _initialize_upload 

2100 

2101 def _initiate_upload(self): 

2102 """Create remote file/upload""" 

2103 pass 

2104 

2105 def _fetch_range(self, start, end): 

2106 """Get the specified set of bytes from remote""" 

2107 return self.fs.cat_file(self.path, start=start, end=end) 

2108 

def read(self, length=-1):
    """
    Return data from cache, or fetch pieces as necessary

    Parameters
    ----------
    length: int (-1)
        Number of bytes to read; if <0 (or None), all remaining bytes.

    Returns
    -------
    The bytes read; ``b""`` when nothing remains, including when the
    current position lies beyond the end of the file.

    Raises
    ------
    ValueError
        If the file is not in ``"rb"`` mode or is closed.
    """
    length = -1 if length is None else int(length)
    if self.mode != "rb":
        raise ValueError("File not in read mode")
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    if length < 0:
        # seek() allows positions past the end of the file; clamp so the
        # cache is never asked for a range whose end precedes its start.
        length = max(self.size - self.loc, 0)
    if length == 0:
        # don't even bother calling fetch
        return b""
    out = self.cache._fetch(self.loc, self.loc + length)

    logger.debug(
        "%s read: %i - %i %s",
        self,
        self.loc,
        self.loc + length,
        self.cache._log_stats(),
    )
    self.loc += len(out)
    return out

2139 

def readinto(self, b):
    """Read bytes into the writable buffer ``b``; return the count read.

    Mirrors the builtin file's readinto method:
    https://docs.python.org/3/library/io.html#io.RawIOBase.readinto
    """
    view = memoryview(b).cast("B")
    chunk = self.read(view.nbytes)
    count = len(chunk)
    view[:count] = chunk
    return count

2149 

def readuntil(self, char=b"\n", blocks=None):
    """Return data between current position and first occurrence of char

    char is included in the output, except if the end of the file is
    encountered first. On return the file position is just past the
    returned data.

    Parameters
    ----------
    char: bytes
        Thing to find; may be longer than one byte.
    blocks: None or int
        How much to read in each go. Defaults to file blocksize - which may
        mean a new read on every call.
    """
    out = []
    # Number of trailing bytes of already-consumed data that could still be
    # the beginning of a delimiter continuing in the next chunk.
    overlap = len(char) - 1
    tail = b""
    while True:
        start = self.tell()
        part = self.read(blocks or self.blocksize)
        if len(part) == 0:
            break
        # Search the carried-over tail together with the new chunk so that a
        # multi-byte delimiter split across two reads is still found.
        window = tail + part
        found = window.find(char)
        if found > -1:
            # Offset, within ``part``, of the first byte after the delimiter
            end = found + len(char) - len(tail)
            out.append(part[:end])
            self.seek(start + end)
            break
        out.append(part)
        tail = window[-overlap:] if overlap > 0 else b""
    return b"".join(out)

2177 

def readline(self):
    """Read up to and including the next newline byte.

    Note that, because of character encoding, this is not necessarily a
    true line ending.
    """
    newline = b"\n"
    return self.readuntil(newline)

2185 

2186 def __next__(self): 

2187 out = self.readline() 

2188 if out: 

2189 return out 

2190 raise StopIteration 

2191 

2192 def __iter__(self): 

2193 return self 

2194 

def readlines(self):
    """Return all remaining data as a list of lines.

    The data is split on the newline byte; every line keeps its trailing
    newline, except possibly the last one when the data does not end with
    a newline. Returns an empty list when there is no data left, matching
    the builtin file objects.
    """
    data = self.read()
    if not data:
        # ``b"".split(b"\n")`` is ``[b""]``, which would otherwise be
        # reported as a single empty line.
        return []
    lines = data.split(b"\n")
    out = [line + b"\n" for line in lines[:-1]]
    # The final piece is empty exactly when the data ended with a newline
    if lines[-1]:
        out.append(lines[-1])
    return out

2205 

def readinto1(self, b):
    """Read into buffer ``b``; simply delegates to ``readinto``."""
    return self.readinto(b)

2208 

def close(self):
    """Close the file.

    In read mode the cache is dropped. In the other modes any pending
    data is force-flushed (unless that already happened) and, when a
    filesystem is attached, its cache is invalidated for this path and
    for its parent. The file is marked closed even if one of these steps
    raises. Does nothing if the file is already closed or carries a truthy
    ``_unclosable`` attribute.
    """
    if getattr(self, "_unclosable", False) or self.closed:
        return
    try:
        if self.mode == "rb":
            self.cache = None
            return
        if not self.forced:
            self.flush(force=True)

        fs = self.fs
        if fs is not None:
            fs.invalidate_cache(self.path)
            fs.invalidate_cache(fs._parent(self.path))
    finally:
        self.closed = True

2230 

def readable(self):
    """Whether the file was opened for reading and is still open."""
    if "r" not in self.mode:
        return False
    return not self.closed

2234 

def seekable(self):
    """Whether the file supports seeking, i.e. whether it is readable."""
    can_read = self.readable()
    return can_read

2238 

def writable(self):
    """Whether the file was opened in a write mode and is still open."""
    if self.mode not in {"wb", "ab", "xb"}:
        return False
    return not self.closed

2242 

2243 def __reduce__(self): 

2244 if self.mode != "rb": 

2245 raise RuntimeError("Pickling a writeable file is not supported") 

2246 

2247 return reopen, ( 

2248 self.fs, 

2249 self.path, 

2250 self.mode, 

2251 self.blocksize, 

2252 self.loc, 

2253 self.size, 

2254 self.autocommit, 

2255 self.cache.name if self.cache else "none", 

2256 self.kwargs, 

2257 ) 

2258 

2259 def __del__(self): 

2260 if not self.closed: 

2261 self.close() 

2262 

2263 def __str__(self): 

2264 return f"<File-like object {type(self.fs).__name__}, {self.path}>" 

2265 

2266 __repr__ = __str__ 

2267 

2268 def __enter__(self): 

2269 return self 

2270 

2271 def __exit__(self, *args): 

2272 self.close() 

2273 

2274 

def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
    """Open ``path`` on ``fs`` again and restore the read position.

    The arguments are forwarded to ``fs.open`` (``blocksize`` as
    ``block_size``), together with the extra options in ``kwargs``. If
    ``loc`` is positive the new file is moved to that position before
    being returned.
    """
    open_options = {
        "mode": mode,
        "block_size": blocksize,
        "autocommit": autocommit,
        "cache_type": cache_type,
        "size": size,
    }
    handle = fs.open(path, **open_options, **kwargs)
    if loc > 0:
        handle.seek(loc)
    return handle