
from __future__ import annotations

import io
import json
import logging
import os
import threading
import warnings
import weakref
from errno import ESPIPE
from glob import has_magic
from hashlib import sha256
from typing import Any, ClassVar

from .callbacks import DEFAULT_CALLBACK
from .config import apply_config, conf
from .dircache import DirCache
from .transaction import Transaction
from .utils import (
    _unstrip_protocol,
    glob_translate,
    isfilelike,
    other_paths,
    read_block,
    stringify_path,
    tokenize,
)

logger = logging.getLogger("fsspec")


def make_instance(cls, args, kwargs):
    return cls(*args, **kwargs)


class _Cached(type):
    """
    Metaclass for caching file system instances.

    Notes
    -----
    Instances are cached according to

    * The values of the class attributes listed in `_extra_tokenize_attributes`
    * The arguments passed to ``__init__``.

    This creates an additional reference to the filesystem, which prevents the
    filesystem from being garbage collected when all *user* references go away.
    A call to the :meth:`AbstractFileSystem.clear_instance_cache` must *also*
    be made for a filesystem instance to be garbage collected.
    """

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Note: we intentionally create a reference here, to avoid garbage
        # collecting instances when all other references are gone. To really
        # delete a FileSystem, the cache must be cleared.
        if conf.get("weakref_instance_cache"):  # pragma: no cover
            # debug option for analysing fork/spawn conditions
            cls._cache = weakref.WeakValueDictionary()
        else:
            cls._cache = {}
        cls._pid = os.getpid()

    def __call__(cls, *args, **kwargs):
        kwargs = apply_config(cls, kwargs)
        extra_tokens = tuple(
            getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
        )
        token = tokenize(
            cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
        )
        skip = kwargs.pop("skip_instance_cache", False)
        if os.getpid() != cls._pid:
            cls._cache.clear()
            cls._pid = os.getpid()
        if not skip and cls.cachable and token in cls._cache:
            cls._latest = token
            return cls._cache[token]
        else:
            obj = super().__call__(*args, **kwargs)
            # Setting _fs_token here causes some static linters to complain.
            obj._fs_token_ = token
            obj.storage_args = args
            obj.storage_options = kwargs
            if obj.async_impl and obj.mirror_sync_methods:
                from .asyn import mirror_sync_methods

                mirror_sync_methods(obj)

            if cls.cachable and not skip:
                cls._latest = token
                cls._cache[token] = obj
            return obj
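
# Instance-caching sketch (hypothetical usage, assuming fsspec's built-in
# "memory" implementation is registered): constructing a cachable filesystem
# twice with the same arguments returns the same cached object, unless the
# documented skip_instance_cache option is passed.
#
#     import fsspec
#     a = fsspec.filesystem("memory")
#     b = fsspec.filesystem("memory")
#     assert a is b
#     c = fsspec.filesystem("memory", skip_instance_cache=True)
#     assert c is not a
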

class AbstractFileSystem(metaclass=_Cached):
    """
    An abstract super-class for pythonic file-systems

    Implementations are expected to be compatible with or, better, subclass
    from here.
    """

    cachable = True  # this class can be cached, instances reused
    _cached = False
    blocksize = 2**22
    sep = "/"
    protocol: ClassVar[str | tuple[str, ...]] = "abstract"
    _latest = None
    async_impl = False
    mirror_sync_methods = False
    root_marker = ""  # For some FSs, may require leading '/' or other character
    transaction_type = Transaction

    #: Extra *class attributes* that should be considered when hashing.
    _extra_tokenize_attributes = ()

    # Set by _Cached metaclass
    storage_args: tuple[Any, ...]
    storage_options: dict[str, Any]

    def __init__(self, *args, **storage_options):
        """Create and configure file-system instance

        Instances may be cachable, so if similar enough arguments are seen
        a new instance is not required. The token attribute exists to allow
        implementations to cache instances if they wish.

        A reasonable default should be provided if there are no arguments.

        Subclasses should call this method.

        Parameters
        ----------
        use_listings_cache, listings_expiry_time, max_paths:
            passed to ``DirCache``, if the implementation supports
            directory listing caching. Pass use_listings_cache=False
            to disable such caching.
        skip_instance_cache: bool
            If this is a cachable implementation, pass True here to force
            creating a new instance even if a matching instance exists, and prevent
            storing this instance.
        asynchronous: bool
        loop: asyncio-compatible IOLoop or None
        """
        if self._cached:
            # reusing instance, don't change
            return
        self._cached = True
        self._intrans = False
        self._transaction = None
        self._invalidated_caches_in_transaction = []
        self.dircache = DirCache(**storage_options)

        if storage_options.pop("add_docs", None):
            warnings.warn("add_docs is no longer supported.", FutureWarning)

        if storage_options.pop("add_aliases", None):
            warnings.warn("add_aliases has been removed.", FutureWarning)
        # This is set in _Cached
        self._fs_token_ = None

    @property
    def fsid(self):
        """Persistent filesystem id that can be used to compare filesystems
        across sessions.
        """
        raise NotImplementedError

    @property
    def _fs_token(self):
        return self._fs_token_

    def __dask_tokenize__(self):
        return self._fs_token

    def __hash__(self):
        return int(self._fs_token, 16)

    def __eq__(self, other):
        return isinstance(other, type(self)) and self._fs_token == other._fs_token

    def __reduce__(self):
        return make_instance, (type(self), self.storage_args, self.storage_options)

    @classmethod
    def _strip_protocol(cls, path):
        """Turn path from fully-qualified to file-system-specific

        May require FS-specific handling, e.g., for relative paths or links.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(p) for p in path]
        path = stringify_path(path)
        protos = (cls.protocol,) if isinstance(cls.protocol, str) else cls.protocol
        for protocol in protos:
            if path.startswith(protocol + "://"):
                path = path[len(protocol) + 3 :]
            elif path.startswith(protocol + "::"):
                path = path[len(protocol) + 2 :]
        path = path.rstrip("/")
        # use of root_marker to make minimum required path, e.g., "/"
        return path or cls.root_marker
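
    # Protocol-stripping sketch (hypothetical subclass and paths): for an
    # implementation with protocol = "myfs", fully-qualified URLs reduce to
    # bare paths, and an empty result falls back to root_marker.
    #
    #     MyFS._strip_protocol("myfs://bucket/key/")  # -> "bucket/key"
    #     MyFS._strip_protocol("bucket/key")          # -> "bucket/key"
    #     MyFS._strip_protocol("myfs://")             # -> MyFS.root_marker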

    def unstrip_protocol(self, name: str) -> str:
        """Format FS-specific path to generic, including protocol"""
        protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
        for protocol in protos:
            if name.startswith(f"{protocol}://"):
                return name
        return f"{protos[0]}://{name}"

    @staticmethod
    def _get_kwargs_from_urls(path):
        """If kwargs can be encoded in the paths, extract them here

        This should happen before instantiation of the class; incoming paths
        then should be amended to strip the options in methods.

        Examples may look like an sftp path "sftp://user@host:/my/path", where
        the user and host should become kwargs and later get stripped.
        """
        # by default, nothing happens
        return {}

    @classmethod
    def current(cls):
        """Return the most recently instantiated FileSystem

        If no instance has been created, then create one with defaults
        """
        if cls._latest in cls._cache:
            return cls._cache[cls._latest]
        return cls()

    @property
    def transaction(self):
        """A context within which files are committed together upon exit

        Requires the file class to implement `.commit()` and `.discard()`
        for the normal and exception cases.
        """
        if self._transaction is None:
            self._transaction = self.transaction_type(self)
        return self._transaction

    def start_transaction(self):
        """Begin write transaction for deferring files, non-context version"""
        self._intrans = True
        self._transaction = self.transaction_type(self)
        return self.transaction

    def end_transaction(self):
        """Finish write transaction, non-context version"""
        self.transaction.complete()
        self._transaction = None
        # The invalidated caches must be cleared after the transaction is completed.
        for path in self._invalidated_caches_in_transaction:
            self.invalidate_cache(path)
        self._invalidated_caches_in_transaction.clear()
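
    # Transaction usage sketch (hypothetical paths, assuming an implementation
    # whose file class supports commit/discard): writes inside the context are
    # deferred and committed together on clean exit.
    #
    #     fs = fsspec.filesystem("memory")
    #     with fs.transaction:
    #         with fs.open("/staged.txt", "wb") as f:
    #             f.write(b"all-or-nothing")
    #     # committed here; an exception inside the block discards instead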

    def invalidate_cache(self, path=None):
        """
        Discard any cached directory information

        Parameters
        ----------
        path: string or None
            If None, clear all cached listings; otherwise, clear listings at
            or under the given path.
        """
        # Not necessary to implement an invalidation mechanism; there may be
        # no cache. But if there is one, you should call this parent-class
        # method from your subclass to ensure caches expire correctly after
        # transactions.
        # See the implementation of FTPFileSystem in ftp.py
        if self._intrans:
            self._invalidated_caches_in_transaction.append(path)

    def mkdir(self, path, create_parents=True, **kwargs):
        """
        Create directory entry at path

        For systems that don't have true directories, may create one for
        this instance only and not touch the real filesystem

        Parameters
        ----------
        path: str
            location
        create_parents: bool
            if True, this is equivalent to ``makedirs``
        kwargs:
            may be permissions, etc.
        """
        pass  # not necessary to implement, may not have directories

    def makedirs(self, path, exist_ok=False):
        """Recursively make directories

        Creates directory at path and any intervening required directories.
        Raises exception if, for instance, the path already exists but is a
        file.

        Parameters
        ----------
        path: str
            leaf directory name
        exist_ok: bool (False)
            If False, will error if the target already exists
        """
        pass  # not necessary to implement, may not have directories

    def rmdir(self, path):
        """Remove a directory, if empty"""
        pass  # not necessary to implement, may not have directories

    def ls(self, path, detail=True, **kwargs):
        """List objects at path.

        This should include subdirectories and files at that location. The
        difference between a file and a directory must be clear when details
        are requested.

        The specific keys, or perhaps a FileInfo class, or similar, is TBD,
        but must be consistent across implementations.
        Must include:

        - full path to the entry (without protocol)
        - size of the entry, in bytes. If the value cannot be determined, will
          be ``None``.
        - type of entry, "file", "directory" or other

        Additional information may be present, appropriate to the file-system,
        e.g., generation, checksum, etc.

        May use refresh=True|False to allow use of self._ls_from_cache to
        check for a saved listing and avoid calling the backend. This would be
        common where listing may be expensive.

        Parameters
        ----------
        path: str
        detail: bool
            if True, gives a list of dictionaries, where each is the same as
            the result of ``info(path)``. If False, gives a list of paths
            (str).
        kwargs: may have additional backend-specific options, such as version
            information

        Returns
        -------
        List of strings if detail is False, or list of directory information
        dicts if detail is True.
        """
        raise NotImplementedError

    def _ls_from_cache(self, path):
        """Check cache for listing

        Returns listing, if found (may be an empty list for a directory that
        exists but contains nothing), None if not in cache.
        """
        parent = self._parent(path)
        try:
            return self.dircache[path.rstrip("/")]
        except KeyError:
            pass
        try:
            files = [
                f
                for f in self.dircache[parent]
                if f["name"] == path
                or (f["name"] == path.rstrip("/") and f["type"] == "directory")
            ]
            if len(files) == 0:
                # parent dir was listed but did not contain this file
                raise FileNotFoundError(path)
            return files
        except KeyError:
            pass
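
    # Listing-shape sketch (hypothetical paths and values): with detail=True,
    # each entry is a dict carrying at least "name", "size" and "type".
    #
    #     fs.ls("bucket/dir", detail=True)
    #     # [{"name": "bucket/dir/a.csv", "size": 123, "type": "file"},
    #     #  {"name": "bucket/dir/sub", "size": 0, "type": "directory"}]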

    def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
        """Return all files under the given path.

        List all files, recursing into subdirectories; output is iterator-style,
        like ``os.walk()``. For a simple list of files, ``find()`` is available.

        When topdown is True, the caller can modify the dirnames list in-place
        (perhaps using del or slice assignment), and walk() will only recurse
        into the subdirectories whose names remain in dirnames; this can be
        used to prune the search, impose a specific order of visiting, or even
        to inform walk() about directories the caller creates or renames before
        it resumes walk() again.
        Modifying dirnames when topdown is False has no effect. (see os.walk)

        Note that the "files" output will include anything that is not
        a directory, such as links.

        Parameters
        ----------
        path: str
            Root to recurse into
        maxdepth: int
            Maximum recursion depth. None means limitless, but not recommended
            on link-based file-systems.
        topdown: bool (True)
            Whether to walk the directory tree from the top downwards or from
            the bottom upwards.
        on_error: "omit", "raise", a callable
            if omit (default), a path that raises an exception will simply
            produce no output;
            if raise, the underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
        kwargs: passed to ``ls``
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = self.ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            if callable(on_error):
                on_error(e)
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as the given path
                files[""] = info
            else:
                files[name] = info

        if not detail:
            dirs = list(dirs)
            files = list(files)

        if topdown:
            # Yield before recursion if walking top down
            yield path, dirs, files

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                if not topdown:
                    yield path, dirs, files
                return

        for d in dirs:
            yield from self.walk(
                full_dirs[d],
                maxdepth=maxdepth,
                detail=detail,
                topdown=topdown,
                **kwargs,
            )

        if not topdown:
            # Yield after recursion if walking bottom up
            yield path, dirs, files
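
    # Walk usage sketch (hypothetical paths), mirroring os.walk: while topdown
    # is True, pruning the ``dirs`` list in place limits recursion.
    #
    #     for root, dirs, files in fs.walk("bucket/data", maxdepth=3):
    #         dirs[:] = [d for d in dirs if not d.startswith("_tmp")]
    #         for fn in files:
    #             print(f"{root}/{fn}")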

    def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
        """List all files below path.

        Like posix ``find`` command without conditions

        Parameters
        ----------
        path : str
        maxdepth: int or None
            If not None, the maximum number of levels to descend
        withdirs: bool
            Whether to include directory paths in the output. This is True
            when used by glob, but users usually only want files.
        kwargs are passed to ``ls``.
        """
        # TODO: allow equivalent of -name parameter
        path = self._strip_protocol(path)
        out = {}

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and self.isdir(path):
            out[path] = self.info(path)

        for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            out.update({info["name"]: info for name, info in files.items()})
        if not out and self.isfile(path):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}
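
    # Find usage sketch (hypothetical paths): a flat, sorted list of all files
    # below a prefix, or a {path: info} mapping with detail=True.
    #
    #     fs.find("bucket/data")
    #     # ["bucket/data/a.csv", "bucket/data/sub/b.csv"]
    #     fs.find("bucket/data", withdirs=True, detail=True)
    #     # {"bucket/data/a.csv": {...}, "bucket/data/sub": {...}}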

    def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
        """Space used by files and optionally directories within a path

        Directory size does not include the size of its contents.

        Parameters
        ----------
        path: str
        total: bool
            Whether to sum all the file sizes
        maxdepth: int or None
            Maximum number of directory levels to descend, None for unlimited.
        withdirs: bool
            Whether to include directory paths in the output.
        kwargs: passed to ``find``

        Returns
        -------
        Dict of {path: size} if total=False, or int otherwise, where numbers
        refer to bytes used.
        """
        sizes = {}
        if withdirs and self.isdir(path):
            # Include top-level directory in output
            info = self.info(path)
            sizes[info["name"]] = info["size"]
        for f in self.find(path, maxdepth=maxdepth, withdirs=withdirs, **kwargs):
            info = self.info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

    def glob(self, path, maxdepth=None, **kwargs):
        """Find files by glob-matching.

        Pattern matching capabilities for finding files that match the given pattern.

        Parameters
        ----------
        path: str
            The glob pattern to match against
        maxdepth: int or None
            Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
            Must be at least 1 if provided.
        kwargs:
            Additional arguments passed to ``find`` (e.g., detail=True)

        Returns
        -------
        List of matched paths, or dict of paths and their info if detail=True

        Notes
        -----
        Supported patterns:
        - '*': Matches any sequence of characters within a single directory level
        - ``'**'``: Matches any number of directory levels (must be an entire path component)
        - '?': Matches exactly one character
        - '[abc]': Matches any character in the set
        - '[a-z]': Matches any character in the range
        - '[!abc]': Matches any character NOT in the set

        Special behaviors:
        - If the path ends with '/', only folders are returned
        - Consecutive '*' characters are compressed into a single '*'
        - Empty brackets '[]' never match anything
        - Negated empty brackets '[!]' match any single character
        - Special characters in character classes are escaped properly

        Limitations:
        - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
        - No brace expansion ('{a,b}.txt')
        - No extended glob patterns ('+(pattern)', '!(pattern)')
        """
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            if self.exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: self.info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                p + "/"
                if append_slash_to_dirname and info["type"] == "directory"
                else p
            )
        }

        if detail:
            return out
        else:
            return list(out)
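
    # Glob usage sketch (hypothetical paths): '*' stays within one directory
    # level, '**' crosses levels (honouring maxdepth), and a trailing '/'
    # restricts matches to folders.
    #
    #     fs.glob("bucket/data/*.csv")       # top-level CSVs only
    #     fs.glob("bucket/data/**/*.csv")    # CSVs at any depth
    #     fs.glob("bucket/data/**", maxdepth=2)
    #     fs.glob("bucket/data/*/")          # folders only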

    def exists(self, path, **kwargs):
        """Is there a file at the given path"""
        try:
            self.info(path, **kwargs)
            return True
        except:  # noqa: E722
            # any exception allowed bar FileNotFoundError?
            return False

    def lexists(self, path, **kwargs):
        """If there is a file at the given path (including
        broken links)"""
        return self.exists(path)

    def info(self, path, **kwargs):
        """Give details of entry at path

        Returns a single dictionary, with exactly the same information as ``ls``
        would with ``detail=True``.

        The default implementation calls ls and could be overridden by a
        shortcut. kwargs are passed on to ``ls()``.

        Some file systems might not be able to measure the file's size, in
        which case, the returned dict will include ``'size': None``.

        Returns
        -------
        dict with keys: name (full path in the FS), size (in bytes), type (file,
        directory, or something else) and other FS-specific keys.
        """
        path = self._strip_protocol(path)
        out = self.ls(self._parent(path), detail=True, **kwargs)
        out = [o for o in out if o["name"].rstrip("/") == path]
        if out:
            return out[0]
        out = self.ls(path, detail=True, **kwargs)
        path = path.rstrip("/")
        out1 = [o for o in out if o["name"].rstrip("/") == path]
        if len(out1) == 1:
            if "size" not in out1[0]:
                out1[0]["size"] = None
            return out1[0]
        elif len(out1) > 1 or out:
            return {"name": path, "size": 0, "type": "directory"}
        else:
            raise FileNotFoundError(path)

    def checksum(self, path):
        """Unique value for current version of file

        If the checksum is the same from one moment to another, the contents
        are guaranteed to be the same. If the checksum changes, the contents
        *might* have changed.

        This should normally be overridden; default will probably capture
        creation/modification timestamp (which would be good) or maybe
        access timestamp (which would be bad)
        """
        return int(tokenize(self.info(path)), 16)

    def size(self, path):
        """Size in bytes of file"""
        return self.info(path).get("size", None)

    def sizes(self, paths):
        """Size in bytes of each file in a list of paths"""
        return [self.size(p) for p in paths]

    def isdir(self, path):
        """Is this entry directory-like?"""
        try:
            return self.info(path)["type"] == "directory"
        except OSError:
            return False

    def isfile(self, path):
        """Is this entry file-like?"""
        try:
            return self.info(path)["type"] == "file"
        except:  # noqa: E722
            return False

    def read_text(self, path, encoding=None, errors=None, newline=None, **kwargs):
        """Get the contents of the file as a string.

        Parameters
        ----------
        path: str
            URL of file on this filesystem
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="r",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.read()

    def write_text(
        self, path, value, encoding=None, errors=None, newline=None, **kwargs
    ):
        """Write the text to the given file.

        An existing file will be overwritten.

        Parameters
        ----------
        path: str
            URL of file on this filesystem
        value: str
            Text to write.
        encoding, errors, newline: same as `open`.
        """
        with self.open(
            path,
            mode="w",
            encoding=encoding,
            errors=errors,
            newline=newline,
            **kwargs,
        ) as f:
            return f.write(value)

    def cat_file(self, path, start=None, end=None, **kwargs):
        """Get the content of a file

        Parameters
        ----------
        path: URL of file on this filesystem
        start, end: int
            Bytes limits of the read. If negative, backwards from end,
            like usual python slices. Either can be None for start or
            end of file, respectively
        kwargs: passed to ``open()``.
        """
        # explicitly set buffering off?
        with self.open(path, "rb", **kwargs) as f:
            if start is not None:
                if start >= 0:
                    f.seek(start)
                else:
                    f.seek(max(0, f.size + start))
            if end is not None:
                if end < 0:
                    end = f.size + end
                return f.read(end - f.tell())
            return f.read()

    def pipe_file(self, path, value, mode="overwrite", **kwargs):
        """Set the bytes of given file"""
        if mode == "create" and self.exists(path):
            # non-atomic but simple way; or could use "xb" in open(), which is likely
            # not as well supported
            raise FileExistsError
        with self.open(path, "wb", **kwargs) as f:
            f.write(value)
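
    # Byte-range sketch (hypothetical path): start/end behave like Python
    # slice bounds, including negative offsets counted from the end.
    #
    #     fs.pipe_file("/tmp/x.bin", b"0123456789")
    #     fs.cat_file("/tmp/x.bin", start=2, end=5)  # b"234"
    #     fs.cat_file("/tmp/x.bin", start=-3)        # b"789"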

    def pipe(self, path, value=None, **kwargs):
        """Put value into path

        (counterpart to ``cat``)

        Parameters
        ----------
        path: string or dict(str, bytes)
            If a string, a single remote location to put ``value`` bytes; if a dict,
            a mapping of {path: bytesvalue}.
        value: bytes, optional
            If using a single path, these are the bytes to put there. Ignored if
            ``path`` is a dict
        """
        if isinstance(path, str):
            self.pipe_file(self._strip_protocol(path), value, **kwargs)
        elif isinstance(path, dict):
            for k, v in path.items():
                self.pipe_file(self._strip_protocol(k), v, **kwargs)
        else:
            raise ValueError("path must be str or dict")

    def cat_ranges(
        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
    ):
        """Get the contents of byte ranges from one or more files

        Parameters
        ----------
        paths: list
            A list of filepaths on this filesystem
        starts, ends: int or list
            Bytes limits of the read. If using a single int, the same value will be
            used to read all the specified files.
        """
        if max_gap is not None:
            raise NotImplementedError
        if not isinstance(paths, list):
            raise TypeError
        if not isinstance(starts, list):
            starts = [starts] * len(paths)
        if not isinstance(ends, list):
            ends = [ends] * len(paths)
        if len(starts) != len(paths) or len(ends) != len(paths):
            raise ValueError
        out = []
        for p, s, e in zip(paths, starts, ends):
            try:
                out.append(self.cat_file(p, s, e))
            except Exception as e:
                if on_error == "return":
                    out.append(e)
                else:
                    raise
        return out
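
    # Pipe usage sketch (hypothetical paths): write several files in one call;
    # the expanded ``cat`` below returns a {path: bytes} mapping.
    #
    #     fs.pipe({"/d/a": b"one", "/d/b": b"two"})
    #     fs.cat("/d/*")  # {"/d/a": b"one", "/d/b": b"two"}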

    def cat(self, path, recursive=False, on_error="raise", **kwargs):
        """Fetch (potentially multiple) paths' contents

        Parameters
        ----------
        recursive: bool
            If True, assume the path(s) are directories, and get all the
            contained files
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.
        kwargs: passed to cat_file

        Returns
        -------
        dict of {path: contents} if there are multiple paths
        or the path has been otherwise expanded
        """
        paths = self.expand_path(path, recursive=recursive, **kwargs)
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            out = {}
            for path in paths:
                try:
                    out[path] = self.cat_file(path, **kwargs)
                except Exception as e:
                    if on_error == "raise":
                        raise
                    if on_error == "return":
                        out[path] = e
            return out
        else:
            return self.cat_file(paths[0], **kwargs)

    def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
        """Copy single remote file to local"""
        from .implementations.local import LocalFileSystem

        if isfilelike(lpath):
            outfile = lpath
        elif self.isdir(rpath):
            os.makedirs(lpath, exist_ok=True)
            return None

        fs = LocalFileSystem(auto_mkdir=True)
        fs.makedirs(fs._parent(lpath), exist_ok=True)

        with self.open(rpath, "rb", **kwargs) as f1:
            if outfile is None:
                outfile = open(lpath, "wb")

            try:
                callback.set_size(getattr(f1, "size", None))
                data = True
                while data:
                    data = f1.read(self.blocksize)
                    segment_len = outfile.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)
            finally:
                if not isfilelike(lpath):
                    outfile.close()

    def get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        Calls get_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(rpath, str)
            rpaths = self.expand_path(
                rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
                if not rpaths:
                    return

            if isinstance(lpath, str):
                lpath = make_path_posix(lpath)

            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath))
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(lpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            with callback.branched(rpath, lpath) as child:
                self.get_file(rpath, lpath, callback=child, **kwargs)
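
    # Download sketch (hypothetical paths): a trailing "/" marks the local
    # target as a directory; recursive=True copies a whole tree; globs expand.
    #
    #     fs.get("bucket/data/a.csv", "local_dir/")
    #     fs.get("bucket/data", "local_dir/", recursive=True)
    #     fs.get("bucket/data/*.csv", "local_dir/")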

    def put_file(
        self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
    ):
        """Copy single file to remote"""
        if mode == "create" and self.exists(rpath):
            raise FileExistsError
        if os.path.isdir(lpath):
            self.makedirs(rpath, exist_ok=True)
            return None

        with open(lpath, "rb") as f1:
            size = f1.seek(0, 2)
            callback.set_size(size)
            f1.seek(0)

            self.mkdirs(self._parent(os.fspath(rpath)), exist_ok=True)
            with self.open(rpath, "wb", **kwargs) as f2:
                while f1.tell() < size:
                    data = f1.read(self.blocksize)
                    segment_len = f2.write(data)
                    if segment_len is None:
                        segment_len = len(data)
                    callback.relative_update(segment_len)

    def put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        Calls put_file for each source.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            from .implementations.local import (
                LocalFileSystem,
                make_path_posix,
                trailing_sep,
            )

            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(
                lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or self.isdir(rpath)
            )

            rpath = (
                self._strip_protocol(rpath)
                if isinstance(rpath, str)
                else [self._strip_protocol(p) for p in rpath]
            )
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        callback.set_size(len(rpaths))
        for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
            with callback.branched(lpath, rpath) as child:
                self.put_file(lpath, rpath, callback=child, **kwargs)

    def head(self, path, size=1024):
        """Get the first ``size`` bytes from file"""
        with self.open(path, "rb") as f:
            return f.read(size)

    def tail(self, path, size=1024):
        """Get the last ``size`` bytes from file"""
        with self.open(path, "rb") as f:
            f.seek(max(-size, -f.size), 2)
            return f.read()

    def cp_file(self, path1, path2, **kwargs):
        raise NotImplementedError

    def copy(
        self, path1, path2, recursive=False, maxdepth=None, on_error=None, **kwargs
    ):
        """Copy between two locations in the filesystem

        on_error : "raise", "ignore"
            If raise, any not-found exceptions will be raised; if ignore, any
            not-found exceptions will cause the path to be skipped; defaults to
            raise unless recursive is true, where the default is ignore
        """
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            from .implementations.local import trailing_sep

            source_is_str = isinstance(path1, str)
            paths1 = self.expand_path(
                path1, recursive=recursive, maxdepth=maxdepth, **kwargs
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or self.isdir(path2)
            )

            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        for p1, p2 in zip(paths1, paths2):
            try:
                self.cp_file(p1, p2, **kwargs)
            except FileNotFoundError:
                if on_error == "raise":
                    raise

    def expand_path(self, path, recursive=False, maxdepth=None, **kwargs):
        """Turn one or more globs or directories into a list of all matching paths
        to files or directories.

        kwargs are passed to ``glob`` or ``find``, which may in turn call ``ls``
        """

        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        if isinstance(path, (str, os.PathLike)):
            out = self.expand_path([path], recursive, maxdepth, **kwargs)
        else:
            out = set()
            path = [self._strip_protocol(p) for p in path]
            for p in path:
                if has_magic(p):
                    bit = set(self.glob(p, maxdepth=maxdepth, **kwargs))
                    out |= bit
                    if recursive:
                        # glob call above expanded one depth so if maxdepth is defined
                        # then decrement it in expand_path call below. If it is zero
                        # after decrementing then avoid expand_path call.
                        if maxdepth is not None and maxdepth <= 1:
                            continue
                        out |= set(
                            self.expand_path(
                                list(bit),
                                recursive=recursive,
                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
                                **kwargs,
                            )
                        )
                    continue
                elif recursive:
                    rec = set(
                        self.find(
                            p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                        )
                    )
                    out |= rec
                if p not in out and (recursive is False or self.exists(p)):
                    # should only check once, for the root
                    out.add(p)
        if not out:
            raise FileNotFoundError(path)
        return sorted(out)
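
    # Expansion sketch (hypothetical paths): globs, directories and plain
    # files all collapse to one sorted list of concrete paths.
    #
    #     fs.expand_path("bucket/data/*.csv")
    #     fs.expand_path("bucket/data", recursive=True, maxdepth=2)
    #     fs.expand_path(["bucket/a", "bucket/b/*.json"])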

    def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
        """Move file(s) from one location to another"""
        if path1 == path2:
            logger.debug("%s mv: The paths are the same, so no files were moved.", self)
        else:
            # explicitly raise exception to prevent data corruption
            self.copy(
                path1, path2, recursive=recursive, maxdepth=maxdepth, on_error="raise"
            )
            self.rm(path1, recursive=recursive)

    def rm_file(self, path):
        """Delete a file"""
        self._rm(path)

    def _rm(self, path):
        """Delete one file"""
        # this is the old name for the method, prefer rm_file
        raise NotImplementedError

    def rm(self, path, recursive=False, maxdepth=None):
        """Delete files.

        Parameters
        ----------
        path: str or list of str
            File(s) to delete.
        recursive: bool
            If file(s) are directories, recursively delete contents and then
            also remove the directory
        maxdepth: int or None
            Depth to pass to walk for finding files to delete, if recursive.
            If None, there will be no limit and infinite recursion may be
            possible.
        """
        path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
        for p in reversed(path):
            self.rm_file(p)

    @classmethod
    def _parent(cls, path):
        path = cls._strip_protocol(path)
        if "/" in path:
            parent = path.rsplit("/", 1)[0].lstrip(cls.root_marker)
            return cls.root_marker + parent
        else:
            return cls.root_marker

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """Return raw bytes-mode file-like from the file-system"""
        return AbstractBufferedFile(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs,
        )

    def open(
        self,
        path,
        mode="rb",
        block_size=None,
        cache_options=None,
        compression=None,
        **kwargs,
    ):
        """
        Return a file-like object from the filesystem

        The resultant instance must function correctly in a context ``with``
        block.

        Parameters
        ----------
        path: str
            Target file
        mode: str like 'rb', 'w'
            See builtin ``open()``
            Mode "x" (exclusive write) may be implemented by the backend. Even if
            it is, whether it is checked up front or on commit, and whether it is
            atomic is implementation-dependent.
        block_size: int
            Some indication of buffering - this is a value in bytes
        cache_options : dict, optional
            Extra arguments to pass through to the cache.
        compression: string or None
            If given, open file using compression codec. Can either be a compression
            name (a key in ``fsspec.compression.compr``) or "infer" to guess the
            compression from the filename suffix.
        encoding, errors, newline: passed on to TextIOWrapper for text mode
        """
        import io

        path = self._strip_protocol(path)
        if "b" not in mode:
            mode = mode.replace("t", "") + "b"

            text_kwargs = {
                k: kwargs.pop(k)
                for k in ["encoding", "errors", "newline"]
                if k in kwargs
            }
            return io.TextIOWrapper(
                self.open(
                    path,
                    mode,
                    block_size=block_size,
                    cache_options=cache_options,
                    compression=compression,
                    **kwargs,
                ),
                **text_kwargs,
            )
        else:
            ac = kwargs.pop("autocommit", not self._intrans)
            f = self._open(
                path,
                mode=mode,
                block_size=block_size,
                autocommit=ac,
                cache_options=cache_options,
                **kwargs,
            )
            if compression is not None:
                from fsspec.compression import compr
                from fsspec.core import get_compression

                compression = get_compression(path, compression)
                compress = compr[compression]
                f = compress(f, mode=mode[0])

            if not ac and "r" not in mode:
                self.transaction.files.append(f)
            return f
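
    # Open usage sketch (hypothetical paths): text modes are wrapped in
    # TextIOWrapper, and compression may be named or inferred from the suffix.
    #
    #     with fs.open("bucket/log.txt", "r", encoding="utf-8") as f:
    #         first = f.readline()
    #     with fs.open("bucket/table.csv.gz", "rb", compression="infer") as f:
    #         raw = f.read()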

    def touch(self, path, truncate=True, **kwargs):
        """Create empty file, or update timestamp

        Parameters
        ----------
        path: str
            file location
        truncate: bool
            If True, always set file size to 0; if False, update timestamp and
            leave file unchanged, if backend allows this
        """
        if truncate or not self.exists(path):
            with self.open(path, "wb", **kwargs):
                pass
        else:
            raise NotImplementedError  # update timestamp, if possible

    def ukey(self, path):
        """Hash of file properties, to tell if it has changed"""
        return sha256(str(self.info(path)).encode()).hexdigest()

    def read_block(self, fn, offset, length, delimiter=None):
        """Read a block of bytes from a file

        Starting at ``offset`` of the file, read ``length`` bytes. If
        ``delimiter`` is set then we ensure that the read starts and stops at
        delimiter boundaries that follow the locations ``offset`` and ``offset
        + length``. If ``offset`` is zero then we start at zero. The
        bytestring returned WILL include the end delimiter string.

        If offset+length is beyond the eof, reads to eof.

        Parameters
        ----------
        fn: string
            Path to filename
        offset: int
            Byte offset to start read
        length: int
            Number of bytes to read. If None, read to end.
        delimiter: bytes (optional)
            Ensure reading starts and stops at delimiter bytestring

        Examples
        --------
        >>> fs.read_block('data/file.csv', 0, 13)  # doctest: +SKIP
        b'Alice, 100\\nBo'
        >>> fs.read_block('data/file.csv', 0, 13, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\n'

        Use ``length=None`` to read to the end of the file.
        >>> fs.read_block('data/file.csv', 0, None, delimiter=b'\\n')  # doctest: +SKIP
        b'Alice, 100\\nBob, 200\\nCharlie, 300'

        See Also
        --------
        :func:`fsspec.utils.read_block`
        """
        with self.open(fn, "rb") as f:
            size = f.size
            if length is None:
                length = size
            if size is not None and offset + length > size:
                length = size - offset
            return read_block(f, offset, length, delimiter)

    def to_json(self, *, include_password: bool = True) -> str:
        """
        JSON representation of this filesystem instance.

        Parameters
        ----------
        include_password: bool, default True
            Whether to include the password (if any) in the output.

        Returns
        -------
        JSON string with keys ``cls`` (the python location of this class),
        protocol (text name of this class's protocol, first one in case of
        multiple), ``args`` (positional args, usually empty), and all other
        keyword arguments as their own keys.

        Warnings
        --------
        Serialized filesystems may contain sensitive information which has been
        passed to the constructor, such as passwords and tokens. Make sure you
        store and send them in a secure environment!
        """
        from .json import FilesystemJSONEncoder

        return json.dumps(
            self,
            cls=type(
                "_FilesystemJSONEncoder",
                (FilesystemJSONEncoder,),
                {"include_password": include_password},
            ),
        )

    @staticmethod
    def from_json(blob: str) -> AbstractFileSystem:
        """
        Recreate a filesystem instance from JSON representation.

        See ``.to_json()`` for the expected structure of the input.

        Parameters
        ----------
        blob: str

        Returns
        -------
        file system instance, not necessarily of this particular class.

        Warnings
        --------
        This can import arbitrary modules (as determined by the ``cls`` key).
        Make sure you haven't installed any modules that may execute malicious code
        at import time.
        """
        from .json import FilesystemJSONDecoder

        return json.loads(blob, cls=FilesystemJSONDecoder)

    def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
        """
        JSON-serializable dictionary representation of this filesystem instance.

        Parameters
        ----------
        include_password: bool, default True
            Whether to include the password (if any) in the output.

        Returns
        -------
        Dictionary with keys ``cls`` (the python location of this class),
        protocol (text name of this class's protocol, first one in case of
        multiple), ``args`` (positional args, usually empty), and all other
        keyword arguments as their own keys.

        Warnings
        --------
        Serialized filesystems may contain sensitive information which has been
        passed to the constructor, such as passwords and tokens. Make sure you
        store and send them in a secure environment!
        """
        from .json import FilesystemJSONEncoder

        json_encoder = FilesystemJSONEncoder()

        cls = type(self)
        proto = self.protocol

        storage_options = dict(self.storage_options)
        if not include_password:
            storage_options.pop("password", None)

        return dict(
            cls=f"{cls.__module__}:{cls.__name__}",
            protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
            args=json_encoder.make_serializable(self.storage_args),
            **json_encoder.make_serializable(storage_options),
        )

    @staticmethod
    def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
        """
        Recreate a filesystem instance from dictionary representation.

        See ``.to_dict()`` for the expected structure of the input.

        Parameters
        ----------
        dct: Dict[str, Any]

        Returns
        -------
        file system instance, not necessarily of this particular class.

        Warnings
        --------
        This can import arbitrary modules (as determined by the ``cls`` key).
        Make sure you haven't installed any modules that may execute malicious code
        at import time.
        """
        from .json import FilesystemJSONDecoder

        json_decoder = FilesystemJSONDecoder()

        dct = dict(dct)  # Defensive copy

        cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
        if cls is None:
            raise ValueError("Not a serialized AbstractFileSystem")

        dct.pop("cls", None)
        dct.pop("protocol", None)

        return cls(
            *json_decoder.unmake_serializable(dct.pop("args", ())),
            **json_decoder.unmake_serializable(dct),
        )
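
    # Serialization round-trip sketch (hypothetical instance): because
    # instances are cached by constructor arguments, deserializing the same
    # dict typically returns the already-cached instance.
    #
    #     d = fs.to_dict(include_password=False)
    #     # {"cls": "fsspec.implementations.memory:MemoryFileSystem",
    #     #  "protocol": "memory", "args": [], ...}
    #     fs2 = AbstractFileSystem.from_dict(d)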

    def _get_pyarrow_filesystem(self):
        """
        Make a version of the FS instance which will be acceptable to pyarrow
        """
        # all instances already also derive from pyarrow
        return self

    def get_mapper(self, root="", check=False, create=False, missing_exceptions=None):
        """Create key/value store based on this file-system

        Makes a MutableMapping interface to the FS at the given root path.
        See ``fsspec.mapping.FSMap`` for further details.
        """
        from .mapping import FSMap

        return FSMap(
            root,
            self,
            check=check,
            create=create,
            missing_exceptions=missing_exceptions,
        )

    @classmethod
    def clear_instance_cache(cls):
        """
        Clear the cache of filesystem instances.

        Notes
        -----
        Unless overridden by setting the ``cachable`` class attribute to False,
        the filesystem class stores a reference to newly created instances. This
        prevents Python's normal rules around garbage collection from working,
        since the instance's refcount will not drop to zero until
        ``clear_instance_cache`` is called.
        """
        cls._cache.clear()

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime"""
        raise NotImplementedError

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime"""
        raise NotImplementedError

    def tree(
        self,
        path: str = "/",
        recursion_limit: int = 2,
        max_display: int = 25,
        display_size: bool = False,
        prefix: str = "",
        is_last: bool = True,
        first: bool = True,
        indent_size: int = 4,
    ) -> str:
        """
        Return a tree-like structure of the filesystem starting from the given path as a string.

        Parameters
        ----------
        path: Root path to start traversal from
        recursion_limit: Maximum depth of directory traversal
        max_display: Maximum number of items to display per directory
        display_size: Whether to display file sizes
        prefix: Current line prefix for visual tree structure
        is_last: Whether current item is last in its level
        first: Whether this is the first call (displays root path)
        indent_size: Number of spaces per indent level

        Returns
        -------
        str: A string representing the tree structure.

        Example
        -------
        >>> from fsspec import filesystem

        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
        >>> print(tree)
        """

        def format_bytes(n: int) -> str:
            """Format bytes as text."""
            for prefix, k in (
                ("P", 2**50),
                ("T", 2**40),
                ("G", 2**30),
                ("M", 2**20),
                ("k", 2**10),
            ):
                if n >= 0.9 * k:
                    return f"{n / k:.2f} {prefix}b"
            return f"{n}B"

        result = []

        if first:
            result.append(path)

        if recursion_limit:
            indent = " " * indent_size
            contents = self.ls(path, detail=True)
            contents.sort(
                key=lambda x: (x.get("type") != "directory", x.get("name", ""))
            )

            if max_display is not None and len(contents) > max_display:
                displayed_contents = contents[:max_display]
                remaining_count = len(contents) - max_display
            else:
                displayed_contents = contents
                remaining_count = 0

            for i, item in enumerate(displayed_contents):
                is_last_item = (i == len(displayed_contents) - 1) and (
                    remaining_count == 0
                )

                branch = (
                    "└" + ("─" * (indent_size - 2))
                    if is_last_item
                    else "├" + ("─" * (indent_size - 2))
                )
                branch += " "
                new_prefix = prefix + (
                    indent if is_last_item else "│" + " " * (indent_size - 1)
                )

                name = os.path.basename(item.get("name", ""))

                if display_size and item.get("type") == "directory":
                    sub_contents = self.ls(item.get("name", ""), detail=True)
                    num_files = sum(
                        1 for sub_item in sub_contents if sub_item.get("type") == "file"
                    )
                    num_folders = sum(
                        1
                        for sub_item in sub_contents
                        if sub_item.get("type") == "directory"
                    )

                    if num_files == 0 and num_folders == 0:
                        size = " (empty folder)"
                    elif num_files == 0:
                        size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
                    elif num_folders == 0:
                        size = f" ({num_files} file{'s' if num_files > 1 else ''})"
                    else:
                        size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
                elif display_size and item.get("type") == "file":
                    size = f" ({format_bytes(item.get('size', 0))})"
                else:
                    size = ""

                result.append(f"{prefix}{branch}{name}{size}")

                if item.get("type") == "directory" and recursion_limit > 0:
                    result.append(
                        self.tree(
                            path=item.get("name", ""),
                            recursion_limit=recursion_limit - 1,
                            max_display=max_display,
                            display_size=display_size,
                            prefix=new_prefix,
                            is_last=is_last_item,
                            first=False,
                            indent_size=indent_size,
                        )
                    )

            if remaining_count > 0:
                more_message = f"{remaining_count} more item(s) not displayed."
                result.append(
                    f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
                )

        return "\n".join(_ for _ in result if _)

1748 # ------------------------------------------------------------------------ 

1749 # Aliases 

1750 

1751 def read_bytes(self, path, start=None, end=None, **kwargs): 

1752 """Alias of `AbstractFileSystem.cat_file`.""" 

1753 return self.cat_file(path, start=start, end=end, **kwargs) 

1754 

1755 def write_bytes(self, path, value, **kwargs): 

1756 """Alias of `AbstractFileSystem.pipe_file`.""" 

1757 self.pipe_file(path, value, **kwargs) 

1758 

1759 def makedir(self, path, create_parents=True, **kwargs): 

1760 """Alias of `AbstractFileSystem.mkdir`.""" 

1761 return self.mkdir(path, create_parents=create_parents, **kwargs) 

1762 

1763 def mkdirs(self, path, exist_ok=False): 

1764 """Alias of `AbstractFileSystem.makedirs`.""" 

1765 return self.makedirs(path, exist_ok=exist_ok) 

1766 

1767 def listdir(self, path, detail=True, **kwargs): 

1768 """Alias of `AbstractFileSystem.ls`.""" 

1769 return self.ls(path, detail=detail, **kwargs) 

1770 

1771 def cp(self, path1, path2, **kwargs): 

1772 """Alias of `AbstractFileSystem.copy`.""" 

1773 return self.copy(path1, path2, **kwargs) 

1774 

1775 def move(self, path1, path2, **kwargs): 

1776 """Alias of `AbstractFileSystem.mv`.""" 

1777 return self.mv(path1, path2, **kwargs) 

1778 

1779 def stat(self, path, **kwargs): 

1780 """Alias of `AbstractFileSystem.info`.""" 

1781 return self.info(path, **kwargs) 

1782 

1783 def disk_usage(self, path, total=True, maxdepth=None, **kwargs): 

1784 """Alias of `AbstractFileSystem.du`.""" 

1785 return self.du(path, total=total, maxdepth=maxdepth, **kwargs) 

1786 

1787 def rename(self, path1, path2, **kwargs): 

1788 """Alias of `AbstractFileSystem.mv`.""" 

1789 return self.mv(path1, path2, **kwargs) 

1790 

1791 def delete(self, path, recursive=False, maxdepth=None): 

1792 """Alias of `AbstractFileSystem.rm`.""" 

1793 return self.rm(path, recursive=recursive, maxdepth=maxdepth) 

1794 

1795 def upload(self, lpath, rpath, recursive=False, **kwargs): 

1796 """Alias of `AbstractFileSystem.put`.""" 

1797 return self.put(lpath, rpath, recursive=recursive, **kwargs) 

1798 

1799 def download(self, rpath, lpath, recursive=False, **kwargs): 

1800 """Alias of `AbstractFileSystem.get`.""" 

1801 return self.get(rpath, lpath, recursive=recursive, **kwargs) 

1802 
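# The aliases above delegate directly, so each pair below is
# interchangeable (editor's sketch; paths are invented):
#   fs.download("remote/a", "local/a")  # identical to fs.get("remote/a", "local/a")
#   fs.disk_usage("remote")             # identical to fs.du("remote")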

1803 def sign(self, path, expiration=100, **kwargs): 

1804 """Create a signed URL representing the given path 

1805 

1806 Some implementations allow temporary URLs to be generated, as a 

1807 way of delegating credentials. 

1808 

1809 Parameters 

1810 ---------- 

1811 path : str 

1812 The path on the filesystem 

1813 expiration : int 

1814 Number of seconds to enable the URL for (if supported) 

1815 

1816 Returns 

1817 ------- 

1818 URL : str 

1819 The signed URL 

1820 

1821 Raises 

1822 ------ 

1823 NotImplementedError : if method is not implemented for a filesystem 

1824 """ 

1825 raise NotImplementedError("Sign is not implemented for this filesystem") 

1826 
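# Minimal sketch of an override (editor's addition; the class name and the
# query-string format are hypothetical, not any real backend's scheme):
#
#   class SigningFileSystem(AbstractFileSystem):
#       def sign(self, path, expiration=100, **kwargs):
#           # a real backend would return a cryptographically signed URL
#           return f"https://example.com/{path}?expires={expiration}"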

1827 def _isfilestore(self): 

1828 # Originally inherited from pyarrow DaskFileSystem. Keeping this 

1829 # here for backwards compatibility as long as pyarrow uses its 

1830 # legacy fsspec-compatible filesystems and thus accepts fsspec 

1831 # filesystems as well 

1832 return False 

1833 

1834 

1835class AbstractBufferedFile(io.IOBase): 

1836 """Convenient class to derive from to provide buffering 

1837 

1838 In the case that the backend does not provide a pythonic file-like object 

1839 already, this class contains much of the logic to build one. The only 

1840 methods that need to be overridden are ``_upload_chunk``, 

1841 ``_initiate_upload`` and ``_fetch_range``; a minimal sketch appears at the end of this listing. 

1842 """ 

1843 

1844 DEFAULT_BLOCK_SIZE = 5 * 2**20 

1845 _details = None 

1846 

1847 def __init__( 

1848 self, 

1849 fs, 

1850 path, 

1851 mode="rb", 

1852 block_size="default", 

1853 autocommit=True, 

1854 cache_type="readahead", 

1855 cache_options=None, 

1856 size=None, 

1857 **kwargs, 

1858 ): 

1859 """ 

1860 Template for files with buffered reading and writing 

1861 

1862 Parameters 

1863 ---------- 

1864 fs: instance of FileSystem 

1865 path: str 

1866 location in file-system 

1867 mode: str 

1868 Normal file modes. Currently only 'rb', 'wb', 'ab' or 'xb'. Some file 

1869 systems may be read-only, and some may not support append. 

1870 block_size: int 

1871 Buffer size for reading or writing, 'default' for class default 

1872 autocommit: bool 

1873 Whether to write to the final destination; may only impact what 

1874 happens when the file is being closed. 

1875 cache_type: {"readahead", "none", "mmap", "bytes"}, default "readahead" 

1876 Caching policy in read mode. See the definitions in ``core``. 

1877 cache_options : dict 

1878 Additional options passed to the constructor for the cache specified 

1879 by `cache_type`. 

1880 size: int 

1881 If given and in read mode, avoids having to look up the file size 

1882 kwargs: 

1883 Gets stored as self.kwargs 

1884 """ 

1885 from .core import caches 

1886 

1887 self.path = path 

1888 self.fs = fs 

1889 self.mode = mode 

1890 self.blocksize = ( 

1891 self.DEFAULT_BLOCK_SIZE if block_size in ["default", None] else block_size 

1892 ) 

1893 self.loc = 0 

1894 self.autocommit = autocommit 

1895 self.end = None 

1896 self.start = None 

1897 self.closed = False 

1898 

1899 if cache_options is None: 

1900 cache_options = {} 

1901 

1902 if "trim" in kwargs: 

1903 warnings.warn( 

1904 "Passing 'trim' to control the cache behavior has been deprecated. " 

1905 "Specify it within the 'cache_options' argument instead.", 

1906 FutureWarning, 

1907 ) 

1908 cache_options["trim"] = kwargs.pop("trim") 

1909 

1910 self.kwargs = kwargs 

1911 

1912 if mode not in {"ab", "rb", "wb", "xb"}: 

1913 raise NotImplementedError("File mode not supported") 

1914 if mode == "rb": 

1915 if size is not None: 

1916 self.size = size 

1917 else: 

1918 self.size = self.details["size"] 

1919 self.cache = caches[cache_type]( 

1920 self.blocksize, self._fetch_range, self.size, **cache_options 

1921 ) 

1922 else: 

1923 self.buffer = io.BytesIO() 

1924 self.offset = None 

1925 self.forced = False 

1926 self.location = None 

1927 

1928 @property 

1929 def details(self): 

1930 if self._details is None: 

1931 self._details = self.fs.info(self.path) 

1932 return self._details 

1933 

1934 @details.setter 

1935 def details(self, value): 

1936 self._details = value 

1937 self.size = value["size"] 

1938 

1939 @property 

1940 def full_name(self): 

1941 return _unstrip_protocol(self.path, self.fs) 

1942 

1943 @property 

1944 def closed(self): 

1945 # get around this attr being read-only in IOBase 

1946 # use getattr here, since this can be called during del 

1947 return getattr(self, "_closed", True) 

1948 

1949 @closed.setter 

1950 def closed(self, c): 

1951 self._closed = c 

1952 

1953 def __hash__(self): 

1954 if "w" in self.mode: 

1955 return id(self) 

1956 else: 

1957 return int(tokenize(self.details), 16) 

1958 

1959 def __eq__(self, other): 

1960 """Files are equal if they have the same checksum, only in read mode""" 

1961 if self is other: 

1962 return True 

1963 return ( 

1964 isinstance(other, type(self)) 

1965 and self.mode == "rb" 

1966 and other.mode == "rb" 

1967 and hash(self) == hash(other) 

1968 ) 

1969 

1970 def commit(self): 

1971 """Move from temp to final destination""" 

1972 

1973 def discard(self): 

1974 """Throw away temporary file""" 

1975 

1976 def info(self): 

1977 """File information about this path""" 

1978 if self.readable(): 

1979 return self.details 

1980 else: 

1981 raise ValueError("Info not available while writing") 

1982 

1983 def tell(self): 

1984 """Current file location""" 

1985 return self.loc 

1986 

1987 def seek(self, loc, whence=0): 

1988 """Set current file location 

1989 

1990 Parameters 

1991 ---------- 

1992 loc: int 

1993 byte location 

1994 whence: {0, 1, 2} 

1995 from start of file, current location or end of file, resp. 

1996 """ 

1997 loc = int(loc) 

1998 if self.mode != "rb": 

1999 raise OSError(ESPIPE, "Seek only available in read mode") 

2000 if whence == 0: 

2001 nloc = loc 

2002 elif whence == 1: 

2003 nloc = self.loc + loc 

2004 elif whence == 2: 

2005 nloc = self.size + loc 

2006 else: 

2007 raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)") 

2008 if nloc < 0: 

2009 raise ValueError("Seek before start of file") 

2010 self.loc = nloc 

2011 return self.loc 

2012 
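# Whence semantics in brief (editor's sketch): for a 100-byte file,
# each call starting from loc=10:
#   f.seek(5)      -> loc 5   (absolute)
#   f.seek(5, 1)   -> loc 15  (relative to current position)
#   f.seek(-5, 2)  -> loc 95  (relative to end)
# Any seek on a file opened with mode="wb" raises OSError(ESPIPE).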

2013 def write(self, data): 

2014 """ 

2015 Write data to buffer. 

2016 

2017 The buffer is only sent to the backend on flush(), or when the 

2018 buffered data reaches the blocksize. 

2019 

2020 Parameters 

2021 ---------- 

2022 data: bytes 

2023 Set of bytes to be written. 

2024 """ 

2025 if not self.writable(): 

2026 raise ValueError("File not in write mode") 

2027 if self.closed: 

2028 raise ValueError("I/O operation on closed file.") 

2029 if self.forced: 

2030 raise ValueError("This file has been force-flushed, can only close") 

2031 out = self.buffer.write(data) 

2032 self.loc += out 

2033 if self.buffer.tell() >= self.blocksize: 

2034 self.flush() 

2035 return out 

2036 

2037 def flush(self, force=False): 

2038 """ 

2039 Write buffered data to backend store. 

2040 

2041 Writes the current buffer, if it is larger than the block-size, or if 

2042 the file is being closed. 

2043 

2044 Parameters 

2045 ---------- 

2046 force: bool 

2047 When closing, write the last block even if it is smaller than the 

2048 minimum allowed block size. Disallows further writing to this file. 

2049 """ 

2050 

2051 if self.closed: 

2052 raise ValueError("Flush on closed file") 

2053 if force and self.forced: 

2054 raise ValueError("Force flush cannot be called more than once") 

2055 if force: 

2056 self.forced = True 

2057 

2058 if self.readable(): 

2059 # flush is a no-op in read mode 

2060 return 

2061 

2062 if not force and self.buffer.tell() < self.blocksize: 

2063 # Defer write on small block 

2064 return 

2065 

2066 if self.offset is None: 

2067 # Initialize a multipart upload 

2068 self.offset = 0 

2069 try: 

2070 self._initiate_upload() 

2071 except BaseException: 

2072 self.closed = True 

2073 raise 

2074 

2075 if self._upload_chunk(final=force) is not False: 

2076 self.offset += self.buffer.seek(0, 2) 

2077 self.buffer = io.BytesIO() 

2078 
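# Buffering lifecycle (editor's sketch of the behaviour defined above, for
# an implementation whose open() returns an AbstractBufferedFile):
#   with fs.open("path", "wb", block_size=5 * 2**20) as f:
#       f.write(small)  # stays below blocksize: buffered only, no upload
#       f.write(big)    # buffer reaches blocksize -> flush() uploads a chunk
#   # close() calls flush(force=True), uploading the final short block;
#   # with autocommit=False, commit() is still needed to finalize.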

2079 def _upload_chunk(self, final=False): 

2080 """Write one part of a multi-block file upload 

2081 

2082 Parameters 

2083 ========== 

2084 final: bool 

2085 This is the last block, so should complete file, if 

2086 self.autocommit is True. 

2087 """ 

2088 # may not yet have been initialized; may need to call _initiate_upload 

2089 

2090 def _initiate_upload(self): 

2091 """Create remote file/upload""" 

2092 pass 

2093 

2094 def _fetch_range(self, start, end): 

2095 """Get the specified set of bytes from remote""" 

2096 return self.fs.cat_file(self.path, start=start, end=end) 

2097 

2098 def read(self, length=-1): 

2099 """ 

2100 Return data from cache, or fetch pieces as necessary 

2101 

2102 Parameters 

2103 ---------- 

2104 length: int (-1) 

2105 Number of bytes to read; if <0, all remaining bytes. 

2106 """ 

2107 length = -1 if length is None else int(length) 

2108 if self.mode != "rb": 

2109 raise ValueError("File not in read mode") 

2110 if length < 0: 

2111 length = self.size - self.loc 

2112 if self.closed: 

2113 raise ValueError("I/O operation on closed file.") 

2114 if length == 0: 

2115 # don't even bother calling fetch 

2116 return b"" 

2117 out = self.cache._fetch(self.loc, self.loc + length) 

2118 

2119 logger.debug( 

2120 "%s read: %i - %i %s", 

2121 self, 

2122 self.loc, 

2123 self.loc + length, 

2124 self.cache._log_stats(), 

2125 ) 

2126 self.loc += len(out) 

2127 return out 

2128 

2129 def readinto(self, b): 

2130 """mirrors builtin file's readinto method 

2131 

2132 https://docs.python.org/3/library/io.html#io.RawIOBase.readinto 

2133 """ 

2134 out = memoryview(b).cast("B") 

2135 data = self.read(out.nbytes) 

2136 out[: len(data)] = data 

2137 return len(data) 

2138 

2139 def readuntil(self, char=b"\n", blocks=None): 

2140 """Return data between current position and first occurrence of char 

2141 

2142 char is included in the output, except if the end of the file is 

2143 encountered first. 

2144 

2145 Parameters 

2146 ---------- 

2147 char: bytes 

2148 Thing to find 

2149 blocks: None or int 

2150 How much to read in each go. Defaults to file blocksize - which may 

2151 mean a new read on every call. 

2152 """ 

2153 out = [] 

2154 while True: 

2155 start = self.tell() 

2156 part = self.read(blocks or self.blocksize) 

2157 if len(part) == 0: 

2158 break 

2159 found = part.find(char) 

2160 if found > -1: 

2161 out.append(part[: found + len(char)]) 

2162 self.seek(start + found + len(char)) 

2163 break 

2164 out.append(part) 

2165 return b"".join(out) 

2166 
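# Example of readuntil (editor's sketch): with file contents b"abc\ndef",
#   f.readuntil(b"\n")  -> b"abc\n"  (the delimiter is included)
#   f.readuntil(b"\n")  -> b"def"    (EOF reached before another delimiter)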

2167 def readline(self): 

2168 """Read until and including the first occurrence of newline character 

2169 

2170 Note that, because of character encoding, this is not necessarily a 

2171 true line ending. 

2172 """ 

2173 return self.readuntil(b"\n") 

2174 

2175 def __next__(self): 

2176 out = self.readline() 

2177 if out: 

2178 return out 

2179 raise StopIteration 

2180 

2181 def __iter__(self): 

2182 return self 

2183 

2184 def readlines(self): 

2185 """Return all data, split by the newline character, including the newline character""" 

2186 data = self.read() 

2187 lines = data.split(b"\n") 

2188 out = [line + b"\n" for line in lines[:-1]] 

2189 if data.endswith(b"\n"): 

2190 return out 

2191 else: 

2192 return out + [lines[-1]] 

2193 # return list(self) ??? 

2194 

2195 def readinto1(self, b): 

2196 return self.readinto(b) 

2197 

2198 def close(self): 

2199 """Close file 

2200 

2201 Finalizes writes, discards cache 

2202 """ 

2203 if getattr(self, "_unclosable", False): 

2204 return 

2205 if self.closed: 

2206 return 

2207 try: 

2208 if self.mode == "rb": 

2209 self.cache = None 

2210 else: 

2211 if not self.forced: 

2212 self.flush(force=True) 

2213 

2214 if self.fs is not None: 

2215 self.fs.invalidate_cache(self.path) 

2216 self.fs.invalidate_cache(self.fs._parent(self.path)) 

2217 finally: 

2218 self.closed = True 

2219 

2220 def readable(self): 

2221 """Whether opened for reading""" 

2222 return "r" in self.mode and not self.closed 

2223 

2224 def seekable(self): 

2225 """Whether is seekable (only in read mode)""" 

2226 return self.readable() 

2227 

2228 def writable(self): 

2229 """Whether opened for writing""" 

2230 return self.mode in {"wb", "ab", "xb"} and not self.closed 

2231 

2232 def __reduce__(self): 

2233 if self.mode != "rb": 

2234 raise RuntimeError("Pickling a writeable file is not supported") 

2235 

2236 return reopen, ( 

2237 self.fs, 

2238 self.path, 

2239 self.mode, 

2240 self.blocksize, 

2241 self.loc, 

2242 self.size, 

2243 self.autocommit, 

2244 self.cache.name if self.cache else "none", 

2245 self.kwargs, 

2246 ) 

2247 

2248 def __del__(self): 

2249 if not self.closed: 

2250 self.close() 

2251 

2252 def __str__(self): 

2253 return f"<File-like object {type(self.fs).__name__}, {self.path}>" 

2254 

2255 __repr__ = __str__ 

2256 

2257 def __enter__(self): 

2258 return self 

2259 

2260 def __exit__(self, *args): 

2261 self.close() 

2262 

2263 

2264def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs): 

2265 file = fs.open( 

2266 path, 

2267 mode=mode, 

2268 block_size=blocksize, 

2269 autocommit=autocommit, 

2270 cache_type=cache_type, 

2271 size=size, 

2272 **kwargs, 

2273 ) 

2274 if loc > 0: 

2275 file.seek(loc) 

2276 return file
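# A minimal end-to-end sketch (editor's addition, not part of spec.py): the
# three overrides named in AbstractBufferedFile's docstring, backed by a
# plain dict. The names `_STORE` and `MemoryBufferedFile` are invented here.

_STORE: dict[str, bytearray] = {}


class MemoryBufferedFile(AbstractBufferedFile):
    def _initiate_upload(self):
        # called once by flush(), before the first chunk is uploaded
        _STORE[self.path] = bytearray()

    def _upload_chunk(self, final=False):
        # the buffer holds everything written since the previous flush()
        _STORE[self.path] += self.buffer.getvalue()
        return True

    def _fetch_range(self, start, end):
        # serve a byte range to the read-side cache
        return bytes(_STORE[self.path][start:end])


# Round trip, assuming `fs` is any AbstractFileSystem instance (hypothetical):
#   f = MemoryBufferedFile(fs, "/tmp/x", mode="wb"); f.write(b"abc"); f.close()
#   g = MemoryBufferedFile(fs, "/tmp/x", mode="rb", size=3); g.read()  # b"abc"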