1""" upath._flavour_sources
2
3<experimental!>
4
5Warning
6-------
7 Do not modify this file manually!
8 It is generated by `dev/generate_flavours.py`
9
To be able to parse the different filesystem uri schemes, we need
the string parsing functionality of each of the filesystem implementations.
12In an attempt to support parsing uris without having to import the
13specific filesystems, we extract the necessary subset of the
14AbstractFileSystem classes and generate a new "flavour" class for
15each of the known filesystems. This will allow us to provide a
16`PurePath` equivalent `PureUPath` for each protocol in the future
17without a direct dependency on the underlying filesystem package.
18
19"""
20#
21# skipping protocols:
22# - blockcache
23# - cached
24# - dir
25# - filecache
26# - simplecache
27# protocol import errors:
28# - gdrive (Please install gdrivefs for access to Google Drive)
29# - generic (GenericFileSystem: '_strip_protocol' not a classmethod)
30#
31from __future__ import annotations
32
33import logging
34import os
35import re
36from pathlib import PurePath
37from pathlib import PureWindowsPath
38from typing import Any
39from typing import Literal
40from typing import cast
41from urllib.parse import parse_qs
42from urllib.parse import urlsplit
43
44from fsspec.implementations.local import make_path_posix
45from fsspec.utils import infer_storage_options
46from fsspec.utils import stringify_path
47
# Names exported by ``from upath._flavour_sources import *``.
__all__ = [
    "AbstractFileSystemFlavour",
    "FileSystemFlavourBase",
    "flavour_registry",
]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Maps a protocol name to the flavour class registered for it. Entries are
# added by ``FileSystemFlavourBase.__init_subclass__`` whenever a flavour
# class is defined.
flavour_registry: dict[str, type[FileSystemFlavourBase]] = {}
56
57
class FileSystemFlavourBase:
    """Common root of all fsspec flavour classes.

    Declares the attributes and hooks a flavour provides and registers every
    subclass in ``flavour_registry`` under each of its protocol names.
    """

    protocol: str | tuple[str, ...]
    root_marker: Literal["/", ""]
    sep: Literal["/"]

    @classmethod
    def _strip_protocol(cls, path):
        """Hook to be overridden by subclasses; always raises here."""
        raise NotImplementedError

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Hook to be overridden by subclasses; always raises here."""
        raise NotImplementedError

    @classmethod
    def _parent(cls, path):
        """Hook to be overridden by subclasses; always raises here."""
        raise NotImplementedError

    def __init_subclass__(cls: Any, **kwargs):
        """Register the new subclass for every protocol it declares.

        Raises
        ------
        ValueError
            If one of the protocol names is already present in
            ``flavour_registry``. Names preceding the duplicate have
            already been registered at that point.
        """
        declared = cls.protocol
        # a single protocol may be given as a bare string
        names = (declared,) if isinstance(declared, str) else tuple(declared)
        for name in names:
            if name in flavour_registry:
                raise ValueError(f"protocol {name!r} already registered")
            flavour_registry[name] = cls
86
87
class AbstractFileSystemFlavour(FileSystemFlavourBase):
    """Flavour extracted from ``fsspec.spec.AbstractFileSystem``.

    Supplies the default protocol stripping, url-kwarg extraction and parent
    computation that the protocol specific flavours inherit or override.
    """

    __orig_class__ = "fsspec.spec.AbstractFileSystem"
    __orig_version__ = "2024.10.0"
    protocol: str | tuple[str, ...] = "abstract"
    root_marker: Literal["", "/"] = ""
    sep: Literal["/"] = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Convert a fully-qualified path into a file-system-specific one.

        A list is handled element by element. For every protocol of the
        class a leading ``<protocol>://`` or ``<protocol>::`` is removed,
        then trailing slashes are dropped. An empty result is replaced by
        ``root_marker``.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]

        stripped = stringify_path(path)
        schemes = cls.protocol if not isinstance(cls.protocol, str) else (cls.protocol,)
        for scheme in schemes:
            # "://" takes precedence over "::"; at most one is removed per scheme
            for marker in ("://", "::"):
                prefix = scheme + marker
                if stripped.startswith(prefix):
                    stripped = stripped[len(prefix):]
                    break

        stripped = stripped.rstrip("/")
        # fall back to root_marker so the minimal path is e.g. "/"
        return stripped if stripped else cls.root_marker

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Extract keyword arguments that are encoded in the path.

        This is meant to run before the class is instantiated; methods
        receiving such paths are then expected to strip the options again.
        An sftp path like "sftp://user@host:/my/path" is an example: user and
        host would become kwargs.

        The default implementation extracts nothing.
        """
        return {}

    @classmethod
    def _parent(cls, path):
        """Return the parent of ``path``, prefixed with ``root_marker``."""
        stripped = cls._strip_protocol(path)
        if "/" not in stripped:
            return cls.root_marker
        head = stripped.rpartition("/")[0]
        return cls.root_marker + head.lstrip(cls.root_marker)
135
136
class AsyncLocalFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``morefs.asyn_local.AsyncLocalFileSystem``.

    ``protocol`` is empty, so this class is not entered into the
    flavour registry under any name.
    """

    __orig_class__ = "morefs.asyn_local.AsyncLocalFileSystem"
    __orig_version__ = "0.2.2"
    protocol = ()
    root_marker = "/"
    sep = "/"
    local_file = True

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a ``file``/``local`` prefix and normalise to a posix-style path."""
        text = stringify_path(path)
        # order matters: the "//" variants must be tried before the bare ones
        for prefix in ("file://", "file:", "local://", "local:"):
            if text.startswith(prefix):
                text = text[len(prefix):]
                break

        text = make_path_posix(text)
        if os.sep == "/":
            return text.rstrip("/") or cls.root_marker

        # Non-posix separator: a stripped down version of
        # > drive, path = ntpath.splitdrive(path)
        if text[1:2] == ":":
            # Absolute drive-letter path, e.g. X:\Windows
            # Relative path with drive, e.g. X:Windows
            drive, rest = text[:2], text[2:]
        elif text[:2] == "//":
            # UNC drives, e.g. \\server\share or \\?\UNC\server\share
            # Device drives, e.g. \\.\device or \\?\device
            first = text.find("/", 2)
            second = text.find("/", first + 1) if first != -1 else -1
            if second == -1:
                drive, rest = text, ""
            else:
                drive, rest = text[:second], text[second:]
        else:
            # Relative path, e.g. Windows
            drive, rest = "", text

        return drive + (rest.rstrip("/") or cls.root_marker)

    @classmethod
    def _parent(cls, path):
        """Return the parent directory of ``path``."""
        stripped = cls._strip_protocol(path)
        head = stripped.rsplit("/", 1)[0]
        if os.sep == "/":
            # posix native
            return head or "/"
        # NT
        if len(head) <= 3 and head[1:2] == ":":
            # nt root (something like c:/)
            return head[0] + ":/"
        # More cases may be required here
        return head
199
200
class AzureBlobFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``adlfs.spec.AzureBlobFileSystem``."""

    __orig_class__ = "adlfs.spec.AzureBlobFileSystem"
    __orig_version__ = "2024.7.0"
    protocol = ("abfs", "az", "abfss")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path: str):
        """
        Strip the protocol from ``path``.

        Parameters
        ----------
        path: str
            Path from which the protocol is removed

        Returns
        -------
        str
            The path without protocol and without leading slashes
        """
        if isinstance(path, list):  # type: ignore[unreachable]
            return [cls._strip_protocol(item) for item in path]  # type: ignore[unreachable]

        STORE_SUFFIX = ".dfs.core.windows.net"
        logger.debug(f"_strip_protocol for {path}")
        if not path.startswith(("abfs://", "az://", "abfss://")):
            path = "abfs://" + path.lstrip("/")
        ops = infer_storage_options(path)

        if "username" in ops:
            username = ops.get("username", None)
            if username:
                ops["path"] = username + ops["path"]
        else:
            # the path has to keep the format {host}/{path},
            # where host is the container_name
            host = ops.get("host", None)
            if host and STORE_SUFFIX not in host:
                # no store-suffix, so this is container-name
                ops["path"] = host + ops["path"]

        url_query = ops.get("url_query")
        if url_query is not None:
            ops["path"] = f"{ops['path']}?{url_query}"

        logger.debug(f"_strip_protocol({path}) = {ops}")
        return ops["path"].lstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Derive ``account_name`` and ``version_aware`` from the urlpath."""
        ops = infer_storage_options(urlpath)
        kwargs = {}

        host = ops.get("host", None)
        if host:
            found = re.match(
                r"(?P<account_name>.+)\.(dfs|blob)\.core\.windows\.net", host
            )
            if found:
                kwargs["account_name"] = found.group("account_name")

        url_query = ops.get("url_query")
        if url_query is not None and "versionid" in parse_qs(url_query):
            kwargs["version_aware"] = True
        return kwargs
272
273
class AzureDatalakeFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``adlfs.gen1.AzureDatalakeFileSystem``."""

    __orig_class__ = "adlfs.gen1.AzureDatalakeFileSystem"
    __orig_version__ = "2024.7.0"
    protocol = ("adl",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` entry of the inferred storage options."""
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(paths):
        """Use the host of the urlpath, if any, as ``store_name``."""
        host = infer_storage_options(paths).get("host", None)
        return {"store_name": host} if host else {}
294
295
class BoxFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``boxfs.boxfs.BoxFileSystem``."""

    __orig_class__ = "boxfs.boxfs.BoxFileSystem"
    __orig_version__ = "0.3.0"
    protocol = ("box",)
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path) -> str:
        """Strip the protocol, convert backslashes and anchor at the root."""
        normalised = super()._strip_protocol(path).replace("\\", "/")
        # every path is made to start with the root marker
        if normalised.startswith(cls.root_marker):
            return normalised
        return cls.root_marker + normalised
311
312
class DaskWorkerFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.dask.DaskWorkerFileSystem``."""

    __orig_class__ = "fsspec.implementations.dask.DaskWorkerFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("dask",)
    root_marker = ""
    sep = "/"

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Build the ``client`` kwarg as ``host:port`` when both are present."""
        options = infer_storage_options(path)
        if not ("host" in options and "port" in options):
            return {}
        return {"client": f"{options['host']}:{options['port']}"}
327
328
class DataFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'data' protocol. It defines no methods of its own, so
    # all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'fsspec.implementations.data.DataFileSystem'  # source class
    __orig_version__ = '2024.10.0'  # version recorded for the source package
    protocol = ('data',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
335
336
class DatabricksFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'dbfs' protocol. It defines no methods of its own, so
    # all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'fsspec.implementations.dbfs.DatabricksFileSystem'  # source class
    __orig_version__ = '2024.10.0'  # version recorded for the source package
    protocol = ('dbfs',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
343
344
class DictFSFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``morefs.dict.DictFS``."""

    __orig_class__ = "morefs.dict.DictFS"
    __orig_version__ = "0.2.2"
    protocol = ("dictfs",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path: str) -> str:
        """Remove a ``dictfs://`` prefix and normalise the slashes.

        Paths that still contain ``::`` or ``://`` only lose their trailing
        slashes. All other paths are returned with exactly one leading slash,
        or as ``root_marker`` when nothing is left.
        """
        scheme = "dictfs://"
        if path.startswith(scheme):
            path = path[len(scheme):]
        if "::" in path or "://" in path:
            return path.rstrip("/")
        trimmed = path.strip("/")
        if not trimmed:
            return cls.root_marker
        return "/" + trimmed
360
361
class DropboxDriveFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'dropbox' protocol. It defines no methods of its own,
    # so all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'dropboxdrivefs.core.DropboxDriveFileSystem'  # source class
    __orig_version__ = '1.4.1'  # version recorded for the source package
    protocol = ('dropbox',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
368
369
class FTPFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.ftp.FTPFileSystem``."""

    __orig_class__ = "fsspec.implementations.ftp.FTPFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("ftp",)
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the inferred path with exactly one leading slash."""
        inner = infer_storage_options(path)["path"]
        return "/" + inner.strip("/")

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the inferred storage options minus ``path`` and ``protocol``."""
        options = infer_storage_options(urlpath)
        for key in ("path", "protocol"):
            options.pop(key, None)
        return options
387
388
class GCSFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``gcsfs.core.GCSFileSystem``."""

    __orig_class__ = "gcsfs.core.GCSFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("gs", "gcs")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Remove a leading protocol marker; trailing slashes are kept."""
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]

        stripped = stringify_path(path)
        schemes = cls.protocol if not isinstance(cls.protocol, str) else (cls.protocol,)
        for scheme in schemes:
            # "://" takes precedence over "::"; at most one is removed per scheme
            for marker in ("://", "::"):
                prefix = scheme + marker
                if stripped.startswith(prefix):
                    stripped = stripped[len(prefix):]
                    break
        # fall back to root_marker so the minimal path is e.g. "/"
        return stripped or cls.root_marker

    @classmethod
    def _get_kwargs_from_urls(cls, path):
        """Request ``version_aware`` mode when the path names a generation."""
        generation = cls._split_path(path, version_aware=True)[2]
        return {} if generation is None else {"version_aware": True}

    @classmethod
    def _split_path(cls, path, version_aware=False):
        """
        Split a GCS path string into bucket, key and generation.

        Parameters
        ----------
        path : string
            Input path, like `gcs://mybucket/path/to/file`.
            Path is of the form: '[gs|gcs://]bucket[/key][?querystring][#fragment]'

        The object generation (object version) may be given either in the URL
        fragment or in the `generation` query parameter. If both are present,
        the fragment wins over the `generation` query parameter. The
        generation is only looked for when `version_aware` is true.

        Returns
        -------
        (bucket, key, generation) tuple
        """
        remainder = cls._strip_protocol(path).lstrip("/")
        bucket, slash, keypart = remainder.partition("/")
        if not slash:
            return remainder, "", None
        if not version_aware:
            return bucket, keypart, None

        parts = urlsplit(keypart)
        key, generation = keypart, None
        try:
            if parts.fragment:
                generation = parts.fragment
            elif parts.query:
                values = parse_qs(parts.query).get("generation")
                if values is not None:
                    generation = values[0]
            # Sanity check: a generation ID has to parse as an integer. If it
            # does not, the # or ? characters are taken to belong to the
            # object name and the key is left untouched.
            if generation is not None:
                int(generation)
                key = parts.path
        except ValueError:
            generation = None
        return bucket, key, generation
464
465
class GitFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.git.GitFileSystem``."""

    __orig_class__ = "fsspec.implementations.git.GitFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("git",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Drop the protocol and whatever precedes the first ``:`` and ``@``."""
        remainder = super()._strip_protocol(path).lstrip("/")
        # first everything up to ":" is cut, then everything up to "@"
        for delimiter in (":", "@"):
            if delimiter in remainder:
                remainder = remainder.split(delimiter, 1)[1]
        return remainder.lstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return the part before ``:`` as ``path`` and before ``@`` as ``ref``."""
        remainder = path[len("git://"):] if path.startswith("git://") else path
        found = {}
        for key, delimiter in (("path", ":"), ("ref", "@")):
            if delimiter in remainder:
                found[key], remainder = remainder.split(delimiter, 1)
        return found
492
493
class GithubFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.github.GithubFileSystem``."""

    __orig_class__ = "fsspec.implementations.github.GithubFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("github",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the in-repo path; defer to the base class without a username."""
        options = infer_storage_options(path)
        if "username" in options:
            return options["path"].lstrip("/")
        return super()._strip_protocol(path)

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Map the url's username, password and host to ``org``, ``repo``, ``sha``."""
        options = infer_storage_options(path)
        if "username" not in options:
            return {}
        kwargs = {"org": options["username"], "repo": options["password"]}
        sha = options["host"]
        if sha:
            kwargs["sha"] = sha
        return kwargs
517
518
class HTTPFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.http.HTTPFileSystem``."""

    __orig_class__ = "fsspec.implementations.http.HTTPFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("http", "https")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return ``path`` unchanged: for HTTP the full URL is always kept."""
        return path

    @classmethod
    def _parent(cls, path):
        """Return the parent URL, or ``""`` if it is 7 characters or shorter."""
        # overridden because _strip_protocol behaves differently for URLs
        parent = super()._parent(path)
        # anything not longer than "http://" is not a usable parent
        return parent if len(parent) > 7 else ""
538
539
class HadoopFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.arrow.HadoopFileSystem``."""

    __orig_class__ = "fsspec.implementations.arrow.HadoopFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("hdfs", "arrow_hdfs")
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the inferred path, collapsing a leading ``//`` to ``/``."""
        inner = infer_storage_options(path)["path"]
        # special case for "hdfs://path" (without the triple slash)
        return inner[1:] if inner.startswith("//") else inner

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Collect ``host``, ``user``, ``port`` and ``replication`` from the url."""
        options = infer_storage_options(path)
        kwargs = {}
        # (kwarg name, storage option name); falsy values are skipped
        for target, source in (("host", "host"), ("user", "username"), ("port", "port")):
            value = options.get(source, None)
            if value:
                kwargs[target] = value

        query = options.get("url_query", None)
        if query:
            replication = parse_qs(query).get("replication", None)
            if replication:
                kwargs["replication"] = int(replication[0])
        return kwargs
571
572
class HfFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'hf' protocol. It defines no methods of its own, so
    # all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'huggingface_hub.hf_file_system.HfFileSystem'  # source class
    __orig_version__ = '0.26.5'  # version recorded for the source package
    protocol = ('hf',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
579
580
class JupyterFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'jupyter' and 'jlab' protocols. It defines no methods of
    # its own, so all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'fsspec.implementations.jupyter.JupyterFileSystem'  # source class
    __orig_version__ = '2024.10.0'  # version recorded for the source package
    protocol = ('jupyter', 'jlab')  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
587
588
class LakeFSFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``lakefs_spec.spec.LakeFSFileSystem``."""

    __orig_class__ = "lakefs_spec.spec.LakeFSFileSystem"
    __orig_version__ = "0.11.0"
    protocol = ("lakefs",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip like the base class, but keep a trailing slash of the input."""
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]
        stripped = super()._strip_protocol(path)
        # the base class removed the trailing slash; put a single one back
        keep_slash = stringify_path(path).endswith("/")
        return stripped + "/" if keep_slash else stripped
605
606
class LibArchiveFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.libarchive.LibArchiveFileSystem``."""

    __orig_class__ = "fsspec.implementations.libarchive.LibArchiveFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("libarchive",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol and any leading slashes."""
        stripped = super()._strip_protocol(path)
        # file paths are always relative to the archive root
        return stripped.lstrip("/")
618
619
class LocalFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.local.LocalFileSystem``."""

    __orig_class__ = "fsspec.implementations.local.LocalFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("file", "local")
    root_marker = "/"
    sep = "/"
    local_file = True

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a ``file``/``local`` prefix and normalise to a posix-style path."""
        text = stringify_path(path)
        # order matters: the "//" variants must be tried before the bare ones
        for prefix in ("file://", "file:", "local://", "local:"):
            if text.startswith(prefix):
                text = text[len(prefix):]
                break

        text = make_path_posix(text)
        if os.sep == "/":
            return text.rstrip("/") or cls.root_marker

        # Non-posix separator: a stripped down version of
        # > drive, path = ntpath.splitdrive(path)
        if text[1:2] == ":":
            # Absolute drive-letter path, e.g. X:\Windows
            # Relative path with drive, e.g. X:Windows
            drive, rest = text[:2], text[2:]
        elif text[:2] == "//":
            # UNC drives, e.g. \\server\share or \\?\UNC\server\share
            # Device drives, e.g. \\.\device or \\?\device
            first = text.find("/", 2)
            second = text.find("/", first + 1) if first != -1 else -1
            if second == -1:
                drive, rest = text, ""
            else:
                drive, rest = text[:second], text[second:]
        else:
            # Relative path, e.g. Windows
            drive, rest = "", text

        return drive + (rest.rstrip("/") or cls.root_marker)

    @classmethod
    def _parent(cls, path):
        """Return the parent directory of ``path``."""
        stripped = cls._strip_protocol(path)
        head = stripped.rsplit("/", 1)[0]
        if os.sep == "/":
            # posix native
            return head or "/"
        # NT
        if len(head) <= 3 and head[1:2] == ":":
            # nt root (something like c:/)
            return head[0] + ":/"
        # More cases may be required here
        return head
682
683
class MemFSFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``morefs.memory.MemFS``."""

    __orig_class__ = "morefs.memory.MemFS"
    __orig_version__ = "0.2.2"
    protocol = ("memfs",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Remove a ``memfs://`` prefix, then delegate to the memory flavour."""
        scheme = "memfs://"
        remainder = path[len(scheme):] if path.startswith(scheme) else path
        return MemoryFileSystemFlavour._strip_protocol(remainder)  # pylint: disable=protected-access
696
697
class MemoryFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.memory.MemoryFileSystem``."""

    __orig_class__ = "fsspec.implementations.memory.MemoryFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("memory",)
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Remove a ``memory://`` prefix and normalise the slashes.

        Windows path objects are handed to the local flavour. Paths that
        still contain ``::`` or ``://`` only lose their trailing slashes. All
        other paths are returned with exactly one leading slash, or as ``""``
        when nothing is left.
        """
        if isinstance(path, PureWindowsPath):
            return LocalFileSystemFlavour._strip_protocol(path)
        if isinstance(path, PurePath):
            path = stringify_path(path)

        scheme = "memory://"
        if path.startswith(scheme):
            path = path[len(scheme):]
        if "::" in path or "://" in path:
            return path.rstrip("/")
        trimmed = path.strip("/")
        return "/" + trimmed if trimmed else ""
719
720
class OCIFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``ocifs.core.OCIFileSystem``."""

    __orig_class__ = "ocifs.core.OCIFileSystem"
    __orig_version__ = "1.3.1"
    protocol = ("oci", "ocilake")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol, keeping an ``@...`` part if nothing else is left."""
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]
        text = stringify_path(path)
        stripped = super()._strip_protocol(text)
        if stripped != cls.root_marker or "@" not in text:
            return stripped
        # stripping left only the root marker: return the text after the first "@"
        namespace = text.rstrip("/").split("@", 1)[1]
        return "@" + namespace

    @classmethod
    def _parent(cls, path):
        """Return the parent of ``path``.

        Raises
        ------
        ValueError
            If the stripped path contains neither ``/`` nor ``@``.
        """
        stripped = cls._strip_protocol(path.rstrip("/"))
        if "/" in stripped:
            return cls.root_marker + stripped.rpartition("/")[0]
        if "@" in stripped:
            return cls.root_marker + "@" + stripped.partition("@")[2]
        raise ValueError(f"the following path does not specify a namespace: {stripped}")
747
748
class OSSFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``ossfs.core.OSSFileSystem``."""

    __orig_class__ = "ossfs.core.OSSFileSystem"
    __orig_version__ = "2023.12.0"
    protocol = ("oss",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Turn path from fully-qualified to file-system-specific

        Parameters
        ----------
        path : Union[str, List[str]]
            Input path, like
            `http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject`
            `oss://mybucket/myobject`

        Examples
        --------
        >>> _strip_protocol(
            "http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject"
            )
        ('/mybucket/myobject')
        >>> _strip_protocol(
            "oss://mybucket/myobject"
            )
        ('/mybucket/myobject')
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]

        text = stringify_path(path)
        if text.startswith("oss://"):
            # only "oss:/" is cut off, so one leading slash remains
            text = text[5:]

        found = re.match(
            r"https?://(?P<endpoint>oss.+aliyuncs\.com)(?P<path>/.+)", text
        )
        if found is not None:
            text = found.group("path")
        return text if text else cls.root_marker
787
788
class OverlayFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'overlayfs' protocol. It defines no methods of its own,
    # so all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'morefs.overlay.OverlayFileSystem'  # source class
    __orig_version__ = '0.2.2'  # version recorded for the source package
    protocol = ('overlayfs',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
795
796
class ReferenceFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'reference' protocol. It defines no methods of its own,
    # so all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'fsspec.implementations.reference.ReferenceFileSystem'  # source class
    __orig_version__ = '2024.10.0'  # version recorded for the source package
    protocol = ('reference',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
803
804
class S3FileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``s3fs.core.S3FileSystem``."""

    __orig_class__ = "s3fs.core.S3FileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("s3", "s3a")
    root_marker = ""
    sep = "/"

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """
        Request version_aware mode for urlpaths carrying a ?versionId=

        The presence of a ``versionId`` query parameter is taken to mean
        that the filesystem should run in version_aware mode.
        """
        query = infer_storage_options(urlpath).get("url_query")
        if query is not None and "versionId" in parse_qs(query):
            return {"version_aware": True}
        return {}
830
831
class SFTPFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.sftp.SFTPFileSystem``."""

    __orig_class__ = "fsspec.implementations.sftp.SFTPFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("sftp", "ssh")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` entry of the inferred storage options."""
        options = infer_storage_options(path)
        return options["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the inferred storage options minus ``path`` and ``protocol``."""
        options = infer_storage_options(urlpath)
        for key in ("path", "protocol"):
            options.pop(key, None)
        return options
849
850
class SMBFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.smb.SMBFileSystem``."""

    __orig_class__ = "fsspec.implementations.smb.SMBFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("smb",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` entry of the inferred storage options."""
        options = infer_storage_options(path)
        return options["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return the inferred storage options minus ``path`` and ``protocol``."""
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        options = infer_storage_options(path)
        for key in ("path", "protocol"):
            options.pop(key, None)
        return options
869
870
class TarFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'tar' protocol. It defines no methods of its own, so
    # all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'fsspec.implementations.tar.TarFileSystem'  # source class
    __orig_version__ = '2024.10.0'  # version recorded for the source package
    protocol = ('tar',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
877
878
class WandbFSFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'wandb' protocol. It defines no methods of its own, so
    # all path handling is inherited from AbstractFileSystemFlavour.
    __orig_class__ = 'wandbfs._wandbfs.WandbFS'  # source class
    __orig_version__ = '0.0.2'  # version recorded for the source package
    protocol = ('wandb',)  # name(s) this flavour is registered under
    root_marker = ''
    sep = '/'
885
886
class WebHDFSFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.webhdfs.WebHDFS``."""

    __orig_class__ = "fsspec.implementations.webhdfs.WebHDFS"
    __orig_version__ = "2024.10.0"
    protocol = ("webhdfs", "webHDFS")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` entry of the inferred storage options."""
        options = infer_storage_options(path)
        return options["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the storage options without path/protocol, ``username`` as ``user``."""
        options = infer_storage_options(urlpath)
        for key in ("path", "protocol"):
            options.pop(key, None)
        if "username" in options:
            # the key is renamed and thereby moved to the end of the dict
            options["user"] = options.pop("username")
        return options
906
907
class WebdavFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``webdav4.fsspec.WebdavFileSystem``."""

    __orig_class__ = "webdav4.fsspec.WebdavFileSystem"
    __orig_version__ = "0.10.0"
    protocol = ("webdav", "dav")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path: str) -> str:
        """Delegate to the base class; overridden only to cast the result to ``str``."""
        return cast(str, super()._strip_protocol(path))
920
921
class XRootDFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec_xrootd.xrootd.XRootDFileSystem``."""

    __orig_class__ = 'fsspec_xrootd.xrootd.XRootDFileSystem'
    __orig_version__ = '0.4.0'
    protocol = ('root',)
    root_marker = '/'
    sep = '/'

    @classmethod
    def _strip_protocol(cls, path: str | list[str]) -> Any:
        """Reduce a ``root://`` url to its path plus optional query string.

        Parameters
        ----------
        path : str or list of str
            A url starting with one of the class protocols, an already
            stripped path, or a list of either.

        Returns
        -------
        str or list of str
            For urls, the url path followed by ``?<query>`` when a query is
            present; already stripped paths are returned as they are. In both
            cases trailing slashes are removed and an empty result becomes
            ``root_marker``. Lists are processed element by element.

        Raises
        ------
        ValueError
            If ``path`` is neither a string nor a list.
        """
        if isinstance(path, str):
            if path.startswith(cls.protocol):
                parts = urlsplit(path)
                # The query suffix is built separately on purpose: written as
                # `parts.path + f"?{q}" if q else ""` the conditional would
                # apply to the whole sum and discard the path of every url
                # without a query string.
                query = f"?{parts.query}" if parts.query else ""
                return (parts.path + query).rstrip("/") or cls.root_marker
            # assume already stripped
            return path.rstrip("/") or cls.root_marker
        elif isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]
        else:
            raise ValueError("Strip protocol not given string or list")

    @staticmethod
    def _get_kwargs_from_urls(u: str) -> dict[Any, Any]:
        """Return the url's network location as ``hostid``."""
        url = urlsplit(u)
        # The hostid encapsulates user,pass,host,port in one string
        return {"hostid": url.netloc}
946
947
class ZipFileSystemFlavour(AbstractFileSystemFlavour):
    """Flavour extracted from ``fsspec.implementations.zip.ZipFileSystem``."""

    __orig_class__ = "fsspec.implementations.zip.ZipFileSystem"
    __orig_version__ = "2024.10.0"
    protocol = ("zip",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol and any leading slashes."""
        stripped = super()._strip_protocol(path)
        # zip file paths are always relative to the archive root
        return stripped.lstrip("/")
959
960
class _DVCFileSystemFlavour(AbstractFileSystemFlavour):
    # Flavour for the 'dvc' protocol. It defines no methods of its own, so
    # all path handling is inherited from AbstractFileSystemFlavour; only the
    # root marker differs from the base class default.
    __orig_class__ = 'dvc.fs.dvc._DVCFileSystem'  # source class
    __orig_version__ = '3.58.0'  # version recorded for the source package
    protocol = ('dvc',)  # name(s) this flavour is registered under
    root_marker = '/'
    sep = '/'