1""" upath._flavour_sources
2
3<experimental!>
4
5Warning
6-------
7 Do not modify this file manually!
8 It is generated by `dev/generate_flavours.py`
9
To be able to parse the different filesystem URI schemes, we need
the string parsing functionality of each of the filesystem implementations.
12In an attempt to support parsing uris without having to import the
13specific filesystems, we extract the necessary subset of the
14AbstractFileSystem classes and generate a new "flavour" class for
15each of the known filesystems. This will allow us to provide a
16`PurePath` equivalent `PureUPath` for each protocol in the future
17without a direct dependency on the underlying filesystem package.
18
19"""
20#
21# skipping protocols:
22# - blockcache
23# - cached
24# - dir
25# - filecache
26# protocol import errors:
27# - gdrive (Please install gdrive_fs for access to Google Drive)
28# - generic (GenericFileSystem: '_strip_protocol' not a classmethod)
29# - pyscript (Install requests (cpython) or run in pyscript)
30# - tos (Install tosfs to access ByteDance volcano engine Tinder Object Storage)
31# - tosfs (Install tosfs to access ByteDance volcano engine Tinder Object Storage)
32#
33from __future__ import annotations
34
35import logging
36import os
37import re
38from pathlib import PurePath
39from pathlib import PureWindowsPath
40from typing import Any
41from typing import Literal
42from typing import cast
43from urllib.parse import parse_qs
44from urllib.parse import urlsplit
45
46from fsspec.implementations.local import make_path_posix
47from fsspec.utils import infer_storage_options
48from fsspec.utils import stringify_path
49
# Names exported by ``from upath._flavour_sources import *``.
__all__ = [
    "AbstractFileSystemFlavour",
    "FileSystemFlavourBase",
    "flavour_registry",
]

logger = logging.getLogger(__name__)
# Maps a protocol name to the flavour class registered for it; filled in by
# ``FileSystemFlavourBase.__init_subclass__`` as each subclass is defined.
flavour_registry: dict[str, type[FileSystemFlavourBase]] = {}
58
59
class FileSystemFlavourBase:
    """Base class for the fsspec flavours.

    Defining a subclass records it in ``flavour_registry`` under each of the
    protocol names listed in its ``protocol`` attribute.
    """

    protocol: str | tuple[str, ...]
    root_marker: Literal["/", ""]
    sep: Literal["/"]

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol from *path*; must be provided by subclasses."""
        raise NotImplementedError

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Extract kwargs encoded in *path*; must be provided by subclasses."""
        raise NotImplementedError

    @classmethod
    def _parent(cls, path):
        """Return the parent of *path*; must be provided by subclasses."""
        raise NotImplementedError

    def __init_subclass__(cls: Any, **kwargs):
        """Register the new subclass for every protocol it declares.

        Extra class keyword arguments are accepted and ignored.

        Raises
        ------
        ValueError
            If one of the protocols is already registered (or is listed
            twice on the same class). In that case nothing is registered.
        """
        if isinstance(cls.protocol, str):
            protocols = (cls.protocol,)
        else:
            protocols = tuple(cls.protocol)

        # Validate every protocol before touching the registry, so that a
        # conflict on a later protocol cannot leave earlier ones registered
        # to a class whose definition failed.
        seen = set()
        for protocol in protocols:
            if protocol in flavour_registry or protocol in seen:
                raise ValueError(f"protocol {protocol!r} already registered")
            seen.add(protocol)

        for protocol in protocols:
            flavour_registry[protocol] = cls
88
89
class AbstractFileSystemFlavour(FileSystemFlavourBase):
    # Path-parsing subset taken from fsspec.spec.AbstractFileSystem.
    __orig_class__ = "fsspec.spec.AbstractFileSystem"
    __orig_version__ = "2025.10.0"
    protocol: str | tuple[str, ...] = "abstract"
    root_marker: Literal['', '/'] = ""
    sep: Literal['/'] = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Turn a fully-qualified path into a file-system-specific one.

        Lists are handled element-wise. For each protocol of the flavour a
        leading ``<protocol>://`` or ``<protocol>::`` is removed; trailing
        slashes are then dropped and ``root_marker`` is returned if nothing
        remains.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]

        path = stringify_path(path)
        schemes = cls.protocol
        if isinstance(schemes, str):
            schemes = (schemes,)
        for scheme in schemes:
            # at most one of the two delimiters is stripped per protocol
            for delimiter in ("://", "::"):
                prefix = scheme + delimiter
                if path.startswith(prefix):
                    path = path[len(prefix) :]
                    break

        # use of root_marker to make minimum required path, e.g., "/"
        return path.rstrip("/") or cls.root_marker

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return the constructor kwargs encoded in *path*.

        The default implementation extracts nothing and returns an empty
        dict; flavours whose URLs carry options (for example a user and
        host as in "sftp://user@host:/my/path") override this.
        """
        return {}

    @classmethod
    def _parent(cls, path):
        """Return the parent of *path* after stripping the protocol."""
        path = cls._strip_protocol(path)
        if "/" not in path:
            return cls.root_marker
        head = path.rsplit("/", 1)[0]
        return cls.root_marker + head.lstrip(cls.root_marker)
137
138
class AsyncFileSystemWrapperFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: the inherited AbstractFileSystemFlavour
    # behaviour is used as-is.
    __orig_class__ = "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper"
    __orig_version__ = "2025.10.0"
    protocol = ("asyncwrapper", "async_wrapper")
    root_marker = ""
    sep = "/"
145
146
class AsyncLocalFileSystemFlavour(AbstractFileSystemFlavour):
    # Declares no protocol names, so nothing is added to the registry.
    __orig_class__ = "morefs.asyn_local.AsyncLocalFileSystem"
    __orig_version__ = "0.2.2"
    protocol = ()
    root_marker = "/"
    sep = "/"
    local_file = True

    @classmethod
    def _strip_protocol(cls, path):
        """Strip a ``file:``/``local:`` prefix and normalise to a posix path.

        On platforms where ``os.sep`` is not "/", the drive part (drive
        letter or UNC/device prefix) is split off so that trailing slashes
        are only removed from the remainder.
        """
        path = stringify_path(path)
        # longest prefixes first; only the first match is removed
        for prefix in ("file://", "file:", "local://", "local:"):
            if path.startswith(prefix):
                path = path[len(prefix) :]
                break

        path = make_path_posix(path)
        if os.sep == "/":
            return path.rstrip("/") or cls.root_marker

        # This code-path is a stripped down version of
        # > drive, path = ntpath.splitdrive(path)
        if path[1:2] == ":":
            # Absolute drive-letter path, e.g. X:\Windows
            # Relative path with drive, e.g. X:Windows
            drive, path = path[:2], path[2:]
        elif path[:2] == "//":
            # UNC drives, e.g. \\server\share or \\?\UNC\server\share
            # Device drives, e.g. \\.\device or \\?\device
            first = path.find("/", 2)
            second = -1 if first == -1 else path.find("/", first + 1)
            if second == -1:
                drive, path = path, ""
            else:
                drive, path = path[:second], path[second:]
        else:
            # Relative path, e.g. Windows
            drive = ""

        return drive + (path.rstrip("/") or cls.root_marker)

    @classmethod
    def _parent(cls, path):
        """Return the parent directory of *path* after stripping the protocol."""
        head = cls._strip_protocol(path).rsplit("/", 1)[0]
        if os.sep == "/":
            # posix native
            return head or "/"
        # NT
        if len(head) <= 3 and head[1:2] == ":":
            # nt root (something like c:/)
            return head[0] + ":/"
        # More cases may be required here
        return head
209
210
class AzureBlobFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "adlfs.spec.AzureBlobFileSystem"
    __orig_version__ = "2025.8.0"
    protocol = ("abfs", "az", "abfss")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path: str):
        """
        Remove the protocol from the input path

        Parameters
        ----------
        path: str
            Path to remove the protocol from (a list is handled element-wise)

        Returns
        -------
        str
            The path without the protocol and without leading slashes
        """
        if isinstance(path, list):  # type: ignore[unreachable]
            return [cls._strip_protocol(p) for p in path]  # type: ignore[unreachable]

        store_suffix = ".dfs.core.windows.net"
        logger.debug(f"_strip_protocol for {path}")
        if not path.startswith(("abfs://", "az://", "abfss://")):
            # give protocol-less input a scheme so it parses like a URL
            path = "abfs://" + path.lstrip("/")

        ops = infer_storage_options(path)
        if "username" in ops:
            username = ops.get("username", None)
            if username:
                ops["path"] = username + ops["path"]
        elif ops.get("host", None) and store_suffix not in ops["host"]:
            # we need to make sure that the path retains
            # the format {host}/{path}
            # here host is the container_name
            # (no store-suffix, so this is container-name)
            ops["path"] = ops["host"] + ops["path"]

        url_query = ops.get("url_query")
        if url_query is not None:
            ops["path"] = f"{ops['path']}?{url_query}"

        logger.debug(f"_strip_protocol({path}) = {ops}")
        return ops["path"].lstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Get the account_name from the urlpath and pass to storage_options.

        Also sets ``version_aware`` when the query string contains a
        ``versionid`` parameter.
        """
        ops = infer_storage_options(urlpath)
        out = {}

        host = ops.get("host", None)
        if host and (
            match := re.match(
                r"(?P<account_name>.+)\.(dfs|blob)\.core\.windows\.net", host
            )
        ):
            out["account_name"] = match.group("account_name")

        url_query = ops.get("url_query")
        if url_query is not None and "versionid" in parse_qs(url_query):
            out["version_aware"] = True
        return out
282
283
class AzureDatalakeFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "adlfs.gen1.AzureDatalakeFileSystem"
    __orig_version__ = "2025.8.0"
    protocol = ("adl",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` component reported by ``infer_storage_options``."""
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(paths):
        """Get the store_name from the urlpath and pass to storage_options"""
        host = infer_storage_options(paths).get("host", None)
        return {"store_name": host} if host else {}
304
305
class BoxFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "boxfs.boxfs.BoxFileSystem"
    __orig_version__ = "0.3.0"
    protocol = ("box",)
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path) -> str:
        """Strip the protocol, convert backslashes and anchor at the root marker."""
        normalized = super()._strip_protocol(path).replace("\\", "/")
        # Make all paths start with root marker
        if normalized.startswith(cls.root_marker):
            return normalized
        return cls.root_marker + normalized
321
322
class DaskWorkerFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.dask.DaskWorkerFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("dask",)
    root_marker = ""
    sep = "/"

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return ``{"client": "<host>:<port>"}`` when both parts are present."""
        so = infer_storage_options(path)
        if "host" not in so or "port" not in so:
            return {}
        return {"client": f"{so['host']}:{so['port']}"}
337
338
class DataFileSystemFlavour(AbstractFileSystemFlavour):
    # Unlike the other flavours, this one uses an empty separator and a
    # single space as alternative separator.
    __orig_class__ = "fsspec.implementations.data.DataFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("data",)
    root_marker = ""
    sep = ""  # type: ignore[assignment]
    altsep = " "  # type: ignore[assignment]
346
347
class DatabricksFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "fsspec.implementations.dbfs.DatabricksFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("dbfs",)
    root_marker = ""
    sep = "/"
354
355
class DictFSFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "morefs.dict.DictFS"
    __orig_version__ = "0.2.2"
    protocol = ("dictfs",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path: str) -> str:
        """Drop a leading ``dictfs://`` and normalise to a "/"-anchored path.

        Paths that still contain "::" or "://" only lose their trailing
        slashes; an empty result becomes ``root_marker``.
        """
        path = path.removeprefix("dictfs://")
        if "::" in path or "://" in path:
            return path.rstrip("/")
        trimmed = path.strip("/")
        if not trimmed:
            return cls.root_marker
        return "/" + trimmed
371
372
class DropboxDriveFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "dropboxdrivefs.core.DropboxDriveFileSystem"
    __orig_version__ = "1.4.1"
    protocol = ("dropbox",)
    root_marker = ""
    sep = "/"
379
380
class FTPFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.ftp.FTPFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("ftp",)
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the URL's path component with exactly one leading slash."""
        url_path = infer_storage_options(path)["path"]
        return "/" + url_path.strip("/")

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the inferred storage options minus ``path`` and ``protocol``."""
        out = infer_storage_options(urlpath)
        for key in ("path", "protocol"):
            out.pop(key, None)
        return out
398
399
class GCSFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "gcsfs.core.GCSFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("gs", "gcs")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Remove a leading ``<protocol>://`` or ``<protocol>::`` from *path*.

        Lists are handled element-wise. Unlike the base implementation,
        trailing slashes are kept.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]

        path = stringify_path(path)
        schemes = cls.protocol
        if isinstance(schemes, str):
            schemes = (schemes,)
        for scheme in schemes:
            # at most one of the two delimiters is stripped per protocol
            for delimiter in ("://", "::"):
                prefix = scheme + delimiter
                if path.startswith(prefix):
                    path = path[len(prefix) :]
                    break
        # use of root_marker to make minimum required path, e.g., "/"
        return path or cls.root_marker

    @classmethod
    def _get_kwargs_from_urls(cls, path):
        """Return ``{"version_aware": True}`` if *path* names a generation."""
        generation = cls._split_path(path, version_aware=True)[2]
        return {} if generation is None else {"version_aware": True}

    @classmethod
    def _split_path(cls, path, version_aware=False):
        """
        Normalise GCS path string into bucket and key.

        Parameters
        ----------
        path : string
            Input path, like `gcs://mybucket/path/to/file`.
            Path is of the form: '[gs|gcs://]bucket[/key][?querystring][#fragment]'

        GCS allows object generation (object version) to be specified in either
        the URL fragment or the `generation` query parameter. When provided,
        the fragment will take priority over the `generation` query parameter.
        The generation is only looked for when `version_aware` is true.

        Returns
        -------
        (bucket, key, generation) tuple
        """
        path = cls._strip_protocol(path).lstrip("/")
        bucket, slash, keypart = path.partition("/")
        if not slash:
            return path, "", None
        if not version_aware:
            return bucket, keypart, None

        parts = urlsplit(keypart)
        key = keypart
        generation = None
        try:
            if parts.fragment:
                generation = parts.fragment
            elif parts.query:
                generation = parse_qs(parts.query).get("generation", [None])[0]
            # Sanity check whether this could be a valid generation ID. If
            # it is not, assume that # or ? characters are supposed to be
            # part of the object name.
            if generation is not None:
                int(generation)
                key = parts.path
        except ValueError:
            generation = None
        return bucket, key, generation
475
476
class GistFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.gist.GistFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("gist",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """
        Remove 'gist://' from the path, if present.

        The path component reported by ``infer_storage_options`` is used
        (falling back to the input) and leading slashes are removed.
        """
        options = infer_storage_options(path)
        stripped = options.get("path", path)
        return stripped.lstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(path):
        """
        Parse 'gist://' style URLs into GistFileSystem constructor kwargs.
        For example:
          gist://:TOKEN@<gist_id>/file.txt
          gist://username:TOKEN@<gist_id>/file.txt
        """
        so = infer_storage_options(path)
        out = {}
        # URL component -> constructor kwarg ('host' is taken as the gist ID)
        renames = (
            ("username", "username"),
            ("password", "token"),
            ("host", "gist_id"),
        )
        for source, target in renames:
            if so.get(source):
                out[target] = so[source]

        # Extract SHA and filename from path
        if so.get("path"):
            tail = so["path"].rsplit("/", 2)[-2:]
            if len(tail) == 2:
                sha, filename = tail
                if sha:  # SHA present
                    out["sha"] = sha
                if filename:  # filename also present
                    out["filenames"] = [filename]

        return out
523
524
class GitFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.git.GitFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("git",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the part of *path* after the protocol, first ":" and first "@"."""
        path = super()._strip_protocol(path).lstrip("/")
        for marker in (":", "@"):
            _, found, remainder = path.partition(marker)
            if found:
                path = remainder
        return path.lstrip("/")

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return ``path`` (text before ":") and ``ref`` (text before "@")."""
        path = path.removeprefix("git://")
        out = {}
        for marker, key in ((":", "path"), ("@", "ref")):
            head, found, remainder = path.partition(marker)
            if found:
                out[key] = head
                path = remainder
        return out
550
551
class GithubFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.github.GithubFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("github",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol; URLs with a username yield their path component."""
        opts = infer_storage_options(path)
        if "username" in opts:
            return opts["path"].lstrip("/")
        return super()._strip_protocol(path)

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Map URL username/password/host to ``org``/``repo``/``sha`` kwargs."""
        opts = infer_storage_options(path)
        if "username" not in opts:
            return {}
        out = {"org": opts["username"], "repo": opts["password"]}
        sha = opts["host"]
        if sha:
            out["sha"] = sha
        return out
575
576
class HTTPFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.http.HTTPFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("http", "https")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """For HTTP, we always want to keep the full URL"""
        return path

    @classmethod
    def _parent(cls, path):
        """Return the parent URL, or "" when it is 7 characters or shorter."""
        # override, since _strip_protocol is different for URLs
        parent = super()._parent(path)
        # anything longer than 7 characters ("http://...") is kept
        return parent if len(parent) > 7 else ""
596
597
class HadoopFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.arrow.HadoopFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("hdfs", "arrow_hdfs")
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the URL's path component, collapsing a leading "//" to "/"."""
        stripped = infer_storage_options(path)["path"]
        # special case for "hdfs://path" (without the triple slash)
        return stripped[1:] if stripped.startswith("//") else stripped

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Collect host, user, port and replication from the URL when present."""
        ops = infer_storage_options(path)
        out = {}
        # option key -> kwarg name; only truthy values are copied
        for source, target in (("host", "host"), ("username", "user"), ("port", "port")):
            if ops.get(source, None):
                out[target] = ops[source]
        if ops.get("url_query", None):
            replication = parse_qs(ops["url_query"]).get("replication", None)
            if replication:
                out["replication"] = int(replication[0])
        return out
629
630
class HfFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "huggingface_hub.hf_file_system.HfFileSystem"
    __orig_version__ = "1.2.3"
    protocol = ("hf",)
    root_marker = ""
    sep = "/"
637
638
class JupyterFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "fsspec.implementations.jupyter.JupyterFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("jupyter", "jlab")
    root_marker = ""
    sep = "/"
645
646
class LakeFSFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "lakefs_spec.spec.LakeFSFileSystem"
    __orig_version__ = "0.12.0"
    protocol = ("lakefs",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol like the base class, but keep one trailing slash.

        If the input ends with "/", a single "/" is appended to the
        base-class result. Lists are handled element-wise.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]
        stripped = super()._strip_protocol(path)
        suffix = "/" if stringify_path(path).endswith("/") else ""
        return stripped + suffix
663
664
class LibArchiveFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.libarchive.LibArchiveFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("libarchive",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol and any leading slashes."""
        # file paths are always relative to the archive root
        stripped = super()._strip_protocol(path)
        return stripped.lstrip("/")
676
677
class LocalFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.local.LocalFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("file", "local")
    root_marker = "/"
    sep = "/"
    local_file = True

    @classmethod
    def _strip_protocol(cls, path):
        """Strip a ``file:``/``local:`` prefix and normalise to a posix path.

        On platforms where ``os.sep`` is not "/", the drive part (drive
        letter or UNC/device prefix) is split off so that trailing slashes
        are only removed from the remainder.
        """
        path = stringify_path(path)
        # longest prefixes first; only the first match is removed
        for prefix in ("file://", "file:", "local://", "local:"):
            if path.startswith(prefix):
                path = path[len(prefix) :]
                break

        path = make_path_posix(path)
        if os.sep == "/":
            return path.rstrip("/") or cls.root_marker

        # This code-path is a stripped down version of
        # > drive, path = ntpath.splitdrive(path)
        if path[1:2] == ":":
            # Absolute drive-letter path, e.g. X:\Windows
            # Relative path with drive, e.g. X:Windows
            drive, path = path[:2], path[2:]
        elif path[:2] == "//":
            # UNC drives, e.g. \\server\share or \\?\UNC\server\share
            # Device drives, e.g. \\.\device or \\?\device
            first = path.find("/", 2)
            second = -1 if first == -1 else path.find("/", first + 1)
            if second == -1:
                drive, path = path, ""
            else:
                drive, path = path[:second], path[second:]
        else:
            # Relative path, e.g. Windows
            drive = ""

        return drive + (path.rstrip("/") or cls.root_marker)

    @classmethod
    def _parent(cls, path):
        """Return the parent directory of *path* after stripping the protocol."""
        head = cls._strip_protocol(path).rsplit("/", 1)[0]
        if os.sep == "/":
            # posix native
            return head or "/"
        # NT
        if len(head) <= 3 and head[1:2] == ":":
            # nt root (something like c:/)
            return head[0] + ":/"
        # More cases may be required here
        return head
740
741
class MemFSFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "morefs.memory.MemFS"
    __orig_version__ = "0.2.2"
    protocol = ("memfs",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a leading ``memfs://`` and delegate to the memory flavour."""
        remainder = path.removeprefix("memfs://")
        return MemoryFileSystemFlavour._strip_protocol(remainder)  # pylint: disable=protected-access
754
755
class MemoryFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.memory.MemoryFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("memory",)
    root_marker = "/"
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a leading ``memory://`` and normalise to a "/"-anchored path.

        Windows pure paths are delegated to the local flavour. Paths that
        still contain "::" or "://" only lose their trailing slashes; an
        empty result is returned as "".
        """
        if isinstance(path, PureWindowsPath):
            return LocalFileSystemFlavour._strip_protocol(path)
        if isinstance(path, PurePath):
            path = stringify_path(path)

        path = path.removeprefix("memory://")
        if "::" in path or "://" in path:
            return path.rstrip("/")
        trimmed = path.strip("/")
        if not trimmed:
            return ""
        return "/" + trimmed
776
777
class OCIFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "ocifs.core.OCIFileSystem"
    __orig_version__ = "1.3.4"
    protocol = ("oci", "ocilake")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol, keeping an ``@...`` part if nothing else is left.

        Lists are handled element-wise.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]
        text = stringify_path(path)
        stripped = super()._strip_protocol(text)
        if stripped != cls.root_marker or "@" not in text:
            return stripped
        # the base class reduced the input to the root marker: return the
        # text after the first "@" instead
        return "@" + text.rstrip("/").split("@", 1)[1]

    @classmethod
    def _parent(cls, path):
        """Return the parent of *path*.

        Raises
        ------
        ValueError
            If the stripped path contains neither "/" nor "@".
        """
        path = cls._strip_protocol(path.rstrip("/"))
        if "/" in path:
            return cls.root_marker + path.rsplit("/", 1)[0]
        if "@" in path:
            return cls.root_marker + "@" + path.split("@", 1)[1]
        raise ValueError(f"the following path does not specify a namespace: {path}")
804
805
class OSSFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "ossfs.core.OSSFileSystem"
    __orig_version__ = "2025.5.0"
    protocol = ("oss",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Turn path from fully-qualified to file-system-specific

        Parameters
        ----------
        path : Union[str, List[str]]
            Input path, like
            `http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject`
            `oss://mybucket/myobject`

        Examples
        --------
        >>> _strip_protocol(
            "http://oss-cn-hangzhou.aliyuncs.com/mybucket/myobject"
            )
        ('/mybucket/myobject')
        >>> _strip_protocol(
            "oss://mybucket/myobject"
            )
        ('/mybucket/myobject')
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]

        text = stringify_path(path)
        if text.startswith("oss://"):
            # drop "oss:/" only, so the result keeps one leading slash
            text = text[len("oss:/") :]

        endpoint_url = re.match(
            r"https?://(?P<endpoint>oss.+aliyuncs\.com)(?P<path>/.+)", text
        )
        if endpoint_url:
            text = endpoint_url.group("path")
        return text or cls.root_marker
844
845
class OverlayFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "morefs.overlay.OverlayFileSystem"
    __orig_version__ = "0.2.2"
    protocol = ("overlayfs",)
    root_marker = ""
    sep = "/"
852
853
class ReferenceFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "fsspec.implementations.reference.ReferenceFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("reference",)
    root_marker = ""
    sep = "/"
860
861
class S3FileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "s3fs.core.S3FileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("s3", "s3a")
    root_marker = ""
    sep = "/"

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """
        When we have a urlpath that contains a ?versionId=

        Assume that we want to use version_aware mode for
        the filesystem.
        """
        out = {}
        query = urlsplit(urlpath).query
        if query is not None and "versionId" in parse_qs(query):
            out["version_aware"] = True
        return out
888
889
class SFTPFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.sftp.SFTPFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("sftp", "ssh")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` component reported by ``infer_storage_options``."""
        options = infer_storage_options(path)
        return options["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the inferred storage options minus ``path`` and ``protocol``."""
        out = infer_storage_options(urlpath)
        for key in ("path", "protocol"):
            out.pop(key, None)
        return out
907
908
class SMBFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.smb.SMBFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("smb",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` component reported by ``infer_storage_options``."""
        options = infer_storage_options(path)
        return options["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return the inferred storage options minus ``path`` and ``protocol``."""
        # smb://workgroup;user:password@host:port/share/folder/file.csv
        out = infer_storage_options(path)
        for key in ("path", "protocol"):
            out.pop(key, None)
        return out
927
928
class SimpleCacheFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "fsspec.implementations.cached.SimpleCacheFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("simplecache",)
    root_marker = ""
    sep = "/"
    local_file = True
936
937
class TarFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "fsspec.implementations.tar.TarFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("tar",)
    root_marker = ""
    sep = "/"
944
945
class WandbFSFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "wandbfs._wandbfs.WandbFS"
    __orig_version__ = "0.0.2"
    protocol = ("wandb",)
    root_marker = ""
    sep = "/"
952
953
class WebHDFSFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.webhdfs.WebHDFS"
    __orig_version__ = "2025.10.0"
    protocol = ("webhdfs", "webHDFS")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Return the ``path`` component reported by ``infer_storage_options``."""
        options = infer_storage_options(path)
        return options["path"]

    @staticmethod
    def _get_kwargs_from_urls(urlpath):
        """Return the inferred options without path/protocol; username -> user."""
        out = infer_storage_options(urlpath)
        for key in ("path", "protocol"):
            out.pop(key, None)
        if "username" in out:
            out["user"] = out.pop("username")
        return out
973
974
class WebdavFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "webdav4.fsspec.WebdavFileSystem"
    __orig_version__ = "0.10.0"
    protocol = ("webdav", "dav")
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path: str) -> str:
        """Strips protocol from the given path, overriding for type-casting."""
        return cast(str, super()._strip_protocol(path))
987
988
class XRootDFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = 'fsspec_xrootd.xrootd.XRootDFileSystem'
    __orig_version__ = '0.5.1'
    protocol = ('root',)
    root_marker = '/'
    sep = '/'

    @classmethod
    def _strip_protocol(cls, path: str | list[str]) -> Any:
        """Strip the protocol and host from *path*.

        For a string starting with one of the flavour's protocols, the
        result is the URL's path, followed by ``?<query>`` when a query is
        present. Any other string is assumed to be already stripped. In
        both cases trailing slashes are removed and ``root_marker`` is
        returned if nothing remains. Lists are handled element-wise.

        Raises
        ------
        ValueError
            If *path* is neither a string nor a list.
        """
        if isinstance(path, list):
            return [cls._strip_protocol(item) for item in path]
        if not isinstance(path, str):
            raise ValueError("Strip protocol not given string or list")

        if path.startswith(cls.protocol):
            url = urlsplit(path)
            # Only the query suffix is optional. Without the explicit
            # grouping the conditional expression would also swallow the
            # path whenever the URL has no query.
            query = f"?{url.query}" if url.query else ""
            return (url.path + query).rstrip("/") or cls.root_marker
        # assume already stripped
        return path.rstrip("/") or cls.root_marker

    @staticmethod
    def _get_kwargs_from_urls(u: str) -> dict[Any, Any]:
        """Return the URL's network location as the ``hostid`` kwarg."""
        url = urlsplit(u)
        # The hostid encapsulates user,pass,host,port in one string
        return {"hostid": url.netloc}
1013
1014
class ZipFileSystemFlavour(AbstractFileSystemFlavour):
    __orig_class__ = "fsspec.implementations.zip.ZipFileSystem"
    __orig_version__ = "2025.10.0"
    protocol = ("zip",)
    root_marker = ""
    sep = "/"

    @classmethod
    def _strip_protocol(cls, path):
        """Strip the protocol and any leading slashes."""
        # zip file paths are always relative to the archive root
        stripped = super()._strip_protocol(path)
        return stripped.lstrip("/")
1026
1027
class _DVCFileSystemFlavour(AbstractFileSystemFlavour):
    # No parsing overrides: inherited behaviour applies.
    __orig_class__ = "dvc.fs.dvc._DVCFileSystem"
    __orig_version__ = "3.65.0"
    protocol = ("dvc",)
    root_marker = "/"
    sep = "/"