Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/fsspec/mapping.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

106 statements  

1import array 

2import logging 

3import posixpath 

4import warnings 

5from collections.abc import MutableMapping 

6from functools import cached_property 

7 

8from fsspec.core import url_to_fs 

9 

# Module-level logger for the mapper; FSMap.clear reports through it.
logger = logging.getLogger("fsspec.mapping")

11 

12 

class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable mapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=False)
        if True, require that the root exists and then touch and remove a
        file named ``a`` below it, to check for write access.
    create: bool (=False)
        if True, make the root directory when it does not exist yet.
    missing_exceptions: None or tuple
        exception types that are treated as "key is missing" and converted
        to KeyError when reading. Defaults to
        (FileNotFoundError, IsADirectoryError, NotADirectoryError).

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root)
        # Prefix prepended to every key: join a dummy one-character segment
        # onto the root, normalise, then drop that character again.
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f"with the ``create=True`` keyword"
                )
            # Write-access probe: create and immediately remove a file.
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping

        This is best-effort: a failure to remove or re-create the root is
        logged at debug level and otherwise ignored.
        """
        logger.info("Clear mapping at %s", self.root)
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except Exception:
            # Deliberately tolerant, but do not swallow KeyboardInterrupt /
            # SystemExit and leave a trace of what went wrong.
            logger.debug("Could not clear mapping at %s", self.root, exc_info=True)

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: iterable(str)
            The keys to be fetched
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        # Materialise once: ``keys`` is walked twice below, which would
        # silently yield an empty result for a one-shot iterator.
        keys = list(keys)
        keys2 = [self._key_to_str(k) for k in keys]
        # The backend is only ever asked to "raise" or "return"; "omit" is
        # implemented here by filtering the returned exceptions.
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # A bare bytes result is attributed to the first path.
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        result = {}
        for key, k2 in zip(keys, keys2):
            if on_error == "raise":
                value = out[k2]
            else:
                # A path the backend left out of its result counts as missing.
                value = out.get(k2, KeyError(k2))
            if on_error == "return" or not isinstance(value, BaseException):
                result[key] = value
        return result

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key

        Non-str keys are still accepted (lists are turned into tuples and
        the result stringified) but emit a DeprecationWarning.
        """
        if not isinstance(key, str):
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}".rstrip("/")

    def _str_to_key(self, s):
        """Strip the root prefix from a path to leave the key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data

        Raises KeyError when the backend raises one of
        ``self.missing_exceptions``, unless ``default`` is not None, in which
        case ``default`` is returned instead.
        """
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions as exc:
            if default is not None:
                return default
            raise KeyError(key) from exc
        return result

    def pop(self, key, default=None):
        """Pop data

        Returns the value (or ``default``) and removes the key. Note that a
        ``default`` of None is treated as "no default": a missing key then
        raises KeyError.
        """
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            # Nothing to delete when the default was returned.
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        # Make sure the parent directory exists before writing the file.
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key

        Any exception raised by the backend removal is reported as a
        KeyError for ``key``.
        """
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            raise KeyError(key) from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.isfile(path)

    def __reduce__(self):
        # Re-created instances never re-run the check/create steps.
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)

197 

198 

def maybe_convert(value):
    """Turn array-like values into ``bytes``; pass anything else through.

    A value counts as array-like when it is an ``array.array`` or exposes
    ``__array__``. Such values are copied out through ``memoryview``. Values
    whose ``dtype.kind`` is ``"M"`` or ``"m"`` are first viewed as ``int64``.
    """
    array_like = isinstance(value, array.array) or hasattr(value, "__array__")
    if not array_like:
        return value

    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        # The buffer interface doesn't support datetime64/timedelta64 numpy
        # arrays, so reinterpret the data as int64 first.
        value = value.view("int64")
    return bytes(memoryview(value))

208 

209 

def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Build a dict-like key-value store on top of a URL

    The URL has the form "protocol://location" and names the root of the
    mapper. Keys are file names below that root and values are the file
    contents.

    Compound URLs such as zip::s3://bucket/file.zip are accepted too, see
    ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Passed on to ``FSMap``, which then requires the root to exist and
        touches/removes a file below it before returning
    create: bool
        Passed on to ``FSMap``, which then makes the root directory if it
        does not exist
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        turned into KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override
    **kwargs:
        Forwarded to ``url_to_fs`` together with ``url``

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # The protocol is stripped here; it could instead be deferred to each
    # open() on the backend.
    filesystem, parsed_path = url_to_fs(url, **kwargs)
    if alternate_root is None:
        mapper_root = parsed_path
    else:
        mapper_root = alternate_root
    return FSMap(
        mapper_root,
        filesystem,
        check=check,
        create=create,
        missing_exceptions=missing_exceptions,
    )