Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/fsspec/mapping.py: 25%

103 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:40 +0000

1import array 

2import posixpath 

3import warnings 

4from collections.abc import MutableMapping 

5from functools import cached_property 

6 

7from .core import url_to_fs 

8 

9 

class FSMap(MutableMapping):
    """Wrap a FileSystem instance as a mutable mapping.

    The keys of the mapping become files under the given root, and the
    values (which must be bytes) the contents of those files.

    Parameters
    ----------
    root: string
        prefix for all the files
    fs: FileSystem instance
    check: bool (=False)
        if True, require that ``root`` exists (ValueError otherwise) and
        perform a touch followed by a removal at the location, to check
        for write access.
    create: bool (=False)
        if True, make the directory ``root`` when it does not exist yet.
    missing_exceptions: None or tuple
        exception types that are treated as "key is missing" and converted
        to KeyError when reading. Defaults to
        (FileNotFoundError, IsADirectoryError, NotADirectoryError).

    Examples
    --------
    >>> fs = FileSystem(**parameters)  # doctest: +SKIP
    >>> d = FSMap('my-data/path/', fs)  # doctest: +SKIP
    or, more likely
    >>> d = fs.get_mapper('my-data/path/')  # doctest: +SKIP

    >>> d['loc1'] = b'Hello World'  # doctest: +SKIP
    >>> list(d.keys())  # doctest: +SKIP
    ['loc1']
    >>> d['loc1']  # doctest: +SKIP
    b'Hello World'
    """

    def __init__(self, root, fs, check=False, create=False, missing_exceptions=None):
        self.fs = fs
        self.root = fs._strip_protocol(root).rstrip("/")
        # Prefix that is prepended verbatim to every key: strip the protocol
        # from "<root>/x" and drop the trailing placeholder character, which
        # leaves the normalised root including its trailing separator.
        self._root_key_to_str = fs._strip_protocol(posixpath.join(root, "x"))[:-1]
        if missing_exceptions is None:
            missing_exceptions = (
                FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
            )
        self.missing_exceptions = missing_exceptions
        self.check = check
        self.create = create
        if create:
            if not self.fs.exists(root):
                self.fs.mkdir(root)
        if check:
            if not self.fs.exists(root):
                raise ValueError(
                    f"Path {root} does not exist. Create "
                    f" with the ``create=True`` keyword"
                )
            # Write probe: create and immediately remove a scratch file.
            self.fs.touch(root + "/a")
            self.fs.rm(root + "/a")

    @cached_property
    def dirfs(self):
        """dirfs instance that can be used with the same keys as the mapper"""
        from .implementations.dirfs import DirFileSystem

        return DirFileSystem(path=self._root_key_to_str, fs=self.fs)

    def clear(self):
        """Remove all keys below root - empties out mapping

        This is best-effort: ordinary errors from the backend are ignored.
        Interpreter-level signals (KeyboardInterrupt, SystemExit) are not
        swallowed and propagate to the caller.
        """
        try:
            self.fs.rm(self.root, True)
            self.fs.mkdir(self.root)
        except Exception:
            pass

    def getitems(self, keys, on_error="raise"):
        """Fetch multiple items from the store

        If the backend is async-able, this might proceed concurrently

        Parameters
        ----------
        keys: iterable(str)
            The keys to be fetched. Any iterable is accepted, including
            one-shot iterators such as generators.
        on_error : "raise", "omit", "return"
            If raise, an underlying exception will be raised (converted to KeyError
            if the type is in self.missing_exceptions); if omit, keys with exception
            will simply not be included in the output; if "return", all keys are
            included in the output, but the value will be bytes or an exception
            instance.

        Returns
        -------
        dict(key, bytes|exception)
        """
        # Materialise once: ``keys`` is walked twice below (to build the paths
        # and to map results back), which would exhaust a one-shot iterator.
        keys = list(keys)
        keys2 = [self._key_to_str(k) for k in keys]
        # "omit" is implemented here by filtering, so the backend is asked to
        # return the exceptions in that case as well.
        oe = on_error if on_error == "raise" else "return"
        try:
            out = self.fs.cat(keys2, on_error=oe)
            if isinstance(out, bytes):
                # A bare bytes result is normalised to the dict form.
                out = {keys2[0]: out}
        except self.missing_exceptions as e:
            raise KeyError from e
        out = {
            k: (KeyError() if isinstance(v, self.missing_exceptions) else v)
            for k, v in out.items()
        }
        return {
            key: out[k2]
            for key, k2 in zip(keys, keys2)
            if on_error == "return" or not isinstance(out[k2], BaseException)
        }

    def setitems(self, values_dict):
        """Set the values of multiple items in the store

        Parameters
        ----------
        values_dict: dict(str, bytes)
        """
        values = {self._key_to_str(k): maybe_convert(v) for k, v in values_dict.items()}
        self.fs.pipe(values)

    def delitems(self, keys):
        """Remove multiple keys from the store"""
        self.fs.rm([self._key_to_str(k) for k in keys])

    def _key_to_str(self, key):
        """Generate full path for the key

        Non-str keys are deprecated: they trigger a DeprecationWarning and
        are converted with ``str`` (lists are turned into tuples first).
        """
        if not isinstance(key, str):
            # NOTE: planned replacement for this branch is
            # raise TypeError("key must be of type `str`, got `{type(key).__name__}`"
            warnings.warn(
                "from fsspec 2023.5 onward FSMap non-str keys will raise TypeError",
                DeprecationWarning,
            )
            if isinstance(key, list):
                key = tuple(key)
            key = str(key)
        return f"{self._root_key_to_str}{key}"

    def _str_to_key(self, s):
        """Strip the root prefix from a path to leave the key name"""
        return s[len(self.root) :].lstrip("/")

    def __getitem__(self, key, default=None):
        """Retrieve data

        Raises KeyError when the backend raises one of
        ``self.missing_exceptions``, unless ``default`` is not None, in which
        case ``default`` is returned instead.
        """
        k = self._key_to_str(key)
        try:
            result = self.fs.cat(k)
        except self.missing_exceptions as exc:
            if default is not None:
                return default
            raise KeyError(key) from exc
        return result

    def pop(self, key, default=None):
        """Pop data

        Returns the stored value and removes the key. For a missing key,
        ``default`` is returned when it is not None; a ``default`` of None is
        indistinguishable from "no default" here, so KeyError is raised.
        """
        result = self.__getitem__(key, default)
        try:
            del self[key]
        except KeyError:
            pass
        return result

    def __setitem__(self, key, value):
        """Store value in key"""
        key = self._key_to_str(key)
        self.fs.mkdirs(self.fs._parent(key), exist_ok=True)
        self.fs.pipe_file(key, maybe_convert(value))

    def __iter__(self):
        return (self._str_to_key(x) for x in self.fs.find(self.root))

    def __len__(self):
        return len(self.fs.find(self.root))

    def __delitem__(self, key):
        """Remove key

        Any ordinary backend error is reported as ``KeyError(key)`` with the
        original exception attached as its cause. KeyboardInterrupt and
        SystemExit are not converted and propagate unchanged.
        """
        try:
            self.fs.rm(self._key_to_str(key))
        except Exception as exc:
            raise KeyError(key) from exc

    def __contains__(self, key):
        """Does key exist in mapping?"""
        path = self._key_to_str(key)
        return self.fs.exists(path) and self.fs.isfile(path)

    def __reduce__(self):
        # Rebuild with check=False and create=False so that restoring the
        # mapper does not repeat the existence check or directory creation.
        return FSMap, (self.root, self.fs, False, False, self.missing_exceptions)

193 

194 

def maybe_convert(value):
    """Return ``value`` as bytes when it is an array-like object.

    ``array.array`` instances and objects exposing ``__array__`` are copied
    into a ``bytes`` object through ``memoryview``. Anything else is returned
    unchanged.
    """
    is_array_like = isinstance(value, array.array) or hasattr(value, "__array__")
    if not is_array_like:
        return value

    if hasattr(value, "dtype") and value.dtype.kind in "Mm":
        # datetime64/timedelta64 numpy arrays are not supported by the buffer
        # interface, so reinterpret them as int64 first.
        value = value.view("int64")
    return bytes(memoryview(value))

204 

205 

def get_mapper(
    url="",
    check=False,
    create=False,
    missing_exceptions=None,
    alternate_root=None,
    **kwargs,
):
    """Create key-value interface for given URL and options

    The URL will be of the form "protocol://location" and point to the root
    of the mapper required. All keys will be file-names below this location,
    and their values the contents of each key.

    Also accepts compound URLs like zip::s3://bucket/file.zip , see ``fsspec.open``.

    Parameters
    ----------
    url: str
        Root URL of mapping
    check: bool
        Passed on to ``FSMap``: whether to verify, before returning, that the
        root location exists and is writable
    create: bool
        Passed on to ``FSMap``: whether to make the directory corresponding
        to the root before instantiating
    missing_exceptions: None or tuple
        If given, these exception types will be regarded as missing keys and
        return KeyError when trying to read data. By default, you get
        (FileNotFoundError, IsADirectoryError, NotADirectoryError)
    alternate_root: None or str
        In cases of complex URLs, the parser may fail to pick the correct part
        for the mapper root, so this arg can override
    **kwargs:
        Forwarded unchanged to ``url_to_fs`` when resolving ``url``

    Returns
    -------
    ``FSMap`` instance, the dict-like key-value store.
    """
    # The protocol is removed here - this could be deferred to each open()
    # on the backend instead.
    fs, urlpath = url_to_fs(url, **kwargs)
    if alternate_root is None:
        root = urlpath
    else:
        root = alternate_root
    return FSMap(
        root,
        fs,
        check=check,
        create=create,
        missing_exceptions=missing_exceptions,
    )