1"""
2Vendoring of pickleshare, reduced to used functionalities.
3
4---
5
6PickleShare - a small 'shelve' like datastore with concurrency support
7
8Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
9shelve, many processes can access the database simultaneously. Changing a
10value in database is immediately visible to other processes accessing the
11same database.
12
13Concurrency is possible because the values are stored in separate files. Hence
14the "database" is a directory where *all* files are governed by PickleShare.
15
16Example usage::
17
18 from pickleshare import *
19 db = PickleShareDB('~/testpickleshare')
20 db.clear()
21 print "Should be empty:",db.items()
22 db['hello'] = 15
23 db['aku ankka'] = [1,2,313]
24 db['paths/are/ok/key'] = [1,(5,46)]
25 print db.keys()
26 del db['aku ankka']
27
28This module is certainly not ZODB, but can be used for low-load
29(non-mission-critical) situations where tiny code size trumps the
30advanced features of a "real" object database.
31
32Installation guide: pip install pickleshare
33
34Author: Ville Vainio <vivainio@gmail.com>
35License: MIT open source license.
36
37"""

__version__ = "0.7.5"

from pathlib import Path


import os, stat, time

try:
    import collections.abc as collections_abc
except ImportError:
    import collections as collections_abc
try:
    import cPickle as pickle
except ImportError:
    import pickle
import errno


def gethashfile(key):
    """Map a key to one of 256 two-hex-digit bucket file names."""
    return ("%02x" % abs(hash(key) % 256))[-2:]
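
# Note: on Python 3, str hashes are randomized per process (PYTHONHASHSEED),
# so gethashfile() is only stable within a single process run. An illustrative
# sketch of the bucketing used by hset()/hget() below:
#
#     gethashfile('color')   # -> e.g. '8c' (exact value varies across runs)
#     gethashfile('weight')  # same or a different bucket; at most 256
#                            # distinct bucket files per hashroot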


_sentinel = object()


class PickleShareDB(collections_abc.MutableMapping):
    """The main 'connection' object for a PickleShare database"""

    def __init__(self, root):
        """Return a db object that will manage the specified directory"""
        if not isinstance(root, str):
            root = str(root)
        root = os.path.abspath(os.path.expanduser(root))
        self.root = Path(root)
        if not self.root.is_dir():
            # Catching the exception is necessary when multiple processes try
            # to create the folder concurrently; the exist_ok keyword argument
            # of mkdir does the same, but only from Python 3.5.
            try:
                self.root.mkdir(parents=True)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        # cache maps the full Path of a key to (obj, orig_mod_time)
        self.cache = {}
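
    # A concurrency sketch (paths and key illustrative): two handles on the
    # same directory, possibly in different processes, see each other's
    # writes, because every value lives in its own file.
    #
    #     a = PickleShareDB('~/testpickleshare')
    #     b = PickleShareDB('~/testpickleshare')
    #     a['counter'] = 1
    #     b['counter']  # -> 1, read fresh from disk (mtime-checked cache)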

    def __getitem__(self, key):
        """db['key'] reading"""
        fil = self.root / key
        try:
            mtime = fil.stat()[stat.ST_MTIME]
        except OSError:
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read from disk
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except Exception:
            # unreadable or partially written file: treat the key as missing
            raise KeyError(key)

        self.cache[fil] = (obj, mtime)
        return obj

    def __setitem__(self, key, value):
        """db['key'] = 5"""
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.is_dir():
            parent.mkdir(parents=True)
        # We specify protocol 2, so that we can mostly go between Python 2
        # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.
        with fil.open("wb") as f:
            pickle.dump(value, f, protocol=2)
        try:
            self.cache[fil] = (value, fil.stat().st_mtime)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """Hashed set: store key/value in a small bucket file under hashroot"""
        hroot = self.root / hashroot
        if not hroot.is_dir():
            hroot.mkdir()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update({key: value})
        self[hfile] = d

    def hget(self, hashroot, key, default=_sentinel, fast_only=True):
        """Hashed get: look key up in its bucket file under hashroot"""
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel)
        if d is _sentinel:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)

                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        value = d.get(key, default)
        if value is _sentinel:
            # key absent and no default given: don't leak the sentinel object
            raise KeyError(key)
        return value
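
    # A usage sketch for the hashed API (hashroot and key names illustrative):
    #
    #     db.hset('meta', 'color', 'blue')   # stored in bucket file meta/<xx>
    #     db.hget('meta', 'color')           # -> 'blue'
    #     db.hget('meta', 'missing', None)   # -> None instead of KeyError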

    def hdict(self, hashroot):
        """Get all data contained in hashed category 'hashroot' as dict"""
        hfiles = self.keys(hashroot + "/*")
        hfiles.sort()
        last = hfiles[-1] if hfiles else ""
        if last.endswith("xx"):
            # process the compressed file ('xx') first, so that newer
            # per-bucket entries override the compressed ones
            hfiles = [last] + hfiles[:-1]

        hdata = {}

        for f in hfiles:
            try:
                hdata.update(self[f])
            except KeyError:
                print("Corrupt", f, "deleted - hset is not threadsafe!")
                del self[f]

            self.uncache(f)

        return hdata

    def hcompress(self, hashroot):
        """Compress category 'hashroot', so hset is fast again

        After compression, hget will fail if fast_only is True for items
        that were hset before hcompress.

        """
        hfiles = self.keys(hashroot + "/*")
        hdata = {}
        for f in hfiles:
            hdata.update(self[f])
            self.uncache(f)

        self[hashroot + "/xx"] = hdata
        for f in hfiles:
            p = self.root / f
            if p.name == "xx":
                continue
            p.unlink()
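
    # A compaction sketch (hashroot name illustrative): after many hset()
    # calls a hashroot holds up to 256 bucket files; hcompress merges them.
    #
    #     db.hcompress('meta')                       # merge buckets into meta/xx
    #     db.hget('meta', 'color', fast_only=False)  # slow path reads meta/xx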

    def __delitem__(self, key):
        """del db["key"]"""
        fil = self.root / key
        self.cache.pop(fil, None)
        try:
            fil.unlink()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """Make a key suitable for user's eyes"""
        return str(p.relative_to(self.root)).replace("\\", "/")

    def keys(self, globpat=None):
        """All keys in DB, or all keys matching a glob"""

        if globpat is None:
            files = self.root.rglob("*")
        else:
            files = self.root.glob(globpat)
        return [self._normalized(p) for p in files if p.is_file()]
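
    # A listing sketch (glob pattern illustrative):
    #
    #     db.keys()          # all keys in the db
    #     db.keys('meta/*')  # only keys under the 'meta' subdirectory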

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def uncache(self, *items):
        """Removes all, or only the specified, items from the in-memory cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        """
        if not items:
            self.cache = {}
        for it in items:
            # the cache is keyed by full Paths; also accept the relative
            # string keys returned by self.keys()
            self.cache.pop(self.root / it, None)
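
    # A cache-trimming sketch (key names illustrative):
    #
    #     db.uncache('big/blob1', 'big/blob2')  # drop two cached entries
    #     db.uncache()                          # drop the whole cache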

    def waitget(self, key, maxwaittime=60):
        """Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operations to raise
        KeyError for the duration of pickling) won't break your program
        logic.
        """

        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while True:
            try:
                val = self[key]
                return val
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited += wtimes[tries]
            if tries < len(wtimes) - 1:
                tries += 1
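
    # A synchronization sketch (key name and compute() are hypothetical);
    # both processes open the same database directory.
    #
    #     # producer process
    #     db['run/result'] = compute()
    #
    #     # consumer process: polls for up to 60 seconds, then raises KeyError
    #     result = db.waitget('run/result')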

    def getlink(self, folder):
        """Get a convenient link for accessing items"""
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root


class PickleShareLink:
    """A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """

    def __init__(self, db, keydir):
        # write through __dict__ directly: __setattr__ below would otherwise
        # try to store these attributes as database entries
        self.__dict__["db"] = db
        self.__dict__["keydir"] = keydir

    def __getattr__(self, key):
        return self.__dict__["db"][self.__dict__["keydir"] + "/" + key]

    def __setattr__(self, key, val):
        self.db[self.keydir + "/" + key] = val

    def __repr__(self):
        db = self.__dict__["db"]
        keys = db.keys(self.__dict__["keydir"] + "/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__["keydir"],
            ";".join([Path(k).name for k in keys]),
        )