#!/usr/bin/env python

""" PickleShare - a small 'shelve' like datastore with concurrency support

Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
shelve, many processes can access the database simultaneously. Changing a
value in the database is immediately visible to other processes accessing the
same database.

Concurrency is possible because the values are stored in separate files. Hence
the "database" is a directory where *all* files are governed by PickleShare.

Example usage::

    from pickleshare import *
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print("Should be empty:", db.items())
    db['hello'] = 15
    db['aku ankka'] = [1, 2, 313]
    db['paths/are/ok/key'] = [1, (5, 46)]
    print(db.keys())
    del db['aku ankka']

This module is certainly not ZODB, but can be used for low-load
(non-mission-critical) situations where tiny code size trumps the
advanced features of a "real" object database.

Installation guide: pip install pickleshare

Author: Ville Vainio <vivainio@gmail.com>
License: MIT open source license.

"""
from __future__ import print_function

__version__ = "0.7.5"

try:
    from pathlib import Path
except ImportError:
    # Python 2 backport
    from pathlib2 import Path

import os, stat, time

try:
    import collections.abc as collections_abc
except ImportError:
    import collections as collections_abc

try:
    import cPickle as pickle
except ImportError:
    import pickle

import errno
import sys

if sys.version_info[0] >= 3:
    string_types = (str,)
else:
    string_types = (str, unicode)

def gethashfile(key):
    # map a key to one of 256 two-hex-digit bucket file names ('00'..'ff')
    return ("%02x" % abs(hash(key) % 256))[-2:]
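
# Illustrative note, not part of the original module: hash() on str is
# randomized per interpreter process on Python 3 (see PYTHONHASHSEED), so
# the bucket a given key maps to is only stable within one process:
#
#     gethashfile('hello')   # e.g. '6b' in one run, 'd3' in another;
#                            # deterministic within a single process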

_sentinel = object()

class PickleShareDB(collections_abc.MutableMapping):
    """ The main 'connection' object for PickleShare database """
    def __init__(self, root):
        """ Return a db object that will manage the specified directory"""
        if not isinstance(root, string_types):
            root = str(root)
        root = os.path.abspath(os.path.expanduser(root))
        self.root = Path(root)
        if not self.root.is_dir():
            # catching the exception is necessary if multiple processes are
            # concurrently trying to create a folder; the exist_ok keyword
            # argument of mkdir does the same, but only from Python 3.5
            try:
                self.root.mkdir(parents=True)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        # cache has { 'key' : (obj, orig_mod_time) }
        self.cache = {}

    def __getitem__(self, key):
        """ db['key'] reading """
        fil = self.root / key
        try:
            mtime = (fil.stat()[stat.ST_MTIME])
        except OSError:
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except Exception:
            # a concurrent writer may have left a partial file; any
            # unpickling failure is reported as a missing key
            raise KeyError(key)

        self.cache[fil] = (obj, mtime)
        return obj

    def __setitem__(self, key, value):
        """ db['key'] = 5 """
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.is_dir():
            parent.mkdir(parents=True)
        # We specify protocol 2, so that we can mostly go between Python 2
        # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.
        with fil.open('wb') as f:
            pickle.dump(value, f, protocol=2)
        try:
            self.cache[fil] = (value, fil.stat().st_mtime)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """ hashed set """
        hroot = self.root / hashroot
        if not hroot.is_dir():
            hroot.mkdir()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update({key: value})
        self[hfile] = d

    def hget(self, hashroot, key, default=_sentinel, fast_only=True):
        """ hashed get """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel)
        if d is _sentinel:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)
                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        return d.get(key, default)

    def hdict(self, hashroot):
        """ Get all data contained in hashed category 'hashroot' as dict """
        hfiles = self.keys(hashroot + "/*")
        hfiles.sort()
        last = len(hfiles) and hfiles[-1] or ''
        if last.endswith('xx'):
            # read the compressed 'xx' file first, so that newer
            # uncompressed buckets override its entries
            hfiles = [last] + hfiles[:-1]

        all = {}

        for f in hfiles:
            try:
                all.update(self[f])
            except KeyError:
                print("Corrupt", f, "deleted - hset is not threadsafe!")
                del self[f]

            self.uncache(f)

        return all

    def hcompress(self, hashroot):
        """ Compress category 'hashroot', so hset is fast again

        hget will fail if fast_only is True for compressed items (that were
        hset before hcompress).

        """
        hfiles = self.keys(hashroot + "/*")
        all = {}
        for f in hfiles:
            all.update(self[f])
            self.uncache(f)

        self[hashroot + '/xx'] = all
        for f in hfiles:
            p = self.root / f
            if p.name == 'xx':
                continue
            p.unlink()
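
    # Hedged usage sketch of the hashed API above (comments only; the db
    # path and keys are illustrative, not part of the module):
    #
    #     db = PickleShareDB('~/testpickleshare')
    #     db.hset('meta', 'alpha', 1)       # stored in bucket meta/<hh>
    #     db.hget('meta', 'alpha')          # -> 1, single-file read
    #     db.hcompress('meta')              # folds all buckets into meta/xx
    #     db.hget('meta', 'alpha')          # KeyError: fast path skips 'xx'
    #     db.hget('meta', 'alpha', fast_only=False)   # -> 1, via hdict()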

    def __delitem__(self, key):
        """ del db["key"] """
        fil = self.root / key
        self.cache.pop(fil, None)
        try:
            fil.unlink()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        return str(p.relative_to(self.root)).replace('\\', '/')

    def keys(self, globpat=None):
        """ All keys in DB, or all keys matching a glob"""

        if globpat is None:
            files = self.root.rglob('*')
        else:
            files = self.root.glob(globpat)
        return [self._normalized(p) for p in files if p.is_file()]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def uncache(self, *items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        """
        if not items:
            self.cache = {}
        for it in items:
            # the cache is keyed by absolute Path objects, so normalize
            # plain string keys (as returned by keys()) before popping
            self.cache.pop(self.root / it, None)

    def waitget(self, key, maxwaittime=60):
        """ Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operations to raise a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """

        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while 1:
            try:
                val = self[key]
                return val
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited += wtimes[tries]
            if tries < len(wtimes) - 1:
                tries += 1
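
    # Hedged usage sketch (comments only; the '~/shared' path, 'result'
    # key and compute() are illustrative, not part of the module):
    #
    #     # writer process
    #     db = PickleShareDB('~/shared')
    #     db['result'] = compute()
    #
    #     # reader process: polls with back-off (0.2s steps growing to 1s)
    #     # until the key appears, raising KeyError after ~30 seconds
    #     db = PickleShareDB('~/shared')
    #     result = db.waitget('result', maxwaittime=30)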

    def getlink(self, folder):
        """ Get a convenient link for accessing items """
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root

class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """
    def __init__(self, db, keydir):
        self.__dict__.update(locals())

    def __getattr__(self, key):
        return self.__dict__['db'][self.__dict__['keydir'] + '/' + key]

    def __setattr__(self, key, val):
        self.db[self.keydir + '/' + key] = val

    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys(self.__dict__['keydir'] + "/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            ";".join([Path(k).name for k in keys]))

def main():
    import textwrap
    usage = textwrap.dedent("""\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """)
    DB = PickleShareDB
    import sys
    if len(sys.argv) < 2:
        print(usage)
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == 'dump':
        if not args: args = ['.']
        db = DB(args[0])
        import pprint
        pprint.pprint(db.items())
    elif cmd == 'load':
        cont = sys.stdin.read()
        db = DB(args[0])
        data = eval(cont)
        db.clear()
        for k, v in data.items():
            db[k] = v
    elif cmd == 'testwait':
        db = DB(args[0])
        db.clear()
        print(db.waitget('250'))
    elif cmd == 'test':
        # NOTE: test() and stress() are not defined in this file, so the
        # 'test' command raises NameError as shipped
        test()
        stress()

if __name__ == "__main__":
    main()