Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pickleshare.py: 22%

194 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1#!/usr/bin/env python 

2 

3""" PickleShare - a small 'shelve' like datastore with concurrency support 

4 

5Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike 

6shelve, many processes can access the database simultaneously. Changing a 

7value in database is immediately visible to other processes accessing the 

8same database. 

9 

10Concurrency is possible because the values are stored in separate files. Hence 

11the "database" is a directory where *all* files are governed by PickleShare. 

12 

13Example usage:: 

14 

15 from pickleshare import * 

16 db = PickleShareDB('~/testpickleshare') 

17 db.clear() 

18 print "Should be empty:",db.items() 

19 db['hello'] = 15 

20 db['aku ankka'] = [1,2,313] 

21 db['paths/are/ok/key'] = [1,(5,46)] 

22 print db.keys() 

23 del db['aku ankka'] 

24 

25This module is certainly not ZODB, but can be used for low-load 

26(non-mission-critical) situations where tiny code size trumps the 

27advanced features of a "real" object database. 

28 

29Installation guide: pip install pickleshare 

30 

31Author: Ville Vainio <vivainio@gmail.com> 

32License: MIT open source license. 

33 

34""" 

35 

36from __future__ import print_function 

37 

38 

39__version__ = "0.7.5" 

40 

41try: 

42 from pathlib import Path 

43except ImportError: 

44 # Python 2 backport 

45 from pathlib2 import Path 

46 

47import os,stat,time 

48try: 

49 import collections.abc as collections_abc 

50except ImportError: 

51 import collections as collections_abc 

52try: 

53 import cPickle as pickle 

54except ImportError: 

55 import pickle 

56import errno 

57import sys 

58 

# Compatibility shim: the set of "string" types differs between
# Python 2 (str + unicode) and Python 3 (str only); used by
# PickleShareDB.__init__ to accept both strings and path objects.
if sys.version_info[0] >= 3:
    string_types = (str,)
else:
    string_types = (str, unicode)

63 

def gethashfile(key):
    """Map *key* onto one of 256 bucket-file names ('00' .. 'ff')."""
    bucket = abs(hash(key) % 256)
    return ("%02x" % bucket)[-2:]

66 

67_sentinel = object() 

68 

class PickleShareDB(collections_abc.MutableMapping):
    """The main 'connection' object for a PickleShare database.

    Acts like a mutable mapping whose values are persisted as one
    pickle file per key below a root directory; storing each value in
    its own file is what makes concurrent access from several
    processes possible.
    """

    def __init__(self, root):
        """Return a db object that will manage the specified directory.

        ``root`` may be a string or a path-like object; the directory
        (including parents) is created if it does not yet exist.
        """
        if not isinstance(root, string_types):
            root = str(root)
        root = os.path.abspath(os.path.expanduser(root))
        self.root = Path(root)
        if not self.root.is_dir():
            # Catching the exception is necessary if multiple processes
            # are concurrently trying to create the folder.  The
            # exist_ok keyword argument of mkdir does the same, but
            # only from Python 3.5 on.
            try:
                self.root.mkdir(parents=True)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        # cache maps Path -> (obj, orig_mod_time); an entry is reused
        # only while the file's mtime still matches.
        self.cache = {}

    def __getitem__(self, key):
        """``db['key']`` reading."""
        fil = self.root / key
        try:
            mtime = (fil.stat()[stat.ST_MTIME])
        except OSError:
            # Missing or unstat-able file means the key does not exist.
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read from disk.
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except Exception:
            # BUG FIX: this was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit.  Any I/O or unpickling
            # failure is still reported as a missing key.
            raise KeyError(key)

        self.cache[fil] = (obj, mtime)
        return obj

    def __setitem__(self, key, value):
        """``db['key'] = 5``"""
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.is_dir():
            parent.mkdir(parents=True)
        # We specify protocol 2, so that we can mostly go between
        # Python 2 and Python 3.  We can upgrade to protocol 3 when
        # Python 2 is obsolete.
        with fil.open('wb') as f:
            pickle.dump(value, f, protocol=2)
        try:
            self.cache[fil] = (value, fil.stat().st_mtime)
        except OSError as e:
            # Another process may already have removed the file; only a
            # genuine "not found" is tolerated here.
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """Hashed set: store ``key: value`` in bucket files under
        *hashroot* so many small items don't each need a file."""
        hroot = self.root / hashroot
        if not hroot.is_dir():
            hroot.mkdir()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update({key: value})
        self[hfile] = d

    def hget(self, hashroot, key, default=_sentinel, fast_only=True):
        """Hashed get: retrieve *key* stored via :meth:`hset`.

        With ``fast_only=True`` only the key's own bucket file is
        consulted; pass ``fast_only=False`` to also search items merged
        into the 'xx' file by :meth:`hcompress` (slower).
        Raises KeyError if absent and no *default* was given.
        """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel)
        if d is _sentinel:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)

                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        return d.get(key, default)

    def hdict(self, hashroot):
        """Get all data contained in hashed category *hashroot* as dict."""
        hfiles = self.keys(hashroot + "/*")
        hfiles.sort()
        # If present, load the hcompress()-produced 'xx' file first so
        # newer per-bucket entries override the compressed ones.
        last = hfiles[-1] if hfiles else ''
        if last.endswith('xx'):
            hfiles = [last] + hfiles[:-1]

        all = {}

        for f in hfiles:
            try:
                all.update(self[f])
            except KeyError:
                print("Corrupt",f,"deleted - hset is not threadsafe!")
                del self[f]

            self.uncache(f)

        return all

    def hcompress(self, hashroot):
        """Compress category *hashroot*, so hset is fast again.

        hget will fail if fast_only is True for compressed items (that
        were hset before hcompress).
        """
        hfiles = self.keys(hashroot + "/*")
        all = {}
        for f in hfiles:
            all.update(self[f])
            self.uncache(f)

        self[hashroot + '/xx'] = all
        for f in hfiles:
            p = self.root / f
            if p.name == 'xx':
                continue
            p.unlink()

    def __delitem__(self, key):
        """``del db["key"]``"""
        fil = self.root / key
        self.cache.pop(fil, None)
        try:
            fil.unlink()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """Make a key suitable for user's eyes (relative, '/'-separated)."""
        return str(p.relative_to(self.root)).replace('\\', '/')

    def keys(self, globpat=None):
        """All keys in DB, or all keys matching a glob."""
        if globpat is None:
            files = self.root.rglob('*')
        else:
            files = self.root.glob(globpat)
        return [self._normalized(p) for p in files if p.is_file()]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def uncache(self, *items):
        """Removes all, or specified items from cache.

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.
        """
        if not items:
            self.cache = {}
        for it in items:
            self.cache.pop(it, None)

    def waitget(self, key, maxwaittime=60):
        """Wait (poll) for a key to get a value.

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operation to cause a
        KeyError for the duration of pickling) won't screw up your
        program logic.
        """
        # Back off gradually: three fast polls, two medium, then 1s.
        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while 1:
            try:
                val = self[key]
                return val
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited += wtimes[tries]
            if tries < len(wtimes) - 1:
                tries += 1

    def getlink(self, folder):
        """Get a convenient link for accessing items below *folder*."""
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root

283 

284 

285 

class PickleShareLink:
    """A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5
    """
    def __init__(self, db, keydir):
        # Assign via __dict__ so our own __setattr__ (which persists to
        # the db) is not triggered for these bookkeeping attributes.
        # (The old ``self.__dict__.update(locals())`` also left a
        # spurious 'self' entry behind.)
        self.__dict__['db'] = db
        self.__dict__['keydir'] = keydir

    def __getattr__(self, key):
        # Unknown attributes are looked up in the database.
        return self.__dict__['db'][self.__dict__['keydir'] + '/' + key]

    def __setattr__(self, key, val):
        # Every attribute assignment is persisted under keydir.
        self.db[self.keydir + '/' + key] = val

    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys(self.__dict__['keydir'] + "/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            # BUG FIX: pathlib paths have no .basename() method (that is
            # os.path API); the original raised AttributeError here.
            ";".join([Path(k).name for k in keys]))

309 

def main():
    """Command-line entry point: dump, load or test a PickleShare db."""
    import textwrap
    usage = textwrap.dedent("""\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """)
    DB = PickleShareDB
    import sys
    if len(sys.argv) < 2:
        print(usage)
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == 'dump':
        if not args:
            args = ['.']
        db = DB(args[0])
        import pprint
        pprint.pprint(db.items())
    elif cmd == 'load':
        cont = sys.stdin.read()
        db = DB(args[0])
        # SECURITY: eval() of stdin executes arbitrary code; only feed
        # this command dump files you trust.
        data = eval(cont)
        db.clear()
        # BUG FIX: the original iterated db.items(), which is always
        # empty right after db.clear(), so nothing was ever loaded.
        for k, v in data.items():
            db[k] = v
    elif cmd == 'testwait':
        db = DB(args[0])
        db.clear()
        print(db.waitget('250'))
    elif cmd == 'test':
        # BUG FIX: test() and stress() are not defined in this module
        # (they were dropped from the distributed file); guard so the
        # command reports the problem instead of dying with NameError.
        for fn_name in ('test', 'stress'):
            fn = globals().get(fn_name)
            if fn is None:
                print("%s() is not available in this build" % fn_name)
            else:
                fn()

348 

# Allow the module to be used as a command-line tool.
if __name__== "__main__":
    main()

351 

352