1""" 
Vendoring of pickleshare, reduced to the subset of functionality that is used.
    3 
    4--- 
    5 
    6PickleShare - a small 'shelve' like datastore with concurrency support 
    7 
    8Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike 
    9shelve, many processes can access the database simultaneously. Changing a 
    10value in database is immediately visible to other processes accessing the 
    11same database. 
    12 
    13Concurrency is possible because the values are stored in separate files. Hence 
    14the "database" is a directory where *all* files are governed by PickleShare. 
    15 
    16Example usage:: 
    17 
    from pickleshare import *
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print("Should be empty:", db.items())
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/are/ok/key'] = [1,(5,46)]
    print(db.keys())
    del db['aku ankka']
    27 
    28This module is certainly not ZODB, but can be used for low-load 
    29(non-mission-critical) situations where tiny code size trumps the 
    30advanced features of a "real" object database. 
    31 
    32Installation guide: pip install pickleshare 
    33 
    34Author: Ville Vainio <vivainio@gmail.com> 
    35License: MIT open source license. 
    36 
    37""" 
    38 
    39__version__ = "0.7.5" 
    40 
    41from pathlib import Path 
    42 
    43 
    44import os, stat, time 
    45 
    46try: 
    47    import collections.abc as collections_abc 
    48except ImportError: 
    49    import collections as collections_abc 
    50try: 
    51    import cPickle as pickle 
    52except ImportError: 
    53    import pickle 
    54import errno 
    55import sys 
    56 
    57 
    58def gethashfile(key): 
    59    return ("%02x" % abs(hash(key) % 256))[-2:] 
    60 
    61 
    62_sentinel = object() 
    63 
    64 
    65class PickleShareDB(collections_abc.MutableMapping): 
    66    """The main 'connection' object for PickleShare database""" 
    67 
    68    def __init__(self, root): 
    69        """Return a db object that will manage the specied directory""" 
    70        if not isinstance(root, str): 
    71            root = str(root) 
    72        root = os.path.abspath(os.path.expanduser(root)) 
    73        self.root = Path(root) 
    74        if not self.root.is_dir(): 
    75            # catching the exception is necessary if multiple processes are concurrently trying to create a folder 
    76            # exists_ok keyword argument of mkdir does the same but only from Python 3.5 
    77            try: 
    78                self.root.mkdir(parents=True) 
    79            except OSError as e: 
    80                if e.errno != errno.EEXIST: 
    81                    raise 
    82        # cache has { 'key' : (obj, orig_mod_time) } 
    83        self.cache = {} 
    84 
    85    def __getitem__(self, key): 
    86        """db['key'] reading""" 
    87        fil = self.root / key 
    88        try: 
    89            mtime = fil.stat()[stat.ST_MTIME] 
    90        except OSError: 
    91            raise KeyError(key) 
    92 
    93        if fil in self.cache and mtime == self.cache[fil][1]: 
    94            return self.cache[fil][0] 
    95        try: 
    96            # The cached item has expired, need to read 
    97            with fil.open("rb") as f: 
    98                obj = pickle.loads(f.read()) 
    99        except: 
    100            raise KeyError(key) 
    101 
    102        self.cache[fil] = (obj, mtime) 
    103        return obj 
    104 
    105    def __setitem__(self, key, value): 
    106        """db['key'] = 5""" 
    107        fil = self.root / key 
    108        parent = fil.parent 
    109        if parent and not parent.is_dir(): 
    110            parent.mkdir(parents=True) 
    111        # We specify protocol 2, so that we can mostly go between Python 2 
    112        # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete. 
    113        with fil.open("wb") as f: 
    114            pickle.dump(value, f, protocol=2) 
    115        try: 
    116            self.cache[fil] = (value, fil.stat().st_mtime) 
    117        except OSError as e: 
    118            if e.errno != errno.ENOENT: 
    119                raise 
    120 
    121    def hset(self, hashroot, key, value): 
    122        """hashed set""" 
    123        hroot = self.root / hashroot 
    124        if not hroot.is_dir(): 
    125            hroot.mkdir() 
    126        hfile = hroot / gethashfile(key) 
    127        d = self.get(hfile, {}) 
    128        d.update({key: value}) 
    129        self[hfile] = d 
    130 
    131    def hget(self, hashroot, key, default=_sentinel, fast_only=True): 
    132        """hashed get""" 
    133        hroot = self.root / hashroot 
    134        hfile = hroot / gethashfile(key) 
    135 
    136        d = self.get(hfile, _sentinel) 
    137        # print "got dict",d,"from",hfile 
    138        if d is _sentinel: 
    139            if fast_only: 
    140                if default is _sentinel: 
    141                    raise KeyError(key) 
    142 
    143                return default 
    144 
    145            # slow mode ok, works even after hcompress() 
    146            d = self.hdict(hashroot) 
    147 
    148        return d.get(key, default) 
    149 
    150    def hdict(self, hashroot): 
    151        """Get all data contained in hashed category 'hashroot' as dict""" 
    152        hfiles = self.keys(hashroot + "/*") 
    153        hfiles.sort() 
    154        last = len(hfiles) and hfiles[-1] or "" 
    155        if last.endswith("xx"): 
    156            # print "using xx" 
    157            hfiles = [last] + hfiles[:-1] 
    158 
    159        all = {} 
    160 
    161        for f in hfiles: 
    162            # print "using",f 
    163            try: 
    164                all.update(self[f]) 
    165            except KeyError: 
    166                print("Corrupt", f, "deleted - hset is not threadsafe!") 
    167                del self[f] 
    168 
    169            self.uncache(f) 
    170 
    171        return all 
    172 
    173    def hcompress(self, hashroot): 
    174        """Compress category 'hashroot', so hset is fast again 
    175 
    176        hget will fail if fast_only is True for compressed items (that were 
    177        hset before hcompress). 
    178 
    179        """ 
    180        hfiles = self.keys(hashroot + "/*") 
    181        all = {} 
    182        for f in hfiles: 
    183            # print "using",f 
    184            all.update(self[f]) 
    185            self.uncache(f) 
    186 
    187        self[hashroot + "/xx"] = all 
    188        for f in hfiles: 
    189            p = self.root / f 
    190            if p.name == "xx": 
    191                continue 
    192            p.unlink() 
    193 
    194    def __delitem__(self, key): 
    195        """del db["key"]""" 
    196        fil = self.root / key 
    197        self.cache.pop(fil, None) 
    198        try: 
    199            fil.unlink() 
    200        except OSError: 
    201            # notfound and permission denied are ok - we 
    202            # lost, the other process wins the conflict 
    203            pass 
    204 
    205    def _normalized(self, p): 
    206        """Make a key suitable for user's eyes""" 
    207        return str(p.relative_to(self.root)).replace("\\", "/") 
    208 
    209    def keys(self, globpat=None): 
    210        """All keys in DB, or all keys matching a glob""" 
    211 
    212        if globpat is None: 
    213            files = self.root.rglob("*") 
    214        else: 
    215            files = self.root.glob(globpat) 
    216        return [self._normalized(p) for p in files if p.is_file()] 
    217 
    218    def __iter__(self): 
    219        return iter(self.keys()) 
    220 
    221    def __len__(self): 
    222        return len(self.keys()) 
    223 
    224    def uncache(self, *items): 
    225        """Removes all, or specified items from cache 
    226 
    227        Use this after reading a large amount of large objects 
    228        to free up memory, when you won't be needing the objects 
    229        for a while. 
    230 
    231        """ 
    232        if not items: 
    233            self.cache = {} 
    234        for it in items: 
    235            self.cache.pop(it, None) 
    236 
    237    def waitget(self, key, maxwaittime=60): 
    238        """Wait (poll) for a key to get a value 
    239 
    240        Will wait for `maxwaittime` seconds before raising a KeyError. 
    241        The call exits normally if the `key` field in db gets a value 
    242        within the timeout period. 
    243 
    244        Use this for synchronizing different processes or for ensuring 
    245        that an unfortunately timed "db['key'] = newvalue" operation 
    246        in another process (which causes all 'get' operation to cause a 
    247        KeyError for the duration of pickling) won't screw up your program 
    248        logic. 
    249        """ 
    250 
    251        wtimes = [0.2] * 3 + [0.5] * 2 + [1] 
    252        tries = 0 
    253        waited = 0 
    254        while 1: 
    255            try: 
    256                val = self[key] 
    257                return val 
    258            except KeyError: 
    259                pass 
    260 
    261            if waited > maxwaittime: 
    262                raise KeyError(key) 
    263 
    264            time.sleep(wtimes[tries]) 
    265            waited += wtimes[tries] 
    266            if tries < len(wtimes) - 1: 
    267                tries += 1 
    268 
    269    def getlink(self, folder): 
    270        """Get a convenient link for accessing items""" 
    271        return PickleShareLink(self, folder) 
    272 
    273    def __repr__(self): 
    274        return "PickleShareDB('%s')" % self.root 
    275 
    276 
    277class PickleShareLink: 
    278    """A shortdand for accessing nested PickleShare data conveniently. 
    279 
    280    Created through PickleShareDB.getlink(), example:: 
    281 
    282        lnk = db.getlink('myobjects/test') 
    283        lnk.foo = 2 
    284        lnk.bar = lnk.foo + 5 
    285 
    286    """ 
    287 
    288    def __init__(self, db, keydir): 
    289        self.__dict__.update(locals()) 
    290 
    291    def __getattr__(self, key): 
    292        return self.__dict__["db"][self.__dict__["keydir"] + "/" + key] 
    293 
    294    def __setattr__(self, key, val): 
    295        self.db[self.keydir + "/" + key] = val 
    296 
    297    def __repr__(self): 
    298        db = self.__dict__["db"] 
    299        keys = db.keys(self.__dict__["keydir"] + "/*") 
    300        return "<PickleShareLink '%s': %s>" % ( 
    301            self.__dict__["keydir"], 
    302            ";".join([Path(k).basename() for k in keys]), 
    303        )