Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/indexes.py: 25%
76 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Here is defined the IndexArray class."""
3from bisect import bisect_left, bisect_right
5from .node import NotLoggedMixin
6from .carray import CArray
7from .earray import EArray
8from . import indexesextension
11# Declarations for inheriting
class CacheArray(indexesextension.CacheArray, NotLoggedMixin, EArray):
    """Node that holds the 1st- and 2nd-level index caches."""

    _c_classid = "CACHEARRAY"  # Class identifier.
class LastRowArray(indexesextension.LastRowArray, NotLoggedMixin, CArray):
    """Node that holds the sorted values and indices of an index's last row."""

    _c_classid = "LASTROWARRAY"  # Class identifier.
class IndexArray(indexesextension.IndexArray, NotLoggedMixin, EArray):
    """Represent the index (sorted or reverse index) dataset in HDF5 file.

    All NumPy typecodes are supported except for complex datatypes.

    Parameters
    ----------
    parentnode
        The Index class from which this object will hang off.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An Atom object representing the shape and type of the atomic
        objects to be saved. Only scalar atoms are supported.
    title
        Sets a TITLE attribute on the array entity.
    filters : Filters
        An instance of the Filters class that provides information about
        the desired I/O filters to be applied during the life of this
        object.
    byteorder
        The byteorder of the data on-disk.

    """

    # Class identifier.
    _c_classid = "INDEXARRAY"

    @property
    def chunksize(self):
        """Second entry of ``chunkshape``: the chunk length along a row."""
        return self.chunkshape[1]

    @property
    def slicesize(self):
        """Second entry of ``shape``: the number of elements per slice."""
        return self.shape[1]

    def __init__(self, parentnode, name,
                 atom=None, title="",
                 filters=None, byteorder=None):
        """Create an IndexArray instance.

        When ``atom`` is given, the shape and chunkshape are derived from
        ``parentnode`` (divided by ``parentnode.reduction`` for the node
        named ``"sorted"``).  When ``atom`` is ``None`` both are passed on
        as ``None`` so they can be read from disk later on.
        """
        self._v_pathname = parentnode._g_join(name)

        if atom is None:
            # The shape and chunkshape will be read from disk later on.
            shape = None
            chunkshape = None
        elif name == "sorted":
            # The "sorted" node is scaled down by the reduction factor.
            reduction = parentnode.reduction
            shape = (0, parentnode.slicesize // reduction)
            chunkshape = (1, parentnode.chunksize // reduction)
        else:
            shape = (0, parentnode.slicesize)
            chunkshape = (1, parentnode.chunksize)

        super().__init__(
            parentnode, name, atom, shape, title, filters,
            chunkshape=chunkshape, byteorder=byteorder)

    def _search_bin(self, nrow, item):
        """Locate the ``(start, stop)`` positions of a range in slice `nrow`.

        ``item`` is a ``(lower, upper)`` pair.  The search uses both the
        ranges (1st level) and bounds (2nd level) caches.  Boundary rows
        are cached, 'sorted' rows are not (that is only supported for the
        'optimized' types).

        Returns a 2-tuple of positions inside the slice; a position equal
        to ``self.shape[1]`` means "past the end of the slice".
        """
        lower, upper = item
        nelements = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache

        # Step 1: compare against the first value of the sorted slice.
        first = ranges[nrow, 0]
        start = 0 if lower <= first else -1
        stop = 0 if upper < first else -1
        if start >= 0 and stop >= 0:
            return (start, stop)

        # Step 2: compare whatever is still unresolved against the last
        # value of the sorted slice.
        last = ranges[nrow, 1]
        if start < 0 and lower > last:
            start = nelements
        if stop < 0 and upper >= last:
            stop = nelements
        if start >= 0 and stop >= 0:
            return (start, stop)

        # Step 3: at least one limit falls inside the slice, so the bounds
        # row is needed.  Try the LRU cache first.
        chunksize = self.chunksize
        slot = boundscache.getslot(nrow)
        if slot < 0:
            # Cache miss: read the row and store it for later lookups.
            bounds = self._v_parent.bounds[nrow]
            nbytes = bounds.size * bounds.itemsize
            boundscache.setitem(nrow, bounds, nbytes)
        else:
            # Cache hit: use the row kept there.
            bounds = boundscache.getitem(slot)

        # -1 means "no chunk has been read for the lower limit".
        lower_chunk = -1
        if start < 0:
            # Pick the chunk through the bounds, then bisect inside it.
            lower_chunk = bisect_left(bounds, lower)
            offset = chunksize * lower_chunk
            chunk = self._read_sorted_slice(
                nrow, offset, chunksize * (lower_chunk + 1))
            start = indexesextension._bisect_left(chunk, lower, chunksize)
            start += offset

        if stop < 0:
            upper_chunk = bisect_right(bounds, upper)
            offset = chunksize * upper_chunk
            # Reuse the chunk read for the lower limit when both limits
            # land in the same chunk.
            if upper_chunk != lower_chunk:
                chunk = self._read_sorted_slice(
                    nrow, offset, chunksize * (upper_chunk + 1))
            stop = indexesextension._bisect_right(chunk, upper, chunksize)
            stop += offset

        return (start, stop)

    def __str__(self):
        """A compact representation of this class."""
        path = self._v_pathname
        return f"IndexArray(path={path})"

    def __repr__(self):
        """A verbose representation of this class."""
        # NOTE(review): the listing this was recovered from had its
        # whitespace collapsed; a two-space indent on the attribute lines
        # is assumed -- confirm against the original file.
        fields = (
            f"{self}",
            f"  atom = {self.atom!r}",
            f"  shape = {self.shape}",
            f"  nrows = {self.nrows}",
            f"  chunksize = {self.chunksize}",
            f"  slicesize = {self.slicesize}",
            f"  byteorder = {self.byteorder!r}",
        )
        return "\n".join(fields)