Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/indexes.py: 25%

1"""Here is defined the IndexArray class."""

3from bisect import bisect_left, bisect_right

5from .node import NotLoggedMixin

6from .carray import CArray

7from .earray import EArray

8from . import indexesextension

11# Declarations for inheriting

class CacheArray(indexesextension.CacheArray, NotLoggedMixin, EArray):
    """Container holding the 1st- and 2nd-level index caches."""

    # Identifier string for this node class.
    _c_classid = 'CACHEARRAY'

class LastRowArray(indexesextension.LastRowArray, NotLoggedMixin, CArray):
    """Container holding the sorted and indices values that belong to the
    last row of an index."""

    # Identifier string for this node class.
    _c_classid = 'LASTROWARRAY'

class IndexArray(indexesextension.IndexArray, NotLoggedMixin, EArray):
    """Index dataset (sorted or reverse index) stored in an HDF5 file.

    Every NumPy typecode is supported, with the exception of complex
    datatypes.

    Parameters
    ----------
    parentnode
        The Index object from which this array will hang off.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        Name of this node inside its parent group.
    atom
        An Atom object describing the shape and type of the atomic objects
        to be saved.  Only scalar atoms are supported.
    title
        Value of the TITLE attribute set on the array entity.
    filters : Filters
        A Filters instance describing the I/O filters to be applied during
        the life of this object.
    byteorder
        The byteorder of the data on disk.

    """

    # Identifier string for this node class.
    _c_classid = 'INDEXARRAY'

    @property
    def chunksize(self):
        """The chunksize for this object (second entry of ``chunkshape``)."""
        chunkshape = self.chunkshape
        return chunkshape[1]

    @property
    def slicesize(self):
        """The slicesize for this object (second entry of ``shape``)."""
        shape = self.shape
        return shape[1]

    def __init__(self, parentnode, name,
                 atom=None, title="",
                 filters=None, byteorder=None):
        """Create an IndexArray instance."""

        self._v_pathname = parentnode._g_join(name)

        if atom is None:
            # Nothing to compute: shape and chunkshape will be read from
            # disk later on.
            shape = None
            chunkshape = None
        elif name == "sorted":
            # With an atom, shape and chunkshape have to be fixed here.
            # The "sorted" array has both sizes divided by the parent's
            # reduction.
            reduction = parentnode.reduction
            shape = (0, parentnode.slicesize // reduction)
            chunkshape = (1, parentnode.chunksize // reduction)
        else:
            # Any other array takes the parent's sizes unchanged.
            shape = (0, parentnode.slicesize)
            chunkshape = (1, parentnode.chunksize)

        super().__init__(
            parentnode, name, atom, shape, title, filters,
            chunkshape=chunkshape, byteorder=byteorder)

    # This flavour of searchBin relies on both the ranges (1st level) and
    # the bounds (2nd level) caches.  Boundary rows are cached, whereas
    # 'sorted' rows are not (that is only supported for the 'optimized'
    # types).
    def _search_bin(self, nrow, item):
        """Locate the two values of *item* inside sorted row *nrow*.

        Parameters
        ----------
        nrow
            Row number used to index the ranges cache, the bounds and the
            sorted data.
        item
            A pair ``(item1, item2)`` holding the values to look up.

        Returns
        -------
        tuple
            ``(result1, result2)``.  Each entry is ``0`` or ``shape[1]``
            when the value falls at/outside the recorded begin/end of the
            row, otherwise a position computed with a left bisection (for
            ``item1``) or a right bisection (for ``item2``) inside the
            matching chunk, offset by that chunk's start.

        """
        lower, upper = item
        start = -1
        stop = -1
        row_len = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache

        # Cheap check against the value recorded for the beginning of the
        # slice.
        first = ranges[nrow, 0]
        if lower <= first:
            start = 0
        if upper < first:
            stop = 0
        if start >= 0 and stop >= 0:
            return (start, stop)

        # Same kind of check against the value recorded for the end of the
        # slice, only for whatever is still unresolved.
        last = ranges[nrow, 1]
        if start < 0 and lower > last:
            start = row_len
        if stop < 0 and upper >= last:
            stop = row_len
        if start >= 0 and stop >= 0:
            return (start, stop)

        # At least one value lies in the middle of the slice: a real
        # lookup is needed.
        chunksize = self.chunksize  # Number of elements/chunksize
        left_chunk = -1  # sentinel: no chunk has been read for `lower`

        # Fetch the bounds row, preferably from the LRU cache.
        slot = boundscache.getslot(nrow)
        if slot >= 0:
            # Cache hit: reuse the row kept there.
            bounds = boundscache.getitem(slot)
        else:
            # Cache miss: read the row and store it in the cache.
            bounds = self._v_parent.bounds[nrow]
            boundscache.setitem(nrow, bounds, bounds.size * bounds.itemsize)

        if start < 0:
            # Pick the chunk through the bounds, then bisect inside it.
            left_chunk = bisect_left(bounds, lower)
            left_offset = chunksize * left_chunk
            chunk = self._read_sorted_slice(nrow, left_offset,
                                            chunksize * (left_chunk + 1))
            start = indexesextension._bisect_left(chunk, lower, chunksize)
            start = start + left_offset

        if stop < 0:
            right_chunk = bisect_right(bounds, upper)
            right_offset = chunksize * right_chunk
            # The chunk read for `lower` is reused when both values fall
            # into the same chunk.
            if right_chunk != left_chunk:
                chunk = self._read_sorted_slice(nrow, right_offset,
                                                chunksize * (right_chunk + 1))
            stop = indexesextension._bisect_right(chunk, upper, chunksize)
            stop = stop + right_offset

        return (start, stop)

    def __str__(self):
        """Return a compact representation of this object."""
        return f"IndexArray(path={self._v_pathname})"

    def __repr__(self):
        """Return a verbose representation of this object."""

        # NOTE(review): the leading whitespace of the continuation lines in
        # the literal below is copied from a whitespace-collapsed listing --
        # confirm against the pristine file.
        return f"""{self}
 atom = {self.atom!r}
 shape = {self.shape}
 nrows = {self.nrows}
 chunksize = {self.chunksize}
 slicesize = {self.slicesize}
 byteorder = {self.byteorder!r}"""