Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/indexes.py: 25%

76 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Here is defined the IndexArray class.""" 

2 

3from bisect import bisect_left, bisect_right 

4 

5from .node import NotLoggedMixin 

6from .carray import CArray 

7from .earray import EArray 

8from . import indexesextension 

9 

10 

11# Declarations for inheriting 

12 

13 

class CacheArray(indexesextension.CacheArray, NotLoggedMixin, EArray):
    """EArray-based node that holds the 1st and 2nd level index caches."""

    # Identifier string for this node class.
    _c_classid = 'CACHEARRAY'

19 

20 

class LastRowArray(indexesextension.LastRowArray, NotLoggedMixin, CArray):
    """CArray-based node that keeps the sorted and indices values belonging
    to the last row of an index."""

    # Identifier string for this node class.
    _c_classid = 'LASTROWARRAY'

27 

28 

class IndexArray(indexesextension.IndexArray, NotLoggedMixin, EArray):
    """Index dataset (sorted values or reverse indices) of an HDF5 file.

    Every NumPy typecode is supported, with the exception of complex
    datatypes.

    Parameters
    ----------
    parentnode
        The Index object that this array hangs off.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        Name of this node inside its parent group.
    atom
        An Atom object describing the shape and type of the atomic objects
        to be stored.  Only scalar atoms are supported.
    title
        Value for the TITLE attribute of the array entity.
    filters : Filters
        A Filters instance describing the I/O filters to apply during the
        life of this object.
    byteorder
        The byteorder of the data on disk.

    """

    # Identifier string for this node class.
    _c_classid = 'INDEXARRAY'

    def __init__(self, parentnode, name,
                 atom=None, title="",
                 filters=None, byteorder=None):
        """Create an IndexArray instance.

        When `atom` is given, the shape and chunkshape are derived from
        `parentnode` here; otherwise both are passed on as ``None``.
        """

        self._v_pathname = parentnode._g_join(name)

        if atom is None:
            # Nothing to compute now: shape and chunkshape are expected to
            # be read from disk later on.
            shape = None
            chunkshape = None
        elif name == "sorted":
            # The "sorted" array is scaled down by the parent's reduction.
            reduction = parentnode.reduction
            shape = (0, parentnode.slicesize // reduction)
            chunkshape = (1, parentnode.chunksize // reduction)
        else:
            shape = (0, parentnode.slicesize)
            chunkshape = (1, parentnode.chunksize)

        super().__init__(
            parentnode, name, atom, shape, title, filters,
            chunkshape=chunkshape, byteorder=byteorder)

    @property
    def chunksize(self):
        """The chunksize for this object (second entry of `chunkshape`)."""
        return self.chunkshape[1]

    @property
    def slicesize(self):
        """The slicesize for this object (second entry of `shape`)."""
        return self.shape[1]

    def _search_bin(self, nrow, item):
        """Locate the range `item` inside the sorted slice `nrow`.

        `item` is a pair ``(item1, item2)``.  The return value is a pair
        ``(result1, result2)`` of positions within the slice.

        Both the ranges (1st level) cache and the bounds (2nd level) cache
        are consulted.  Boundary rows go through an LRU cache, whereas
        'sorted' rows do not (that is only supported for the 'optimized'
        types).
        """
        lower, upper = item
        slice_len = self.shape[1]
        ranges = self._v_parent.rvcache
        boundscache = self.boundscache

        # Step 1: compare against the first value of the slice.
        first = ranges[nrow, 0]
        start = 0 if lower <= first else -1
        stop = 0 if upper < first else -1
        if start >= 0 and stop >= 0:
            return (start, stop)

        # Step 2: compare whatever is still unresolved against the last
        # value of the slice.
        last = ranges[nrow, 1]
        if start < 0 and lower > last:
            start = slice_len
        if stop < 0 and upper >= last:
            stop = slice_len
        if start >= 0 and stop >= 0:
            return (start, stop)

        # Step 3: at least one limit falls inside the slice, so the bounds
        # row is needed.  Ask the LRU cache first.
        chunksize = self.chunksize  # Number of elements/chunksize
        slot = boundscache.getslot(nrow)
        if slot < 0:
            # Cache miss: fetch the row and store it for later lookups.
            bounds = self._v_parent.bounds[nrow]
            nbytes = bounds.size * bounds.itemsize
            boundscache.setitem(nrow, bounds, nbytes)
        else:
            # Cache hit: reuse the stored row.
            bounds = boundscache.getitem(slot)

        # -1 means "no chunk has been read for the lower limit".
        lower_chunk = -1
        if start < 0:
            # Pick the chunk via the bounds, then bisect inside that chunk.
            lower_chunk = bisect_left(bounds, lower)
            chunk = self._read_sorted_slice(
                nrow, chunksize * lower_chunk, chunksize * (lower_chunk + 1))
            start = indexesextension._bisect_left(chunk, lower, chunksize)
            start += chunksize * lower_chunk

        if stop < 0:
            upper_chunk = bisect_right(bounds, upper)
            # Only read again when the chunk differs from the one already
            # loaded for the lower limit.
            if upper_chunk != lower_chunk:
                chunk = self._read_sorted_slice(
                    nrow, chunksize * upper_chunk,
                    chunksize * (upper_chunk + 1))
            stop = indexesextension._bisect_right(chunk, upper, chunksize)
            stop += chunksize * upper_chunk

        return (start, stop)

    def __str__(self):
        """Return a compact, one-line description of this object."""
        return f"IndexArray(path={self._v_pathname})"

    def __repr__(self):
        """Return a verbose, multi-line description of this object."""

        # NOTE(review): the indentation of the continuation lines inside
        # this literal could not be recovered from the extracted listing;
        # two spaces are assumed -- confirm against the original file.
        return f"""{self}
  atom = {self.atom!r}
  shape = {self.shape}
  nrows = {self.nrows}
  chunksize = {self.chunksize}
  slicesize = {self.slicesize}
  byteorder = {self.byteorder!r}"""