Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/earray.py: 21%

53 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Here is defined the EArray class.""" 

2 

3import numpy as np 

4 

5from .utils import convert_to_np_atom2, SizeType 

6from .carray import CArray 

7 

8 

# Default on-disk format version for EARRAY objects.
# Version history:
# obversion = "1.0"  # initial version
# obversion = "1.1"  # support for complex datatypes
# obversion = "1.2"  # This adds support for time datatypes.
# obversion = "1.3"  # This adds support for enumerated datatypes.
obversion = "1.4"  # Numeric and numarray flavors are gone.

15 

16 

class EArray(CArray):
    """This class represents extendable, homogeneous datasets in an HDF5 file.

    The main difference between an EArray and a CArray (see
    :ref:`CArrayClassDescr`), from which it inherits, is that the former
    can be enlarged along one of its dimensions, the *enlargeable
    dimension*.  That means that the :attr:`Leaf.extdim` attribute (see
    :class:`Leaf`) of any EArray instance will always be non-negative.
    Multiple enlargeable dimensions might be supported in the future.

    New rows can be added to the end of an enlargeable array by using the
    :meth:`EArray.append` method.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.

    atom
        An `Atom` instance representing the *type* and *shape*
        of the atomic objects to be saved.

    shape
        The shape of the new array.  One (and only one) of
        the shape dimensions *must* be 0.  The dimension being 0
        means that the resulting `EArray` object can be extended
        along it.  Multiple enlargeable dimensions are not supported
        right now.

    title
        A description for this node (it sets the ``TITLE``
        HDF5 attribute on disk).

    filters
        An instance of the `Filters` class that provides information
        about the desired I/O filters to be applied during the life
        of this object.

    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `EArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_EARRAY``
        (see ``tables/parameters.py``).  If you plan to create either
        a much smaller or a much bigger `EArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.

    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation.  Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape` (beware: no dimension should be 0 this time!).
        If ``None``, a sensible value is calculated based on the
        `expectedrows` parameter (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'.  If this is not specified, the byteorder is that of the
        platform.

    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True.  Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure.  As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Examples
    --------

    See below a small example of the use of the `EArray` class.  The
    code is available in ``examples/earray1.py``::

        import numpy as np
        import tables as tb

        fileh = tb.open_file('earray1.h5', mode='w')
        a = tb.StringAtom(itemsize=8)

        # Use ``a`` as the object type for the enlargeable array.
        array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,),
                                      "Chars")
        array_c.append(np.array(['a'*2, 'b'*4], dtype='S8'))
        array_c.append(np.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))

        # Read the string ``EArray`` we have created on disk.
        for s in array_c:
            print('array_c[%s] => %r' % (array_c.nrow, s))
        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        array_c[0] => 'aa'
        array_c[1] => 'bbbb'
        array_c[2] => 'aaaaaa'
        array_c[3] => 'bbbbbbbb'
        array_c[4] => 'cccccccc'

    """

    # Class identifier.
    _c_classid = 'EARRAY'

    def __init__(self, parentnode, name,
                 atom=None, shape=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):

        # Specific of EArray: resolve the expected-rows estimate before
        # delegating, since the parent may use it via ``_v_expectedrows``
        # when computing a sensible default chunkshape.
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array."""

        # Call the parent (CArray) init code
        super().__init__(parentnode, name, atom, shape, title, filters,
                         chunkshape, byteorder, _log, track_times)

    def _g_create(self):
        """Create a new array in file (specific part).

        Validates that exactly one dimension of ``self.shape`` is 0 (the
        enlargeable dimension), records its index in ``self.extdim`` and
        then delegates to the common creation code.

        Raises
        ------
        ValueError
            If no dimension of the shape is 0.
        NotImplementedError
            If more than one dimension of the shape is 0.
        """

        # Pre-conditions and extdim computation
        zerodims = np.sum(np.array(self.shape) == 0)
        if zerodims > 0:
            if zerodims == 1:
                # The single 0-sized dimension is the enlargeable one.
                self.extdim = list(self.shape).index(0)
            else:
                raise NotImplementedError(
                    "Multiple enlargeable (0-)dimensions are not "
                    "supported.")
        else:
            raise ValueError(
                "When creating EArrays, you need to set one of "
                "the dimensions of the Atom instance to zero.")

        # Finish the common part of the creation process
        return self._g_create_common(self._v_expectedrows)

    def _check_shape_append(self, nparr):
        """Test that nparr shape is consistent with underlying EArray.

        ``nparr`` must have the same rank as this array (after discounting
        the atom's own dimensions) and must match it on every dimension
        except the enlargeable one; otherwise a ``ValueError`` is raised.
        """

        # Does the appended array conform to self's expandability?
        myrank = len(self.shape)
        # Rank of the appended data, excluding the atom's trailing dims.
        narank = len(nparr.shape) - len(self.atom.shape)
        if myrank != narank:
            raise ValueError(("the ranks of the appended object (%d) and the "
                              "``%s`` EArray (%d) differ")
                             % (narank, self._v_pathname, myrank))
        for i in range(myrank):
            # Every dimension but the enlargeable one must match exactly.
            if i != self.extdim and self.shape[i] != nparr.shape[i]:
                raise ValueError(("the shapes of the appended object and the "
                                  "``%s`` EArray differ in non-enlargeable "
                                  "dimension %d") % (self._v_pathname, i))

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        The sequence must have the same type as the array; otherwise a
        TypeError is raised.  In the same way, the dimensions of the
        sequence must conform to the shape of the array, that is, all
        dimensions must match, with the exception of the enlargeable
        dimension, which can be of any length (even 0!).  If the shape
        of the sequence is invalid, a ValueError is raised.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Convert the sequence into a NumPy object
        nparr = convert_to_np_atom2(sequence, self.atom)
        # Check if it has a consistent shape with underlying EArray
        self._check_shape_append(nparr)
        # If the size of the nparr is zero, don't do anything else
        if nparr.size > 0:
            self._append(nparr)

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates an empty `EArray` named `name` under `group` and fills it
        buffer-by-buffer with the ``[start:stop:step]`` slice of this
        array's main dimension.  Returns a ``(new_node, nbytes)`` tuple,
        where `nbytes` is the size in bytes of this (source) array's data.
        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        # Build the shape of the destination: same as source but with the
        # main (enlargeable) dimension reset to 0 so it can be appended to.
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = len(range(start, stop, step))
        # Build the new EArray object.
        # NOTE: renamed from ``object`` to avoid shadowing the builtin.
        new_earray = EArray(
            group, name, atom=self.atom, shape=shape, title=title,
            filters=filters, expectedrows=nrows, chunkshape=chunkshape,
            _log=_log)
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Start the copy itself, one buffer of rows at a time.
        for start2 in range(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the extensible dimension
            slices[maindim] = slice(start2, stop2, step)
            new_earray._append(self.__getitem__(tuple(slices)))
        # Activate the conversion again (default)
        self._v_convert = True
        # Size in bytes of the source data (atoms * itemsize).
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (new_earray, nbytes)