Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/carray.py: 14%

86 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Here is defined the CArray class.""" 

2 

3import sys 

4 

5import numpy as np 

6 

7from .atom import Atom 

8from .array import Array 

9from .utils import correct_byteorder, SizeType 

10 

11 

# Default object version for CARRAY nodes on disk.
# obversion = "1.0"  # Support for time & enumerated datatypes.
obversion = "1.1"  # Numeric and numarray flavors are gone.

15 

16 

class CArray(Array):
    """This class represents homogeneous datasets in an HDF5 file.

    The difference between a CArray and a normal Array (see
    :ref:`ArrayClassDescr`), from which it inherits, is that a CArray
    has a chunked layout and, as a consequence, it supports compression.
    You can use datasets of this class to easily save or load arrays to
    or from disk, with compression support included.

    CArray includes all the instance variables and methods of Array.
    Only those with different behavior are mentioned here.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of
        the atomic objects to be saved.

    shape
        The shape of the new array.

    title
        A description for this node (it sets the ``TITLE`` HDF5
        attribute on disk).

    filters
        An instance of the `Filters` class that provides
        information about the desired I/O filters to be applied
        during the life of this object.

    chunkshape
        The shape of the data chunk to be read or written in a
        single HDF5 I/O operation. Filters are applied to those
        chunks of data. The dimensionality of `chunkshape` must
        be the same as that of `shape`. If ``None``, a sensible
        value is calculated (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little'
        or 'big'. If this is not specified, the byteorder is that
        of the platform.

    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True. Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Examples
    --------

    See below a small example of the use of the `CArray` class.
    The code is available in ``examples/carray1.py``::

        import numpy as np
        import tables as tb

        fileName = 'carray1.h5'
        shape = (200, 300)
        atom = tb.UInt8Atom()
        filters = tb.Filters(complevel=5, complib='zlib')

        h5f = tb.open_file(fileName, 'w')
        ca = h5f.create_carray(h5f.root, 'carray', atom, shape,
                               filters=filters)

        # Fill a hyperslab in ``ca``.
        ca[10:60, 20:70] = np.ones((50, 50))
        h5f.close()

        # Re-open and read another hyperslab
        h5f = tb.open_file(fileName)
        print(h5f)
        print(h5f.root.carray[8:12, 18:22])
        h5f.close()

    The output for the previous script is something like::

        carray1.h5 (File) ''
        Last modif.: 'Thu Apr 12 10:15:38 2007'
        Object Tree:
        / (RootGroup) ''
        /carray (CArray(200, 300), shuffle, zlib(5)) ''

        [[0 0 0 0]
         [0 0 0 0]
         [0 0 1 1]
         [0 0 1 1]]

    """

    # Class identifier.
    _c_classid = 'CARRAY'

    def __init__(self, parentnode, name,
                 atom=None, shape=None,
                 title="", filters=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):

        self.atom = atom
        """An `Atom` instance representing the shape, type of the atomic
        objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        # A non-None `atom` means the node is being created anew (rather
        # than opened from an existing file).
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        if new:
            # Validate `atom` and `shape` (and optionally `chunkshape`)
            # before handing control to the generic leaf constructor.
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s." %
                                 type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError as exc:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape)) from exc
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError as exc:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape)) from exc
                # `chunkshape` must have one entry per dimension of `shape`,
                # and every chunk dimension must be at least 1.
                if len(shape) != len(chunkshape):
                    raise ValueError(f"the shape ({shape}) and chunkshape "
                                     f"({chunkshape}) ranks must be equal.")
                elif min(chunkshape) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        # Skip `Array.__init__` on purpose and initialize the generic
        # `Leaf` machinery directly.
        super(Array, self).__init__(parentnode, name, new, filters,
                                    byteorder, _log, track_times)

    def _g_create(self):
        """Create a new array in file (specific part)."""

        if min(self.shape) < 1:
            raise ValueError(
                "shape parameter cannot have zero-dimensions.")
        # Finish the common part of creation process
        return self._g_create_common(self.nrows)

    def _g_create_common(self, expectedrows):
        """Create a new array in file (common part).

        Returns the HDF5 object identifier of the newly created dataset.
        """

        self._v_version = obversion

        if self._v_chunkshape is None:
            # Compute the optimal chunk size
            self._v_chunkshape = self._calc_chunkshape(
                expectedrows, self.rowsize, self.atom.size)
        # Compute the optimal nrowsinbuf
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(self.atom.type, sys.byteorder)

        try:
            # ``self._v_objectid`` needs to be set because would be
            # needed for setting attributes in some descendants later
            # on
            self._v_objectid = self._create_carray(self._v_new_title)
        except Exception:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        return self._v_objectid

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Returns a ``(new_node, nbytes)`` tuple with the freshly created
        `CArray` and the number of bytes of source data copied.
        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = len(range(start, stop, step))
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Build the new CArray object.  (Renamed from `object` to avoid
        # shadowing the builtin of the same name.)
        new_node = CArray(group, name, atom=self.atom, shape=shape,
                          title=title, filters=filters, chunkshape=chunkshape,
                          _log=_log)
        # Start the copy itself
        for start2 in range(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            new_node[start3:stop3] = self.__getitem__(tuple(slices))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.size

        return (new_node, nbytes)