Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/carray.py: 14%

86 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Here is defined the CArray class.""" 

2 

3import sys 

4 

5import numpy as np 

6 

7from .atom import Atom 

8from .array import Array 

9from .utils import correct_byteorder, SizeType 

10 

11 

# Default object version for CARRAY nodes on disk.
# obversion = "1.0"  # Support for time & enumerated datatypes.
obversion = "1.1"  # Numeric and numarray flavors are gone.

15 

16 

class CArray(Array):
    """This class represents homogeneous datasets in an HDF5 file.

    The difference between a CArray and a normal Array (see
    :ref:`ArrayClassDescr`), from which it inherits, is that a CArray
    has a chunked layout and, as a consequence, it supports compression.
    You can use datasets of this class to easily save or load arrays to
    or from disk, with compression support included.

    CArray includes all the instance variables and methods of Array.
    Only those with different behavior are mentioned here.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of
        the atomic objects to be saved.

    shape
        The shape of the new array.

    title
        A description for this node (it sets the ``TITLE`` HDF5
        attribute on disk).

    filters
        An instance of the `Filters` class that provides
        information about the desired I/O filters to be applied
        during the life of this object.

    chunkshape
        The shape of the data chunk to be read or written in a
        single HDF5 I/O operation. Filters are applied to those
        chunks of data. The dimensionality of `chunkshape` must
        be the same as that of `shape`. If ``None``, a sensible
        value is calculated (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little'
        or 'big'. If this is not specified, the byteorder is that
        of the platform.

    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True. Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Examples
    --------

    See below a small example of the use of the `CArray` class.
    The code is available in ``examples/carray1.py``::

        import numpy as np
        import tables as tb

        fileName = 'carray1.h5'
        shape = (200, 300)
        atom = tb.UInt8Atom()
        filters = tb.Filters(complevel=5, complib='zlib')

        h5f = tb.open_file(fileName, 'w')
        ca = h5f.create_carray(h5f.root, 'carray', atom, shape,
                               filters=filters)

        # Fill a hyperslab in ``ca``.
        ca[10:60, 20:70] = np.ones((50, 50))
        h5f.close()

        # Re-open and read another hyperslab
        h5f = tb.open_file(fileName)
        print(h5f)
        print(h5f.root.carray[8:12, 18:22])
        h5f.close()

    The output for the previous script is something like::

        carray1.h5 (File) ''
        Last modif.: 'Thu Apr 12 10:15:38 2007'
        Object Tree:
        / (RootGroup) ''
        /carray (CArray(200, 300), shuffle, zlib(5)) ''

        [[0 0 0 0]
         [0 0 0 0]
         [0 0 1 1]
         [0 0 1 1]]

    """

    # Class identifier.
    _c_classid = 'CARRAY'

    def __init__(self, parentnode, name,
                 atom=None, shape=None,
                 title="", filters=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):

        self.atom = atom
        """An `Atom` instance representing the shape, type of the atomic
        objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        # A non-None `atom` means the node is being created anew (rather
        # than opened from an existing file).
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer. """
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        if new:
            # Validate `atom` and `shape` (and optionally `chunkshape`)
            # before handing control to the generic leaf constructor.
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s." %
                                 type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError as exc:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape)) from exc
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError as exc:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape)) from exc
                # `chunkshape` must have one entry per dimension of `shape`,
                # and every chunk dimension must be at least 1.
                if len(shape) != len(chunkshape):
                    raise ValueError(f"the shape ({shape}) and chunkshape "
                                     f"({chunkshape}) ranks must be equal.")
                elif min(chunkshape) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        # Skip `Array.__init__` on purpose and initialize the generic
        # `Leaf` machinery directly.
        super(Array, self).__init__(parentnode, name, new, filters,
                                    byteorder, _log, track_times)

    def _g_create(self):
        """Create a new array in file (specific part)."""

        if min(self.shape) < 1:
            raise ValueError(
                "shape parameter cannot have zero-dimensions.")
        # Finish the common part of creation process
        return self._g_create_common(self.nrows)

    def _g_create_common(self, expectedrows):
        """Create a new array in file (common part).

        Returns the HDF5 object identifier of the newly created dataset.
        """

        self._v_version = obversion

        if self._v_chunkshape is None:
            # Compute the optimal chunk size
            self._v_chunkshape = self._calc_chunkshape(
                expectedrows, self.rowsize, self.atom.size)
        # Compute the optimal nrowsinbuf
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(self.atom.type, sys.byteorder)

        try:
            # ``self._v_objectid`` needs to be set because would be
            # needed for setting attributes in some descendants later
            # on
            self._v_objectid = self._create_carray(self._v_new_title)
        except Exception:  # XXX
            # Problems creating the Array on disk. Close node and re-raise.
            self.close(flush=0)
            raise

        return self._v_objectid

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Returns a ``(new_node, nbytes)`` tuple with the freshly created
        `CArray` and the number of bytes of source data copied.
        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = len(range(start, stop, step))
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Build the new CArray object.  (Renamed from `object` to avoid
        # shadowing the builtin of the same name.)
        new_node = CArray(group, name, atom=self.atom, shape=shape,
                          title=title, filters=filters, chunkshape=chunkshape,
                          _log=_log)
        # Start the copy itself
        for start2 in range(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            new_node[start3:stop3] = self.__getitem__(tuple(slices))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.size

        return (new_node, nbytes)