Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/earray.py: 21%
53 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Here is defined the EArray class."""
3import numpy as np
5from .utils import convert_to_np_atom2, SizeType
6from .carray import CArray
9# default version for EARRAY objects
10# obversion = "1.0" # initial version
11# obversion = "1.1" # support for complex datatypes
12# obversion = "1.2" # This adds support for time datatypes.
13# obversion = "1.3" # This adds support for enumerated datatypes.
14obversion = "1.4" # Numeric and numarray flavors are gone.
class EArray(CArray):
    """This class represents extendable, homogeneous datasets in an HDF5 file.

    The main difference between an EArray and a CArray (see
    :ref:`CArrayClassDescr`), from which it inherits, is that the former
    can be enlarged along one of its dimensions, the *enlargeable
    dimension*. That means that the :attr:`Leaf.extdim` attribute (see
    :class:`Leaf`) of any EArray instance will always be non-negative.
    Multiple enlargeable dimensions might be supported in the future.

    New rows can be added to the end of an enlargeable array by using the
    :meth:`EArray.append` method.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape*
        of the atomic objects to be saved.
    shape
        The shape of the new array. One (and only one) of
        the shape dimensions *must* be 0. The dimension being 0
        means that the resulting `EArray` object can be extended
        along it. Multiple enlargeable dimensions are not supported
        right now.
    title
        A description for this node (it sets the ``TITLE``
        HDF5 attribute on disk).
    filters
        An instance of the `Filters` class that provides information
        about the desired I/O filters to be applied during the life
        of this object.
    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `EArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_EARRAY``
        (see ``tables/parameters.py``). If you plan to create either
        a much smaller or a much bigger `EArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.
    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation. Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape` (beware: no dimension should be 0 this time!).
        If ``None``, a sensible value is calculated based on the
        `expectedrows` parameter (which is recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'. If this is not specified, the byteorder is that of the
        platform.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True. Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Examples
    --------
    See below a small example of the use of the `EArray` class. The
    code is available in ``examples/earray1.py``::

        import numpy as np
        import tables as tb

        fileh = tb.open_file('earray1.h5', mode='w')
        a = tb.StringAtom(itemsize=8)

        # Use ``a`` as the object type for the enlargeable array.
        array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,),
                                      "Chars")
        array_c.append(np.array(['a'*2, 'b'*4], dtype='S8'))
        array_c.append(np.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))

        # Read the string ``EArray`` we have created on disk.
        for s in array_c:
            print('array_c[%s] => %r' % (array_c.nrow, s))
        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        array_c[0] => 'aa'
        array_c[1] => 'bbbb'
        array_c[2] => 'aaaaaa'
        array_c[3] => 'bbbbbbbb'
        array_c[4] => 'cccccccc'

    """

    # Class identifier.
    _c_classid = 'EARRAY'

    def __init__(self, parentnode, name,
                 atom=None, shape=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):

        # Specific of EArray: resolve the expected-rows estimate before
        # delegating to the CArray machinery, since it drives chunk sizing.
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array."""

        # Call the parent (CArray) init code
        super().__init__(parentnode, name, atom, shape, title, filters,
                         chunkshape, byteorder, _log, track_times)

    def _g_create(self):
        """Create a new array in file (specific part).

        Validates that exactly one dimension of ``self.shape`` is 0 and
        records its index in ``self.extdim`` (the enlargeable dimension).

        Raises
        ------
        ValueError
            If no dimension is 0.
        NotImplementedError
            If more than one dimension is 0.
        """

        # Pre-conditions and extdim computation
        zerodims = np.sum(np.array(self.shape) == 0)
        if zerodims > 0:
            if zerodims == 1:
                self.extdim = list(self.shape).index(0)
            else:
                raise NotImplementedError(
                    "Multiple enlargeable (0-)dimensions are not "
                    "supported.")
        else:
            raise ValueError(
                "When creating EArrays, you need to set one of "
                "the dimensions of the Atom instance to zero.")

        # Finish the common part of the creation process
        return self._g_create_common(self._v_expectedrows)

    def _check_shape_append(self, nparr):
        """Test that nparr shape is consistent with underlying EArray.

        Raises ValueError when the appended array's rank (excluding the
        atom's own dimensions) differs from the EArray's rank, or when any
        non-enlargeable dimension does not match.
        """

        # Does the array conform to self expandibility?
        myrank = len(self.shape)
        # The atom's trailing dimensions do not count towards the rank.
        narank = len(nparr.shape) - len(self.atom.shape)
        if myrank != narank:
            raise ValueError(("the ranks of the appended object (%d) and the "
                              "``%s`` EArray (%d) differ")
                             % (narank, self._v_pathname, myrank))
        for i in range(myrank):
            if i != self.extdim and self.shape[i] != nparr.shape[i]:
                raise ValueError(("the shapes of the appended object and the "
                                  "``%s`` EArray differ in non-enlargeable "
                                  "dimension %d") % (self._v_pathname, i))

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        The sequence must have the same type as the array; otherwise a
        TypeError is raised. In the same way, the dimensions of the
        sequence must conform to the shape of the array, that is, all
        dimensions must match, with the exception of the enlargeable
        dimension, which can be of any length (even 0!). If the shape
        of the sequence is invalid, a ValueError is raised.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Convert the sequence into a NumPy object
        nparr = convert_to_np_atom2(sequence, self.atom)
        # Check if it has a consistent shape with underlying EArray
        self._check_shape_append(nparr)
        # If the size of the nparr is zero, don't do anything else
        if nparr.size > 0:
            self._append(nparr)

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates an empty EArray in `group` and fills it buffer-by-buffer
        with the selected range of rows from this array.  Returns a tuple
        ``(new_array, nbytes)`` where `nbytes` is the byte size of the
        *source* data.
        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        # Build the shape of the destination: same as self, but empty
        # along the enlargeable (main) dimension.
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = len(range(start, stop, step))
        # Build the new EArray object.  NOTE: renamed from ``object`` to
        # avoid shadowing the ``object`` builtin.
        new_array = EArray(
            group, name, atom=self.atom, shape=shape, title=title,
            filters=filters, expectedrows=nrows, chunkshape=chunkshape,
            _log=_log)
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Start the copy itself, one buffer of rows at a time.
        for start2 in range(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the extensible dimension
            slices[maindim] = slice(start2, stop2, step)
            new_array._append(self.__getitem__(tuple(slices)))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (new_array, nbytes)