Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/earray.py: 21%
53 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Here is defined the EArray class."""
3import numpy as np
5from .utils import convert_to_np_atom2, SizeType
6from .carray import CArray
9# default version for EARRAY objects
10# obversion = "1.0" # initial version
11# obversion = "1.1" # support for complex datatypes
12# obversion = "1.2" # This adds support for time datatypes.
13# obversion = "1.3" # This adds support for enumerated datatypes.
14obversion = "1.4" # Numeric and numarray flavors are gone.
class EArray(CArray):
    """This class represents extendable, homogeneous datasets in an HDF5 file.

    The main difference between an EArray and a CArray (see
    :ref:`CArrayClassDescr`), from which it inherits, is that the former
    can be enlarged along one of its dimensions, the *enlargeable
    dimension*. That means that the :attr:`Leaf.extdim` attribute (see
    :class:`Leaf`) of any EArray instance will always be non-negative.
    Multiple enlargeable dimensions might be supported in the future.

    New rows can be added to the end of an enlargeable array by using the
    :meth:`EArray.append` method.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape*
        of the atomic objects to be saved.
    shape
        The shape of the new array. One (and only one) of
        the shape dimensions *must* be 0. The dimension being 0
        means that the resulting `EArray` object can be extended
        along it. Multiple enlargeable dimensions are not supported
        right now.
    title
        A description for this node (it sets the ``TITLE``
        HDF5 attribute on disk).
    filters
        An instance of the `Filters` class that provides information
        about the desired I/O filters to be applied during the life
        of this object.
    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `EArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_EARRAY``
        (see ``tables/parameters.py``). If you plan to create either
        a much smaller or a much bigger `EArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.
    chunkshape
        The shape of the data chunk to be read or written in a single
        HDF5 I/O operation. Filters are applied to those chunks of data.
        The dimensionality of `chunkshape` must be the same as that of
        `shape` (beware: no dimension should be 0 this time!).
        If ``None``, a sensible value is calculated based on the
        `expectedrows` parameter (which is recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or
        'big'. If this is not specified, the byteorder is that of the
        platform.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True. Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Examples
    --------
    See below a small example of the use of the `EArray` class. The
    code is available in ``examples/earray1.py``::

        import numpy as np
        import tables as tb

        fileh = tb.open_file('earray1.h5', mode='w')
        a = tb.StringAtom(itemsize=8)

        # Use ``a`` as the object type for the enlargeable array.
        array_c = fileh.create_earray(fileh.root, 'array_c', a, (0,),
                                      "Chars")
        array_c.append(np.array(['a'*2, 'b'*4], dtype='S8'))
        array_c.append(np.array(['a'*6, 'b'*8, 'c'*10], dtype='S8'))

        # Read the string ``EArray`` we have created on disk.
        for s in array_c:
            print('array_c[%s] => %r' % (array_c.nrow, s))
        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        array_c[0] => 'aa'
        array_c[1] => 'bbbb'
        array_c[2] => 'aaaaaa'
        array_c[3] => 'bbbbbbbb'
        array_c[4] => 'cccccccc'

    """

    # Class identifier.
    _c_classid = 'EARRAY'

    def __init__(self, parentnode, name,
                 atom=None, shape=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):

        # Specific of EArray: resolve the expected-rows estimate before
        # delegating to the CArray machinery, since it drives chunk sizing.
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_EARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array."""

        # Call the parent (CArray) init code
        super().__init__(parentnode, name, atom, shape, title, filters,
                         chunkshape, byteorder, _log, track_times)

    def _g_create(self):
        """Create a new array in file (specific part).

        Validates that exactly one dimension of ``self.shape`` is 0 and
        records its index in ``self.extdim`` (the enlargeable dimension).

        Raises
        ------
        ValueError
            If no dimension is 0.
        NotImplementedError
            If more than one dimension is 0.
        """

        # Pre-conditions and extdim computation
        zerodims = np.sum(np.array(self.shape) == 0)
        if zerodims > 0:
            if zerodims == 1:
                self.extdim = list(self.shape).index(0)
            else:
                raise NotImplementedError(
                    "Multiple enlargeable (0-)dimensions are not "
                    "supported.")
        else:
            raise ValueError(
                "When creating EArrays, you need to set one of "
                "the dimensions of the Atom instance to zero.")

        # Finish the common part of the creation process
        return self._g_create_common(self._v_expectedrows)

    def _check_shape_append(self, nparr):
        """Test that nparr shape is consistent with underlying EArray.

        Raises ValueError when the appended array's rank (excluding the
        atom's own dimensions) differs from the EArray's rank, or when any
        non-enlargeable dimension does not match.
        """

        # Does the array conform to self expandibility?
        myrank = len(self.shape)
        # The atom's trailing dimensions do not count towards the rank.
        narank = len(nparr.shape) - len(self.atom.shape)
        if myrank != narank:
            raise ValueError(("the ranks of the appended object (%d) and the "
                              "``%s`` EArray (%d) differ")
                             % (narank, self._v_pathname, myrank))
        for i in range(myrank):
            if i != self.extdim and self.shape[i] != nparr.shape[i]:
                raise ValueError(("the shapes of the appended object and the "
                                  "``%s`` EArray differ in non-enlargeable "
                                  "dimension %d") % (self._v_pathname, i))

    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        The sequence must have the same type as the array; otherwise a
        TypeError is raised. In the same way, the dimensions of the
        sequence must conform to the shape of the array, that is, all
        dimensions must match, with the exception of the enlargeable
        dimension, which can be of any length (even 0!). If the shape
        of the sequence is invalid, a ValueError is raised.

        """

        self._g_check_open()
        self._v_file._check_writable()

        # Convert the sequence into a NumPy object
        nparr = convert_to_np_atom2(sequence, self.atom)
        # Check if it has a consistent shape with underlying EArray
        self._check_shape_append(nparr)
        # If the size of the nparr is zero, don't do anything else
        if nparr.size > 0:
            self._append(nparr)

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf.

        Creates an empty EArray in `group` and fills it buffer-by-buffer
        with the selected range of rows from this array.  Returns a tuple
        ``(new_array, nbytes)`` where `nbytes` is the byte size of the
        *source* data.
        """

        (start, stop, step) = self._process_range_read(start, stop, step)
        # Build the shape of the destination: same as self, but empty
        # along the enlargeable (main) dimension.
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = 0
        # The number of final rows
        nrows = len(range(start, stop, step))
        # Build the new EArray object.  NOTE: renamed from ``object`` to
        # avoid shadowing the ``object`` builtin.
        new_array = EArray(
            group, name, atom=self.atom, shape=shape, title=title,
            filters=filters, expectedrows=nrows, chunkshape=chunkshape,
            _log=_log)
        # Now, fill the new earray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Start the copy itself, one buffer of rows at a time.
        for start2 in range(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the extensible dimension
            slices[maindim] = slice(start2, stop2, step)
            new_array._append(self.__getitem__(tuple(slices)))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.itemsize

        return (new_array, nbytes)