1"""Here is defined the CArray class."""
3import sys
5import numpy as np
7from .atom import Atom
8from .array import Array
9from .utils import correct_byteorder, SizeType
12# default version for CARRAY objects
13# obversion = "1.0" # Support for time & enumerated datatypes.
14obversion = "1.1" # Numeric and numarray flavors are gone.

class CArray(Array):
    """This class represents homogeneous datasets in an HDF5 file.

    The difference between a CArray and a normal Array (see
    :ref:`ArrayClassDescr`), from which it inherits, is that a CArray
    has a chunked layout and, as a consequence, it supports compression.
    You can use datasets of this class to easily save or load arrays to
    or from disk, with compression support included.

    CArray includes all the instance variables and methods of Array.
    Only those with different behavior are mentioned here.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of
        the atomic objects to be saved.

    shape
        The shape of the new array.

    title
        A description for this node (it sets the ``TITLE`` HDF5
        attribute on disk).

    filters
        An instance of the `Filters` class that provides
        information about the desired I/O filters to be applied
        during the life of this object.

    chunkshape
        The shape of the data chunk to be read or written in a
        single HDF5 I/O operation.  Filters are applied to those
        chunks of data.  The dimensionality of `chunkshape` must
        be the same as that of `shape`.  If ``None``, a sensible
        value is calculated (which is recommended).

    byteorder
        The byteorder of the data *on disk*, specified as 'little'
        or 'big'.  If this is not specified, the byteorder is that
        of the platform.

    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time,
        object birth time); default True.  Semantics of these times
        depend on their implementation in the HDF5 library: refer to
        documentation of the H5O_info_t data structure.  As of HDF5
        1.8.15, only ctime (metadata change time) is implemented.

        .. versionadded:: 3.4.3

    Examples
    --------

    Below is a small example of the use of the `CArray` class.  The
    code is available in ``examples/carray1.py``::

        import numpy as np
        import tables as tb

        fileName = 'carray1.h5'
        shape = (200, 300)
        atom = tb.UInt8Atom()
        filters = tb.Filters(complevel=5, complib='zlib')

        h5f = tb.open_file(fileName, 'w')
        ca = h5f.create_carray(h5f.root, 'carray', atom, shape,
                               filters=filters)

        # Fill a hyperslab in ``ca``.
        ca[10:60, 20:70] = np.ones((50, 50))
        h5f.close()

        # Re-open and read another hyperslab.
        h5f = tb.open_file(fileName)
        print(h5f)
        print(h5f.root.carray[8:12, 18:22])
        h5f.close()

    The output for the previous script is something like::

        carray1.h5 (File) ''
        Last modif.: 'Thu Apr 12 10:15:38 2007'
        Object Tree:
        / (RootGroup) ''
        /carray (CArray(200, 300), shuffle, zlib(5)) ''

        [[0 0 0 0]
         [0 0 0 0]
         [0 0 1 1]
         [0 0 1 1]]
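
    As an illustrative variation (not part of ``examples/carray1.py``),
    an explicit chunk shape and on-disk byteorder can also be passed to
    :meth:`File.create_carray` instead of letting PyTables compute a
    chunk shape automatically.  The names ``ca2`` and ``'carray2'``
    below are hypothetical::

        ca2 = h5f.create_carray(h5f.root, 'carray2', atom, shape,
                                filters=filters, chunkshape=(20, 30),
                                byteorder='little')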
118 """

    # Class identifier.
    _c_classid = 'CARRAY'

    def __init__(self, parentnode, name,
                 atom=None, shape=None,
                 title="", filters=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):

        self.atom = atom
        """An `Atom` instance representing the shape and type of the
        atomic objects to be saved.
        """
        self.shape = None
        """The shape of the stored array."""
        self.extdim = -1  # `CArray` objects are not enlargeable by default
        """The index of the enlargeable dimension."""

        # Other private attributes
        self._v_version = None
        """The object version of this array."""
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""
        self._v_chunkshape = chunkshape
        """Private storage for the `chunkshape` property of the leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""
        self._stop = None
        """Stopping row for the current iteration."""
        self._step = None
        """Step size for the current iteration."""
        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""
        self._startb = None
        """Starting row for current buffer."""
        self._stopb = None
        """Stopping row for current buffer."""
        self._row = None
        """Current row in iterators (sentinel)."""
        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""
        self.listarr = None
        """Current buffer in iterators."""

        if new:
            if not isinstance(atom, Atom):
                raise ValueError("atom parameter should be an instance of "
                                 "tables.Atom and you passed a %s." %
                                 type(atom))
            if shape is None:
                raise ValueError("you must specify a non-empty shape")
            try:
                shape = tuple(shape)
            except TypeError:
                raise TypeError("`shape` parameter must be a sequence "
                                "and you passed a %s" % type(shape))
            self.shape = tuple(SizeType(s) for s in shape)

            if chunkshape is not None:
                try:
                    chunkshape = tuple(chunkshape)
                except TypeError:
                    raise TypeError(
                        "`chunkshape` parameter must be a sequence "
                        "and you passed a %s" % type(chunkshape))
                if len(shape) != len(chunkshape):
                    raise ValueError(f"the shape ({shape}) and chunkshape "
                                     f"({chunkshape}) ranks must be equal.")
                elif min(chunkshape) < 1:
                    raise ValueError("chunkshape parameter cannot have "
                                     "zero-dimensions.")
                self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # The `Array` class is not abstract enough! :(
        super(Array, self).__init__(parentnode, name, new, filters,
                                    byteorder, _log, track_times)

    def _g_create(self):
        """Create a new array in file (specific part)."""

        if min(self.shape) < 1:
            raise ValueError(
                "shape parameter cannot have zero-dimensions.")
        # Finish the common part of the creation process.
        return self._g_create_common(self.nrows)

    def _g_create_common(self, expectedrows):
        """Create a new array in file (common part)."""

        self._v_version = obversion

        if self._v_chunkshape is None:
            # Compute the optimal chunk size.
            self._v_chunkshape = self._calc_chunkshape(
                expectedrows, self.rowsize, self.atom.size)
        # Compute the optimal nrowsinbuf.
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Correct the byteorder if needed.
        if self.byteorder is None:
            self.byteorder = correct_byteorder(self.atom.type, sys.byteorder)

        try:
            # ``self._v_objectid`` needs to be set because it will be
            # needed for setting attributes in some descendants later on.
            self._v_objectid = self._create_carray(self._v_new_title)
        except Exception:  # XXX
            # Problems creating the array on disk.  Close the node and
            # re-raise.
            self.close(flush=0)
            raise

        return self._v_objectid

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        (start, stop, step) = self._process_range_read(start, stop, step)
        maindim = self.maindim
        shape = list(self.shape)
        shape[maindim] = len(range(start, stop, step))
        # Now, fill the new carray with values from source
        nrowsinbuf = self.nrowsinbuf
        # The slices parameter for self.__getitem__
        slices = [slice(0, dim, 1) for dim in self.shape]
        # This is a hack to prevent doing unnecessary conversions
        # when copying buffers
        self._v_convert = False
        # Build the new CArray object
        object = CArray(group, name, atom=self.atom, shape=shape,
                        title=title, filters=filters, chunkshape=chunkshape,
                        _log=_log)
        # Start the copy itself
        for start2 in range(start, stop, step * nrowsinbuf):
            # Save the records on disk
            stop2 = start2 + step * nrowsinbuf
            if stop2 > stop:
                stop2 = stop
            # Set the proper slice in the main dimension
            slices[maindim] = slice(start2, stop2, step)
            start3 = (start2 - start) // step
            stop3 = start3 + nrowsinbuf
            if stop3 > shape[maindim]:
                stop3 = shape[maindim]
            # The next line should be generalised if, in the future,
            # maindim is designed to be different from 0 in CArrays.
            # See ticket #199.
            object[start3:stop3] = self.__getitem__(tuple(slices))
        # Activate the conversion again (default)
        self._v_convert = True
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.size

        return (object, nbytes)
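
# A minimal, hypothetical usage sketch (not part of this module) showing
# how the public ``Leaf.copy()`` API ends up exercising
# ``_g_copy_with_stats()`` above.  It assumes the 'carray1.h5' file from
# the class docstring example already exists; the name 'carray_copy' is
# illustrative only:
#
#     import tables as tb
#
#     h5f = tb.open_file('carray1.h5', 'a')
#     ca = h5f.root.carray
#     # Copying with different filters streams the data buffer by buffer
#     # through CArray._g_copy_with_stats().
#     ca.copy(h5f.root, 'carray_copy', filters=tb.Filters(complevel=1))
#     h5f.close()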