1"""Here is defined the Array class."""
3import operator
4import sys
5import numpy as np
7from . import hdf5extension
8from .filters import Filters
9from .flavor import flavor_of, array_as_internal, internal_to_flavor
10from .leaf import Leaf
11from .utils import (is_idx, convert_to_np_atom2, SizeType, lazyattr,
12 byteorders, quantize)
15# default version for ARRAY objects
16# obversion = "1.0" # initial version
17# obversion = "2.0" # Added an optional EXTDIM attribute
18# obversion = "2.1" # Added support for complex datatypes
19# obversion = "2.2" # This adds support for time datatypes.
20# obversion = "2.3" # This adds support for enumerated datatypes.
21obversion = "2.4" # Numeric and numarray flavors are gone.


class Array(hdf5extension.Array, Leaf):
    """This class represents homogeneous datasets in an HDF5 file.

    This class provides methods to write or read data to or from array
    objects in the file. This class does not allow you to enlarge or
    compress the datasets on disk; use the EArray class (see
    :ref:`EArrayClassDescr`) if you want enlargeable dataset support or
    compression features, or CArray (see :ref:`CArrayClassDescr`) if you
    just want compression.

    An interesting property of the Array class is that it remembers the
    *flavor* of the object that has been saved so that if you saved, for
    example, a list, you will get a list during readings afterwards; if you
    saved a NumPy array, you will get a NumPy object, and so forth.

    Note that this class inherits all the public attributes and methods that
    Leaf (see :ref:`LeafClassDescr`) already provides. However, as Array
    instances have no internal I/O buffers, it is not necessary to use the
    flush() method they inherit from Leaf in order to save their internal
    state to disk.  When a writing method call returns, all the data is
    already on disk.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    obj
        The array or scalar to be saved.  Accepted types are NumPy arrays
        and scalars, as well as native Python sequences and scalars,
        provided that values are regular (i.e. they are not like
        ``[[1, 2], 2]``) and homogeneous (i.e. all the elements are of the
        same type).

        .. versionchanged:: 3.0
           Renamed from *object* to *obj*.

    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute
        on disk).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the given *obj*.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time,
        object birth time); default True.  Semantics of these times depend
        on their implementation in the HDF5 library: refer to the
        documentation of the H5O_info_t data structure.  As of HDF5 1.8.15,
        only ctime (metadata change time) is implemented.

        .. versionadded:: 3.4.3

    """

    # Class identifier.
    _c_classid = 'ARRAY'

    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""

        return self.atom.dtype

    @property
    def nrows(self):
        """The number of rows in the array."""

        if self.shape == ():
            return SizeType(1)  # scalar case
        else:
            return self.shape[self.maindim]

    @property
    def rowsize(self):
        """The size of the rows in bytes in dimensions orthogonal to
        *maindim*."""

        maindim = self.maindim
        rowsize = self.atom.size
        for i, dim in enumerate(self.shape):
            if i != maindim:
                rowsize *= dim
        return rowsize

    @property
    def size_in_memory(self):
        """The size of this array's data in bytes when it is fully loaded
        into memory."""

        return self.nrows * self.rowsize
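
    # A quick worked example of the three size properties above (an
    # illustrative sketch; ``arr`` stands for a hypothetical Array of shape
    # (100, 20) with a float64 atom, i.e. 8 bytes per element):
    #
    #     arr.nrows           # -> 100  (length along the main dimension)
    #     arr.rowsize         # -> 20 * 8 = 160 bytes per row
    #     arr.size_in_memory  # -> 100 * 160 = 16000 bytes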

    def __init__(self, parentnode, name,
                 obj=None, title="",
                 byteorder=None, _log=True, _atom=None,
                 track_times=True):

        self._v_version = None
        """The object version of this array."""

        self._v_new = new = obj is not None
        """Is this the first time the node has been created?"""

        self._v_new_title = title
        """New title for this node."""

        self._obj = obj
        """The object to be stored in the array.  It can be any of the
        numpy, list, tuple, string, integer or floating point types,
        provided that they are regular (i.e. they are not like
        ``[[1, 2], 2]``).

        .. versionchanged:: 3.0
           Renamed from *_object* to *_obj*.

        """

        self._v_convert = True
        """Whether the ``Array`` object must be converted or not."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""

        self._stop = None
        """Stopping row for the current iteration."""

        self._step = None
        """Step size for the current iteration."""

        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""

        self._startb = None
        """Starting row for current buffer."""

        self._stopb = None
        """Stopping row for current buffer."""

        self._row = None
        """Current row in iterators (sentinel)."""

        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""

        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = _atom
        """An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved.
        """

        self.shape = None
        """The shape of the stored array."""

        self.nrow = None
        """On iterators, this is the index of the current row."""

        self.extdim = -1   # ordinary arrays are not enlargeable
        """The index of the enlargeable dimension."""

        # Ordinary arrays have no filters: leaf is created with default ones.
        super().__init__(parentnode, name, new, Filters(), byteorder, _log,
                         track_times)

    def _g_create(self):
        """Save a new array in file."""

        self._v_version = obversion
        try:
            # `Leaf._g_post_init_hook()` should be setting the flavor on disk.
            self._flavor = flavor = flavor_of(self._obj)
            nparr = array_as_internal(self._obj, flavor)
        except Exception:  # XXX
            # Problems converting data.  Close the node and re-raise exception.
            self.close(flush=0)
            raise

        # Raise an error in case of unsupported object
        if nparr.dtype.kind in ['V', 'U', 'O']:  # in void, unicode, object
            raise TypeError("Array objects cannot currently deal with void, "
                            "unicode or object arrays")

        # Decrease the number of references to the object
        self._obj = None

        # Fix the byteorder of data
        nparr = self._g_fix_byteorder_data(nparr, nparr.dtype.byteorder)

        # Create the array on-disk
        try:
            # ``self._v_objectid`` needs to be set because it will be
            # needed for setting attributes in some descendants later on.
            (self._v_objectid, self.shape, self.atom) = self._create_array(
                nparr, self._v_new_title, self.atom)
        except Exception:  # XXX
            # Problems creating the Array on disk.  Close node and re-raise.
            self.close(flush=0)
            raise

        # Compute the optimal buffer size
        self.nrowsinbuf = self._calc_nrowsinbuf()
        # Arrays don't have chunkshapes (so, set it to None)
        self._v_chunkshape = None

        return self._v_objectid

    def _g_open(self):
        """Get the metadata info for an array in file."""

        (oid, self.atom, self.shape, self._v_chunkshape) = self._open_array()

        self.nrowsinbuf = self._calc_nrowsinbuf()

        return oid

    def get_enum(self):
        """Get the enumerated type associated with this array.

        If this array is of an enumerated type, the corresponding Enum
        instance (see :ref:`EnumClassDescr`) is returned.  If it is not of
        an enumerated type, a TypeError is raised.

        """

        if self.atom.kind != 'enum':
            raise TypeError("array ``%s`` is not of an enumerated type"
                            % self._v_pathname)

        return self.atom.enum
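
    # Illustrative sketch of get_enum() (the node name ``palette`` and the
    # open file handle ``h5file`` are hypothetical, not defined here):
    #
    #     colors = h5file.root.palette.get_enum()
    #     colors['red']    # -> the concrete (integer) value behind 'red'
    #
    # Calling get_enum() on a non-enumerated array raises TypeError instead.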

    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over the rows of the array.

        This method returns an iterator yielding an object of the current
        flavor for each selected row in the array.  The returned rows are
        taken from the *main dimension*.

        If a range is not supplied, *all the rows* in the array are iterated
        upon - you can also use the :meth:`Array.__iter__` special method
        for that purpose.  If you only want to iterate over a given *range
        of rows* in the array, you may use the start, stop and step
        parameters.

        Examples
        --------

        ::

            result = [row for row in arrayInstance.iterrows(step=4)]

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then the
           array is iterated from *start* to the last line.
           In PyTables < 3.0 only one element was returned.

        """

        try:
            (self._start, self._stop, self._step) = self._process_range(
                start, stop, step)
        except IndexError:
            # If problems with indexes, silently return the null tuple
            return ()
        self._init_loop()
        return self

    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`Array.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------

        ::

            result = [row[2] for row in array]

        Which is equivalent to::

            result = [row[2] for row in array.iterrows()]

        """

        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self

    def _init_loop(self):
        """Initialization for the __iter__ iterator."""

        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1   # Sentinel
        self._init = True  # Sentinel
        self.nrow = SizeType(self._start - self._step)  # row number

    def __next__(self):
        """Get the next element of the array during an iteration.

        The element is returned as an object of the current flavor.

        """

        # this could probably be sped up for long iterations by reusing the
        # listarr buffer
        if self._nrowsread >= self._stop:
            self._init = False
            self.listarr = None       # fixes issue #308
            raise StopIteration       # end of iteration
        else:
            # Read a chunk of rows
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                # Protection for reading more elements than needed
                if self._stopb > self._stop:
                    self._stopb = self._stop
                listarr = self._read(self._startb, self._stopb, self._step)
                # Swap the axes to ease the return of elements
                if self.extdim > 0:
                    listarr = listarr.swapaxes(self.extdim, 0)
                self.listarr = internal_to_flavor(listarr, self.flavor)
                self._row = -1
                self._startb = self._stopb
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            # Fixes bug #968132
            # if self.listarr.shape:
            if self.shape:
                return self.listarr[self._row]
            else:
                return self.listarr    # Scalar case

    def _interpret_indexing(self, keys):
        """Internal routine used by __getitem__ and __setitem__"""

        maxlen = len(self.shape)
        shape = (maxlen,)
        startl = np.empty(shape=shape, dtype=SizeType)
        stopl = np.empty(shape=shape, dtype=SizeType)
        stepl = np.empty(shape=shape, dtype=SizeType)
        stop_None = np.zeros(shape=shape, dtype=SizeType)
        if not isinstance(keys, tuple):
            keys = (keys,)
        nkeys = len(keys)
        dim = 0
        # There is a known problem when dealing with ``[..., ...]``-style
        # keys, but this is a rather odd way of passing parameters anyway.
        for key in keys:
            ellipsis = 0  # Sentinel
            if isinstance(key, type(Ellipsis)):
                ellipsis = 1
                for diml in range(dim, len(self.shape) - (nkeys - dim) + 1):
                    startl[dim] = 0
                    stopl[dim] = self.shape[diml]
                    stepl[dim] = 1
                    dim += 1
            elif dim >= maxlen:
                raise IndexError("Too many indices for object '%s'" %
                                 self._v_pathname)
            elif is_idx(key):
                key = operator.index(key)

                # Protection for index out of range
                if key >= self.shape[dim]:
                    raise IndexError("Index out of range")
                if key < 0:
                    # To support negative values (Fixes bug #968149)
                    key += self.shape[dim]
                start, stop, step = self._process_range(
                    key, key + 1, 1, dim=dim)
                stop_None[dim] = 1
            elif isinstance(key, slice):
                start, stop, step = self._process_range(
                    key.start, key.stop, key.step, dim=dim)
            else:
                raise TypeError("Non-valid index or slice: %s" % key)
            if not ellipsis:
                startl[dim] = start
                stopl[dim] = stop
                stepl[dim] = step
                dim += 1

        # Complete the other dimensions, if needed
        if dim < len(self.shape):
            for diml in range(dim, len(self.shape)):
                startl[dim] = 0
                stopl[dim] = self.shape[diml]
                stepl[dim] = 1
                dim += 1

        # Compute the shape for the container properly.  Fixes #1288792
        shape = []
        for dim in range(len(self.shape)):
            new_dim = len(range(startl[dim], stopl[dim], stepl[dim]))
            if not (new_dim == 1 and stop_None[dim]):
                shape.append(new_dim)

        return startl, stopl, stepl, shape
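
    # A small worked example of what _interpret_indexing() computes (an
    # illustrative sketch for an array of shape (10, 20)):
    #
    #     key = (3, slice(2, 8, 2))
    #     # -> startl = [3, 2], stopl = [4, 8], stepl = [1, 2]
    #     # -> shape  = [3]  (the integer index drops its dimension; the
    #     #                   slice keeps len(range(2, 8, 2)) == 3 elements)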

    def _fancy_selection(self, args):
        """Performs a NumPy-style fancy selection in `self`.

        Implements advanced NumPy-style selection operations in
        addition to the standard slice-and-int behavior.

        Indexing arguments may be ints, slices or lists of indices.

        Note: This is a backport from the h5py project.

        """

        # Internal functions

        def validate_number(num, length):
            """Validate a list member for the given axis length."""

            try:
                num = int(num)
            except TypeError:
                raise TypeError("Illegal index: %r" % num)
            if num > length - 1:
                raise IndexError("Index out of bounds: %d" % num)

        def expand_ellipsis(args, rank):
            """Expand ellipsis objects and fill in missing axes."""

            n_el = sum(1 for arg in args if arg is Ellipsis)
            if n_el > 1:
                raise IndexError("Only one ellipsis may be used.")
            elif n_el == 0 and len(args) != rank:
                args = args + (Ellipsis,)

            final_args = []
            n_args = len(args)
            for idx, arg in enumerate(args):
                if arg is Ellipsis:
                    final_args.extend((slice(None),) * (rank - n_args + 1))
                else:
                    final_args.append(arg)

            if len(final_args) > rank:
                raise IndexError("Too many indices.")

            return final_args

        def translate_slice(exp, length):
            """Given a slice object, return a 3-tuple (start, count, step)

            This is for use with the hyperslab selection routines.

            """

            start, stop, step = exp.start, exp.stop, exp.step
            if start is None:
                start = 0
            else:
                start = int(start)
            if stop is None:
                stop = length
            else:
                stop = int(stop)
            if step is None:
                step = 1
            else:
                step = int(step)

            if step < 1:
                raise IndexError("Step must be >= 1 (got %d)" % step)
            if stop == start:
                raise IndexError("Zero-length selections are not allowed")
            if stop < start:
                raise IndexError("Reverse-order selections are not allowed")
            if start < 0:
                start = length + start
            if stop < 0:
                stop = length + stop

            if not 0 <= start <= (length - 1):
                raise IndexError(
                    "Start index %s out of range (0-%d)" % (start, length - 1))
            if not 1 <= stop <= length:
                raise IndexError(
                    "Stop index %s out of range (1-%d)" % (stop, length))

            count = (stop - start) // step
            if (stop - start) % step != 0:
                count += 1

            if start + count > length:
                raise IndexError(
                    "Selection out of bounds (%d; axis has %d)" %
                    (start + count, length))

            return start, count, step
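
        # A worked example of translate_slice() (illustrative only):
        #
        #     translate_slice(slice(1, 10, 4), 12)   # -> (1, 3, 4)
        #
        # start = 1; count = (10 - 1) // 4 = 2, plus 1 for the non-zero
        # remainder, giving 3; step = 4; the selected indices are 1, 5, 9.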

        # Main code for _fancy_selection
        mshape = []
        selection = []

        if not isinstance(args, tuple):
            args = (args,)

        args = expand_ellipsis(args, len(self.shape))

        list_seen = False
        reorder = None
        for idx, (exp, length) in enumerate(zip(args, self.shape)):
            if isinstance(exp, slice):
                start, count, step = translate_slice(exp, length)
                selection.append((start, count, step, idx, "AND"))
                mshape.append(count)
            else:
                try:
                    exp = list(exp)
                except TypeError:
                    exp = [exp]  # Handle scalar index as a list of length 1
                    mshape.append(0)  # Keep track of scalar index for NumPy
                else:
                    mshape.append(len(exp))
                if len(exp) == 0:
                    raise IndexError(
                        "Empty selections are not allowed (axis %d)" % idx)
                elif len(exp) > 1:
                    if list_seen:
                        raise IndexError("Only one selection list is allowed")
                    else:
                        list_seen = True
                else:
                    if (not isinstance(exp[0], (int, np.integer)) or
                            (isinstance(exp[0], np.ndarray) and not
                             np.issubdtype(exp[0].dtype, np.integer))):
                        raise TypeError("Only integer coordinates allowed.")

                nexp = np.asarray(exp, dtype="i8")
                # Convert negative values
                nexp = np.where(nexp < 0, length + nexp, nexp)
                # Check whether the list is ordered or not
                # (only one unordered list is allowed)
                if len(nexp) != len(np.unique(nexp)):
                    raise IndexError(
                        "Selection lists cannot have repeated values")
                neworder = nexp.argsort()
                if (neworder.shape != (len(exp),) or
                        np.sum(np.abs(neworder - np.arange(len(exp)))) != 0):
                    if reorder is not None:
                        raise IndexError(
                            "Only one selection list can be unordered")
                    corrected_idx = sum(1 for x in mshape if x != 0) - 1
                    reorder = (corrected_idx, neworder)
                    nexp = nexp[neworder]
                for select_idx in range(len(nexp) + 1):
                    # This crazy piece of code performs a list selection
                    # using HDF5 hyperslabs.
                    # For each index, perform a "NOTB" selection on every
                    # portion of *this axis* which falls *outside* the list
                    # selection.  For this to work, the input array MUST be
                    # monotonically increasing.
                    if select_idx < len(nexp):
                        validate_number(nexp[select_idx], length)
                    if select_idx == 0:
                        start = 0
                        count = nexp[0]
                    elif select_idx == len(nexp):
                        start = nexp[-1] + 1
                        count = length - start
                    else:
                        start = nexp[select_idx - 1] + 1
                        count = nexp[select_idx] - start
                    if count > 0:
                        selection.append((start, count, 1, idx, "NOTB"))

        mshape = tuple(x for x in mshape if x != 0)
        return selection, reorder, mshape
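
    # A hedged worked example of the hyperslab encoding built above: for a
    # selection list [2, 5] on an axis of length 8, the loop emits "NOTB"
    # blocks covering everything *outside* the listed indices, namely
    # (start=0, count=2), (start=3, count=2) and (start=6, count=2), which
    # leaves only rows 2 and 5 selected on that axis.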

    def __getitem__(self, key):
        """Get a row, a range of rows or a slice from the array.

        The set of tokens allowed for the key is the same as that for
        extended slicing in Python (including the Ellipsis or ... token).
        The result is an object of the current flavor; its shape depends on
        the kind of slice used as key and the shape of the array itself.

        Furthermore, NumPy-style fancy indexing, where a list of indices in
        a certain axis is specified, is also supported.  Note that only one
        list per selection is supported right now.  Finally, NumPy-style
        point and boolean selections are supported as well.

        Examples
        --------

        ::

            array1 = array[4]                       # simple selection
            array2 = array[4:1000:2]                # slice selection
            array3 = array[1, ..., ::2, 1:4, 4:]    # general slice selection
            array4 = array[1, [1, 5, 10], ..., -1]  # fancy selection
            array5 = array[np.where(array[:] > 4)]  # point selection
            array6 = array[array[:] > 4]            # boolean selection

        """

        self._g_check_open()

        try:
            # First, try with a regular selection
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            arr = self._read_slice(startl, stopl, stepl, shape)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                arr = self._read_coords(coords)
            except TypeError:
                # Finally, try with a fancy selection
                selection, reorder, shape = self._fancy_selection(key)
                arr = self._read_selection(selection, reorder, shape)

        if self.flavor == "numpy" or not self._v_convert:
            return arr

        return internal_to_flavor(arr, self.flavor)

    def __setitem__(self, key, value):
        """Set a row, a range of rows or a slice in the array.

        It takes different actions depending on the type of the key
        parameter: if it is an integer, the corresponding array row is set
        to value (the value is broadcast when needed).  If key is a slice,
        the row slice determined by it is set to value (as usual, if the
        slice to be updated exceeds the actual shape of the array, only the
        values in the existing range are updated).

        If value is a multidimensional object, then its shape must be
        compatible with the shape determined by key, otherwise a ValueError
        will be raised.

        Furthermore, NumPy-style fancy indexing, where a list of indices in
        a certain axis is specified, is also supported.  Note that only one
        list per selection is supported right now.  Finally, NumPy-style
        point and boolean selections are supported as well.

        Examples
        --------

        ::

            a1[0] = 333        # assign an integer to an Integer Array row
            a2[0] = 'b'        # assign a string to a string Array row
            a3[1:4] = 5        # broadcast 5 to slice 1:4
            a4[1:4:2] = 'xXx'  # broadcast 'xXx' to slice 1:4:2

            # General slice update (a5.shape = (4, 3, 2, 8, 5, 10)).
            a5[1, ..., ::2, 1:4, 4:] = np.arange(432).reshape((3, 2, 4, 3, 6))
            a6[1, [1, 5, 10], ..., -1] = arr     # fancy selection
            a7[np.where(a6[:] > 4)] = 4          # point selection + broadcast
            a8[arr > 4] = arr2                   # boolean selection

        """

        self._g_check_open()

        # Create an array compliant with the specified slice
        nparr = convert_to_np_atom2(value, self.atom)
        if nparr.size == 0:
            return

        # truncate data if least_significant_digit filter is set
        # TODO: add the least_significant_digit attribute to the array on disk
        if (self.filters.least_significant_digit is not None and
                not np.issubdtype(nparr.dtype, np.signedinteger)):
            nparr = quantize(nparr, self.filters.least_significant_digit)

        try:
            startl, stopl, stepl, shape = self._interpret_indexing(key)
            self._write_slice(startl, stopl, stepl, shape, nparr)
        except TypeError:
            # Then, try with a point-wise selection
            try:
                coords = self._point_selection(key)
                self._write_coords(coords, nparr)
            except TypeError:
                selection, reorder, shape = self._fancy_selection(key)
                self._write_selection(selection, reorder, shape, nparr)

    def _check_shape(self, nparr, slice_shape):
        """Test that nparr shape is consistent with underlying object.

        If not, try creating a new nparr object, using broadcasting if
        necessary.

        """

        if nparr.shape != (slice_shape + self.atom.dtype.shape):
            # Create an array compliant with the specified shape
            narr = np.empty(shape=slice_shape, dtype=self.atom.dtype)

            # Assign the value to it.  It will raise a ValueError exception
            # if the objects cannot be broadcast to a single shape.
            narr[...] = nparr
            return narr
        else:
            return nparr
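
    # Sketch of the broadcasting behavior used by _check_shape() (purely
    # illustrative): writing a scalar into a (3, 4) slice works because
    # NumPy broadcasts it into a freshly allocated buffer of the target
    # shape:
    #
    #     narr = np.empty(shape=(3, 4), dtype='float64')
    #     narr[...] = 5.0    # scalar broadcast succeeds; an incompatible
    #                        # shape would raise ValueError instead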

    def _read_slice(self, startl, stopl, stepl, shape):
        """Read a slice based on `startl`, `stopl` and `stepl`."""

        nparr = np.empty(dtype=self.atom.dtype, shape=shape)
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._g_read_slice(startl, stopl, stepl, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    def _read_coords(self, coords):
        """Read a set of points defined by `coords`."""

        nparr = np.empty(dtype=self.atom.dtype, shape=len(coords))
        if len(coords) > 0:
            self._g_read_coords(coords, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        return nparr

    def _read_selection(self, selection, reorder, shape):
        """Read a `selection`.

        Reorder if necessary.

        """

        # Create the container for the slice
        nparr = np.empty(dtype=self.atom.dtype, shape=shape)
        # Arrays that have non-zero dimensionality
        self._g_read_selection(selection, nparr)
        # For zero-shaped arrays, return the scalar
        if nparr.shape == ():
            nparr = nparr[()]
        elif reorder is not None:
            # We need to reorder the array
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder.argsort()
            # Apparently, a copy is not needed here, but doing it
            # for symmetry with the `_write_selection()` method.
            nparr = nparr[tuple(k)].copy()
        return nparr
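
    # Reordering example (illustrative): for a fancy selection such as
    # arr[[5, 2, 9]], _fancy_selection() reads the rows in sorted order
    # (2, 5, 9) and stores reorder = (axis, neworder) with
    # neworder = argsort([5, 2, 9]) = [1, 0, 2].  Indexing the freshly read
    # buffer with neworder.argsort() = [1, 0, 2] restores the originally
    # requested order (5, 2, 9).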

    def _write_slice(self, startl, stopl, stepl, shape, nparr):
        """Write `nparr` in a slice based on `startl`, `stopl` and `stepl`."""

        nparr = self._check_shape(nparr, tuple(shape))
        countl = ((stopl - startl - 1) // stepl) + 1
        self._g_write_slice(startl, stepl, countl, nparr)

    def _write_coords(self, coords, nparr):
        """Write `nparr` values in points defined by `coords` coordinates."""

        if len(coords) > 0:
            nparr = self._check_shape(nparr, (len(coords),))
            self._g_write_coords(coords, nparr)

    def _write_selection(self, selection, reorder, shape, nparr):
        """Write `nparr` in `selection`.

        Reorder if necessary.

        """

        nparr = self._check_shape(nparr, tuple(shape))
        # Check whether we should reorder the array
        if reorder is not None:
            idx, neworder = reorder
            k = [slice(None)] * len(shape)
            k[idx] = neworder
            # For a reason I don't understand well, we need a copy of
            # the reordered array here.
            nparr = nparr[tuple(k)].copy()
        self._g_write_selection(selection, nparr)

    def _read(self, start, stop, step, out=None):
        """Read the array from disk without slice or flavor processing."""

        nrowstoread = len(range(start, stop, step))
        shape = list(self.shape)
        if shape:
            shape[self.maindim] = nrowstoread
        if out is None:
            arr = np.empty(dtype=self.atom.dtype, shape=shape)
        else:
            bytes_required = self.rowsize * nrowstoread
            # if buffer is too small, it will segfault
            if bytes_required != out.nbytes:
                raise ValueError(f'output array size invalid, got {out.nbytes}'
                                 f' bytes, need {bytes_required} bytes')
            if not out.flags['C_CONTIGUOUS']:
                raise ValueError('output array not C contiguous')
            arr = out
        # Protection against reading empty arrays
        if 0 not in shape:
            # Arrays that have non-zero dimensionality
            self._read_array(start, stop, step, arr)
        # data is always read in the system byteorder
        # if the out array's byteorder is different, do a byteswap
        if (out is not None and
                byteorders[arr.dtype.byteorder] != sys.byteorder):
            arr.byteswap(True)
        return arr

    def read(self, start=None, stop=None, step=None, out=None):
        """Get data in the array as an object of the current flavor.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet.  Moreover, if only start is specified,
        then stop will be set to start + 1.  If you specify neither start
        nor stop, then *all the rows* in the array are selected.

        The out parameter may be used to specify a NumPy array to receive
        the output data.  Note that the array must have the same size as
        the data selected with the other parameters.  Note that the array's
        datatype is not checked and no type casting is performed, so if it
        does not match the datatype on disk, the output will not be correct.
        Also, this parameter is only valid when the array's flavor is set
        to 'numpy'.  Otherwise, a TypeError will be raised.

        When data is read from disk in NumPy format, the output will be
        in the current system's byteorder, regardless of how it is stored
        on disk.  The exception is when an output buffer is supplied, in
        which case the output will be in the byteorder of that output
        buffer.

        .. versionchanged:: 3.0
           Added the *out* parameter.

        """

        self._g_check_open()
        if out is not None and self.flavor != 'numpy':
            msg = ("Optional 'out' argument may only be supplied if array "
                   "flavor is 'numpy', currently is {}").format(self.flavor)
            raise TypeError(msg)
        (start, stop, step) = self._process_range_read(start, stop, step)
        arr = self._read(start, stop, step, out)
        return internal_to_flavor(arr, self.flavor)
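
    # A hedged usage sketch of read() with the *out* parameter (``arr`` is a
    # hypothetical Array with 100 float64 rows and flavor 'numpy'; the
    # buffer must match the selection size exactly and be C-contiguous):
    #
    #     buf = np.empty((10,), dtype='float64')
    #     arr.read(start=0, stop=10, out=buf)   # fills ``buf`` in place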

    def _g_copy_with_stats(self, group, name, start, stop, step,
                           title, filters, chunkshape, _log, **kwargs):
        """Private part of Leaf.copy() for each kind of leaf."""

        # Compute the correct indices.
        (start, stop, step) = self._process_range_read(start, stop, step)
        # Get the slice of the array
        # (non-buffered version)
        if self.shape:
            arr = self[start:stop:step]
        else:
            arr = self[()]
        # Build the new Array object.  Use the _atom reserved keyword
        # just in case the array is being copied from a native HDF5
        # with atomic types different from scalars.
        # For details, see #275 of trac.
        object_ = Array(group, name, arr, title=title, _log=_log,
                        _atom=self.atom)
        nbytes = np.prod(self.shape, dtype=SizeType) * self.atom.size

        return (object_, nbytes)

    def __repr__(self):
        """This provides more metainfo in addition to standard __str__"""

        return f"""{self}
  atom := {self.atom!r}
  maindim := {self.maindim!r}
  flavor := {self.flavor!r}
  byteorder := {self.byteorder!r}
  chunkshape := {self.chunkshape!r}"""


class ImageArray(Array):
    """Array containing an image.

    This class has no additional behaviour or functionality compared to
    that of an ordinary array.  It simply enables the user to open an
    ``IMAGE`` HDF5 node as a normal `Array` node in PyTables.

    """

    # Class identifier.
    _c_classid = 'IMAGE'