Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/vlarray.py: 14%
278 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Here is defined the VLArray class."""
3import operator
4import sys
5import numpy as np
7from . import hdf5extension
8from .atom import ObjectAtom, VLStringAtom, VLUnicodeAtom
9from .flavor import internal_to_flavor
10from .leaf import Leaf, calc_chunksize
11from .utils import (
12 convert_to_np_atom, convert_to_np_atom2, idx2long, correct_byteorder,
13 SizeType, is_idx, lazyattr)
# Default on-disk format version for VLARRAY objects.
# Version history:
# obversion = "1.0"  # initial version
# obversion = "1.0"  # add support for complex datatypes
# obversion = "1.1"  # This adds support for time datatypes.
# obversion = "1.2"  # This adds support for enumerated datatypes.
# obversion = "1.3"  # Introduced 'PSEUDOATOM' attribute.
obversion = "1.4"  # Numeric and numarray flavors are gone.
class VLArray(hdf5extension.VLArray, Leaf):
    """This class represents variable length (ragged) arrays in an HDF5 file.

    Instances of this class represent array objects in the object tree
    with the property that their rows can have a *variable* number of
    homogeneous elements, called *atoms*. Like Table datasets (see
    :ref:`TableClassDescr`), variable length arrays can have only one
    dimension, and the elements (atoms) of their rows can be fully
    multidimensional.

    When reading a range of rows from a VLArray, you will *always* get
    a Python list of objects of the current flavor (each of them for a
    row), which may have different lengths.

    This class provides methods to write or read data to or from
    variable length array objects in the file. Note that it also
    inherits all the public attributes and methods that Leaf (see
    :ref:`LeafClassDescr`) already provides.

    .. note::

        VLArray objects also support compression although compression
        is only performed on the data structures used internally by
        the HDF5 to take references of the location of the variable
        length data. Data itself (the raw data) are not compressed
        or filtered.

        Please refer to the `VLTypes Technical Note
        <https://support.hdfgroup.org/HDF5/doc/TechNotes/VLTypes.html>`_
        for more details on the topic.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.
    name : str
        The name of this node in its parent group.
    atom
        An `Atom` instance representing the *type* and *shape* of the atomic
        objects to be saved.
    title
        A description for this node (it sets the ``TITLE`` HDF5 attribute on
        disk).
    filters
        An instance of the `Filters` class that provides information about the
        desired I/O filters to be applied during the life of this object.
    expectedrows
        A user estimate about the number of row elements that will
        be added to the growable dimension in the `VLArray` node.
        If not provided, the default value is ``EXPECTED_ROWS_VLARRAY``
        (see ``tables/parameters.py``). If you plan to create either
        a much smaller or a much bigger `VLArray` try providing a guess;
        this will optimize the HDF5 B-Tree creation and management
        process time and the amount of memory used.

        .. versionadded:: 3.0

    chunkshape
        The shape of the data chunk to be read or written in a single HDF5 I/O
        operation. Filters are applied to those chunks of data. The
        dimensionality of `chunkshape` must be 1. If ``None``, a sensible
        value is calculated (which is recommended).
    byteorder
        The byteorder of the data *on disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the platform.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True. Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    .. versionchanged:: 3.0
        *parentNode* renamed into *parentnode*.

    .. versionchanged:: 3.0
        The *expectedsizeinMB* parameter has been replaced by *expectedrows*.

    Examples
    --------
    See below a small example of the use of the VLArray class. The code is
    available in :file:`examples/vlarray1.py`::

        import numpy as np
        import tables as tb

        # Create a VLArray:
        fileh = tb.open_file('vlarray1.h5', mode='w')
        vlarray = fileh.create_vlarray(
            fileh.root,
            'vlarray1',
            tb.Int32Atom(shape=()),
            "ragged array of ints",
            filters=tb.Filters(1))

        # Append some (variable length) rows:
        vlarray.append(np.array([5, 6]))
        vlarray.append(np.array([5, 6, 7]))
        vlarray.append([5, 6, 9, 8])

        # Now, read it through an iterator:
        print('-->', vlarray.title)
        for x in vlarray:
            print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x))

        # Now, do the same with native Python strings.
        vlarray2 = fileh.create_vlarray(
            fileh.root,
            'vlarray2',
            tb.StringAtom(itemsize=2),
            "ragged array of strings",
            filters=tb.Filters(1))
        vlarray2.flavor = 'python'

        # Append some (variable length) rows:
        print('-->', vlarray2.title)
        vlarray2.append(['5', '66'])
        vlarray2.append(['5', '6', '77'])
        vlarray2.append(['5', '6', '9', '88'])

        # Now, read it through an iterator:
        for x in vlarray2:
            print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x))

        # Close the file.
        fileh.close()

    The output for the previous script is something like::

        --> ragged array of ints
        vlarray1[0]--> [5 6]
        vlarray1[1]--> [5 6 7]
        vlarray1[2]--> [5 6 9 8]
        --> ragged array of strings
        vlarray2[0]--> ['5', '66']
        vlarray2[1]--> ['5', '6', '77']
        vlarray2[2]--> ['5', '6', '9', '88']

    .. rubric:: VLArray attributes

    The instance variables below are provided in addition to those in
    Leaf (see :ref:`LeafClassDescr`).

    .. attribute:: atom

        An Atom (see :ref:`AtomClassDescr`)
        instance representing the *type* and
        *shape* of the atomic objects to be
        saved. You may use a *pseudo-atom* for
        storing a serialized object or variable length string per row.

    .. attribute:: flavor

        The type of data object read from this leaf.

        Please note that when reading several rows of VLArray data,
        the flavor only applies to the *components* of the returned
        Python list, not to the list itself.

    .. attribute:: nrow

        On iterators, this is the index of the current row.

    .. attribute:: nrows

        The current number of rows in the array.

    .. attribute:: extdim

        The index of the enlargeable dimension (always 0 for vlarrays).

    """

    # Class identifier used by the node factory on file re-opening.
    _c_classid = 'VLARRAY'
    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this array."""
        # Delegated to the atom; cached by ``lazyattr`` after first access.
        return self.atom.dtype
    @property
    def shape(self):
        """The shape of the stored array (always a 1-tuple: ``(nrows,)``)."""
        return (self.nrows,)
    @property
    def size_on_disk(self):
        """
        The HDF5 library does not include a function to determine size_on_disk
        for variable-length arrays. Accessing this attribute will raise a
        NotImplementedError.
        """
        raise NotImplementedError('size_on_disk not implemented for VLArrays')
    @property
    def size_in_memory(self):
        """
        The size of this array's data in bytes when it is fully loaded
        into memory.

        .. note::

            When data is stored in a VLArray using the ObjectAtom type,
            it is first serialized using pickle, and then converted to
            a NumPy array suitable for storage in an HDF5 file.
            This attribute will return the size of that NumPy
            representation. If you wish to know the size of the Python
            objects after they are loaded from disk, you can use this
            `ActiveState recipe
            <http://code.activestate.com/recipes/577504/>`_.
        """
        # Computed by the extension-module helper, not here.
        return self._get_memory_size()
    def __init__(self, parentnode, name, atom=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):
        """Create or open a VLArray node (see the class docstring).

        When `atom` is given the node is being created anew; when it is
        ``None`` an existing node is being opened.
        """
        self._v_version = None
        """The object version of this array."""

        # A non-None atom means the node is being created, not opened.
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""

        self._v_new_title = title
        """New title for this node."""

        self._v_new_filters = filters
        """New filter properties for this array."""

        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_VLARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array.

        .. versionadded:: 3.0

        """

        self._v_chunkshape = None
        """Private storage for the `chunkshape` property of Leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""

        self._stop = None
        """Stopping row for the current iteration."""

        self._step = None
        """Step size for the current iteration."""

        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""

        self._startb = None
        """Starting row for current buffer."""

        self._stopb = None
        """Stopping row for current buffer. """

        self._row = None
        """Current row in iterators (sentinel)."""

        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""

        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = atom
        """
        An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved. You may
        use a *pseudo-atom* for storing a serialized object or
        variable length string per row.
        """
        self.nrow = None
        """On iterators, this is the index of the current row."""

        self.nrows = None
        """The current number of rows in the array."""

        self.extdim = 0   # VLArray only have one dimension currently
        """The index of the enlargeable dimension (always 0 for vlarrays)."""

        # Check the chunkshape parameter: accept an int or a length-1
        # sequence, normalizing to a 1-tuple of SizeType.
        if new and chunkshape is not None:
            if isinstance(chunkshape, (int, np.integer)):
                chunkshape = (chunkshape,)
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r"
                                 % (chunkshape,))
            self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        # Leaf.__init__ triggers _g_create()/_g_open() as appropriate.
        super().__init__(parentnode, name, new, filters,
                         byteorder, _log, track_times)
337 def _g_post_init_hook(self):
338 super()._g_post_init_hook()
339 self.nrowsinbuf = 100 # maybe enough for most applications
341 # This is too specific for moving it into Leaf
342 def _calc_chunkshape(self, expectedrows):
343 """Calculate the size for the HDF5 chunk."""
345 # For computing the chunkshape for HDF5 VL types, we have to
346 # choose the itemsize of the *each* element of the atom and
347 # not the size of the entire atom. I don't know why this
348 # should be like this, perhaps I should report this to the
349 # HDF5 list.
350 # F. Alted 2006-11-23
351 # elemsize = self.atom.atomsize()
352 elemsize = self._basesize
354 # AV 2013-05-03
355 # This is just a quick workaround tha allows to change the API for
356 # PyTables 3.0 release and remove the expected_mb parameter.
357 # The algorithm for computing the chunkshape should be rewritten as
358 # requested by gh-35.
359 expected_mb = expectedrows * elemsize / 1024 ** 2
361 chunksize = calc_chunksize(expected_mb)
363 # Set the chunkshape
364 chunkshape = chunksize // elemsize
365 # Safeguard against itemsizes being extremely large
366 if chunkshape == 0:
367 chunkshape = 1
368 return (SizeType(chunkshape),)
    def _g_create(self):
        """Create a variable length array (ragged array).

        Returns the HDF5 object identifier of the newly created dataset.
        """
        atom = self.atom
        self._v_version = obversion
        # Check for zero dims in atom shape (not allowed in VLArrays)
        zerodims = np.sum(np.array(atom.shape) == 0)
        if zerodims > 0:
            raise ValueError("When creating VLArrays, none of the dimensions "
                             "of the Atom instance can be zero.")

        # Pseudo-atoms (VLString/VLUnicode/Object) have no 'size'; their
        # storage characteristics come from their base atom instead.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self._atomicdtype = atom.base.dtype
            self._atomicsize = atom.base.size
            self._basesize = atom.base.itemsize
        else:
            self._atomicdtype = atom.dtype
            self._atomicsize = atom.size
            self._basesize = atom.itemsize
        self._atomictype = atom.type
        self._atomicshape = atom.shape

        # Compute the optimal chunkshape, if needed
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(self._v_expectedrows)

        self.nrows = SizeType(0)  # No rows at creation time

        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(atom.type, sys.byteorder)

        # After creating the vlarray, ``self._v_objectid`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectid = self._create_array(self._v_new_title)

        # Add an attribute in case we have a pseudo-atom so that we
        # can retrieve the proper class after a re-opening operation.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self.attrs.PSEUDOATOM = atom.kind

        return self._v_objectid
    def _g_open(self):
        """Get the metadata info for an array in file.

        Returns the HDF5 object identifier of the opened dataset.
        """
        self._v_objectid, self.nrows, self._v_chunkshape, atom = \
            self._open_array()

        # Check if the atom can be a PseudoAtom: the PSEUDOATOM attribute
        # (written by _g_create) records which pseudo-atom kind was used.
        if "PSEUDOATOM" in self.attrs:
            kind = self.attrs.PSEUDOATOM
            if kind == 'vlstring':
                atom = VLStringAtom()
            elif kind == 'vlunicode':
                atom = VLUnicodeAtom()
            elif kind == 'object':
                atom = ObjectAtom()
            else:
                raise ValueError(
                    "pseudo-atom name ``%s`` not known." % kind)
        elif self._v_file.format_version[:1] == "1":
            # Old format-1.x files recorded pseudo-atoms in FLAVOR instead.
            flavor1x = self.attrs.FLAVOR
            if flavor1x == "VLString":
                atom = VLStringAtom()
            elif flavor1x == "Object":
                atom = ObjectAtom()

        self.atom = atom
        return self._v_objectid
    def _getnobjects(self, nparr):
        """Return the number of objects in a NumPy array.

        Raises ValueError if the shape of `nparr` is not compatible with
        this array's atom shape.
        """
        # Check for zero dimensionality array
        zerodims = np.sum(np.array(nparr.shape) == 0)
        if zerodims > 0:
            # No objects to be added
            return 0
        shape = nparr.shape
        atom_shape = self.atom.shape
        shapelen = len(nparr.shape)
        # Normalize a scalar atom shape to a 1-tuple for comparison.
        if isinstance(atom_shape, tuple):
            atomshapelen = len(self.atom.shape)
        else:
            atom_shape = (self.atom.shape,)
            atomshapelen = 1
        diflen = shapelen - atomshapelen
        if shape == atom_shape:
            # Exactly one atom.
            nobjects = 1
        elif (diflen == 1 and shape[diflen:] == atom_shape):
            # Check if the leading dimensions are all ones
            # if shape[:diflen-1] == (1,)*(diflen-1):
            #    nobjects = shape[diflen-1]
            #    shape = shape[diflen:]
            # It's better to accept only inputs with the exact dimensionality
            # i.e. a dimensionality only 1 element larger than atom
            nobjects = shape[0]
            shape = shape[1:]
        elif atom_shape == (1,) and shapelen == 1:
            # Case where shape = (N,) and shape_atom = 1 or (1,)
            nobjects = shape[0]
        else:
            raise ValueError("The object '%s' is composed of elements with "
                             "shape '%s', which is not compatible with the "
                             "atom shape ('%s')." % (nparr, shape, atom_shape))
        return nobjects
478 def get_enum(self):
479 """Get the enumerated type associated with this array.
481 If this array is of an enumerated type, the corresponding Enum instance
482 (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated
483 type, a TypeError is raised.
485 """
487 if self.atom.kind != 'enum':
488 raise TypeError("array ``%s`` is not of an enumerated type"
489 % self._v_pathname)
491 return self.atom.enum
    def append(self, sequence):
        """Add a sequence of data to the end of the dataset.

        This method appends the objects in the sequence to a *single row* in
        this array. The type and shape of individual objects must be compliant
        with the atoms in the array. In the case of serialized objects and
        variable length strings, the object or string to append is itself the
        sequence.

        """
        self._g_check_open()
        self._v_file._check_writable()

        # Prepare the sequence to convert it into a NumPy object
        atom = self.atom
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            # Pseudo-atoms serialize the whole object into an array of
            # their base atom type.
            sequence = atom.toarray(sequence)
            statom = atom.base
        else:
            try:  # fastest check in most cases
                len(sequence)
            except TypeError:
                raise TypeError("argument is not a sequence")
            statom = atom

        if len(sequence) > 0:
            # The sequence needs to be copied to make the operation safe
            # to in-place conversion.
            nparr = convert_to_np_atom2(sequence, statom)
            nobjects = self._getnobjects(nparr)
        else:
            # Empty sequences still append an (empty) row.
            nobjects = 0
            nparr = None

        self._append(nparr, nobjects)
        self.nrows += 1
    def iterrows(self, start=None, stop=None, step=None):
        """Iterate over the rows of the array.

        This method returns an iterator yielding an object of the current
        flavor for each selected row in the array.

        If a range is not supplied, *all the rows* in the array are iterated
        upon. You can also use the :meth:`VLArray.__iter__` special method for
        that purpose. If you only want to iterate over a given *range of rows*
        in the array, you may use the start, stop and step parameters.

        Examples
        --------

        ::

            for row in vlarray.iterrows(step=4):
                print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, row))

        .. versionchanged:: 3.0
           If the *start* parameter is provided and *stop* is None then the
           array is iterated from *start* to the last line.
           In PyTables < 3.0 only one element was returned.

        """
        # Normalize the range and prime the iteration state; the VLArray
        # instance itself is the iterator (see __next__).
        (self._start, self._stop, self._step) = self._process_range(
            start, stop, step)
        self._init_loop()
        return self
    def __iter__(self):
        """Iterate over the rows of the array.

        This is equivalent to calling :meth:`VLArray.iterrows` with default
        arguments, i.e. it iterates over *all the rows* in the array.

        Examples
        --------

        ::

            result = [row for row in vlarray]

        Which is equivalent to::

            result = [row for row in vlarray.iterrows()]

        """
        if not self._init:
            # If the iterator is called directly, assign default variables
            self._start = 0
            self._stop = self.nrows
            self._step = 1
            # and initialize the loop
            self._init_loop()
        return self
    def _init_loop(self):
        """Initialization for the __iter__ iterator."""
        self._nrowsread = self._start
        self._startb = self._start
        self._row = -1   # Sentinel
        self._init = True  # Sentinel
        # Pre-decrement by one step so the first __next__ lands on _start.
        self.nrow = SizeType(self._start - self._step)    # row number
    def __next__(self):
        """Get the next element of the array during an iteration.

        The element is returned as a list of objects of the current
        flavor.

        """
        if self._nrowsread >= self._stop:
            self._init = False
            raise StopIteration  # end of iteration
        else:
            # Read a chunk of rows when the buffer is exhausted (or on the
            # very first call, signalled by the -1 sentinel).
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                self.listarr = self.read(self._startb, self._stopb, self._step)
                self._row = -1
                self._startb = self._stopb
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            return self.listarr[self._row]
    def __getitem__(self, key):
        """Get a row or a range of rows from the array.

        If key argument is an integer, the corresponding array row is returned
        as an object of the current flavor. If key is a slice, the range of
        rows determined by it is returned as a list of objects of the current
        flavor.

        In addition, NumPy-style point selections are supported.  In
        particular, if key is a list of row coordinates, the set of rows
        determined by it is returned.  Furthermore, if key is an array of
        boolean values, only the coordinates where key is True are returned.
        Note that for the latter to work it is necessary that key list would
        contain exactly as many rows as the array has.

        Examples
        --------

        ::

            a_row = vlarray[4]
            a_list = vlarray[4:1000:2]
            a_list2 = vlarray[[0,2]]   # get list of coords
            a_list3 = vlarray[[0,-2]]  # negative values accepted
            a_list4 = vlarray[numpy.array([True,...,False])]  # array of bools

        """
        self._g_check_open()
        if is_idx(key):
            key = operator.index(key)

            # Index out of range protection
            if key >= self.nrows:
                raise IndexError("Index out of range")
            if key < 0:
                # To support negative values
                key += self.nrows
            (start, stop, step) = self._process_range(key, key + 1, 1)
            return self.read(start, stop, step)[0]
        elif isinstance(key, slice):
            start, stop, step = self._process_range(
                key.start, key.stop, key.step)
            return self.read(start, stop, step)
        # Try with a boolean or point selection
        elif type(key) in (list, tuple) or isinstance(key, np.ndarray):
            coords = self._point_selection(key)
            return self._read_coordinates(coords)
        else:
            raise IndexError(f"Invalid index or slice: {key!r}")
674 def _assign_values(self, coords, values):
675 """Assign the `values` to the positions stated in `coords`."""
677 for nrow, value in zip(coords, values):
678 if nrow >= self.nrows:
679 raise IndexError("First index out of range")
680 if nrow < 0:
681 # To support negative values
682 nrow += self.nrows
683 object_ = value
684 # Prepare the object to convert it into a NumPy object
685 atom = self.atom
686 if not hasattr(atom, 'size'): # it is a pseudo-atom
687 object_ = atom.toarray(object_)
688 statom = atom.base
689 else:
690 statom = atom
691 value = convert_to_np_atom(object_, statom)
692 nobjects = self._getnobjects(value)
694 # Get the previous value
695 nrow = idx2long(
696 nrow) # To convert any possible numpy scalar value
697 nparr = self._read_array(nrow, nrow + 1, 1)[0]
698 nobjects = len(nparr)
699 if len(value) > nobjects:
700 raise ValueError("Length of value (%s) is larger than number "
701 "of elements in row (%s)" % (len(value),
702 nobjects))
703 try:
704 nparr[:] = value
705 except Exception as exc: # XXX
706 raise ValueError("Value parameter:\n'%r'\n"
707 "cannot be converted into an array object "
708 "compliant vlarray[%s] row: \n'%r'\n"
709 "The error was: <%s>" % (value, nrow,
710 nparr[:], exc))
712 if nparr.size > 0:
713 self._modify(nrow, nparr, nobjects)
    def __setitem__(self, key, value):
        """Set a row, or set of rows, in the array.

        It takes different actions depending on the type of the *key*
        parameter: if it is an integer, the corresponding table row is
        set to *value* (a record or sequence capable of being converted
        to the table structure).  If *key* is a slice, the row slice
        determined by it is set to *value* (a record array or sequence
        of rows capable of being converted to the table structure).

        In addition, NumPy-style point selections are supported.  In
        particular, if key is a list of row coordinates, the set of rows
        determined by it is set to value.  Furthermore, if key is an array of
        boolean values, only the coordinates where key is True are set to
        values from value.  Note that for the latter to work it is necessary
        that key list would contain exactly as many rows as the table has.

        .. note::

            When updating the rows of a VLArray object which uses a
            pseudo-atom, there is a problem: you can only update values
            with *exactly* the same size in bytes than the original row.
            This is very difficult to meet with object pseudo-atoms,
            because :mod:`pickle` applied on a Python object does not
            guarantee to return the same number of bytes than over another
            object, even if they are of the same class.
            This effectively limits the kinds of objects than can be
            updated in variable-length arrays.

        Examples
        --------

        ::

            vlarray[0] = vlarray[0] * 2 + 3
            vlarray[99] = arange(96) * 2 + 3

            # Negative values for the index are supported.
            vlarray[-99] = vlarray[5] * 2 + 3
            vlarray[1:30:2] = list_of_rows
            vlarray[[1,3]] = new_1_and_3_rows

        """
        self._g_check_open()
        self._v_file._check_writable()

        if is_idx(key):
            # If key is not a sequence, convert to it
            coords = [key]
            value = [value]
        elif isinstance(key, slice):
            start, stop, step = self._process_range(
                key.start, key.stop, key.step)
            coords = range(start, stop, step)
        # Try with a boolean or point selection
        elif type(key) in (list, tuple) or isinstance(key, np.ndarray):
            coords = self._point_selection(key)
        else:
            raise IndexError(f"Invalid index or slice: {key!r}")

        # Do the assignment row by row
        self._assign_values(coords, value)
    # Accessor for the _read_array method in superclass
    def read(self, start=None, stop=None, step=1):
        """Get data in the array as a list of objects of the current flavor.

        Please note that, as the lengths of the different rows are variable,
        the returned value is a *Python list* (not an array of the current
        flavor), with as many entries as specified rows in the range
        parameters.

        The start, stop and step parameters can be used to select only a
        *range of rows* in the array.  Their meanings are the same as in
        the built-in range() Python function, except that negative values
        of step are not allowed yet. Moreover, if only start is specified,
        then stop will be set to start + 1. If you do not specify neither
        start nor stop, then *all the rows* in the array are selected.

        """
        self._g_check_open()
        start, stop, step = self._process_range_read(start, stop, step)
        if start == stop:
            # Empty selection: nothing to read from disk.
            listarr = []
        else:
            listarr = self._read_array(start, stop, step)

        atom = self.atom
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            # Deserialize each row through the pseudo-atom.
            outlistarr = [atom.fromarray(arr) for arr in listarr]
        else:
            # Convert the list to the right flavor
            flavor = self.flavor
            outlistarr = [internal_to_flavor(arr, flavor) for arr in listarr]
        return outlistarr
813 def _read_coordinates(self, coords):
814 """Read rows specified in `coords`."""
815 rows = []
816 for coord in coords:
817 rows.append(self.read(int(coord), int(coord) + 1, 1)[0])
818 return rows
820 def _g_copy_with_stats(self, group, name, start, stop, step,
821 title, filters, chunkshape, _log, **kwargs):
822 """Private part of Leaf.copy() for each kind of leaf."""
824 # Build the new VLArray object
825 object = VLArray(
826 group, name, self.atom, title=title, filters=filters,
827 expectedrows=self._v_expectedrows, chunkshape=chunkshape,
828 _log=_log)
830 # Now, fill the new vlarray with values from the old one
831 # This is not buffered because we cannot forsee the length
832 # of each record. So, the safest would be a copy row by row.
833 # In the future, some analysis can be done in order to buffer
834 # the copy process.
835 nrowsinbuf = 1
836 (start, stop, step) = self._process_range_read(start, stop, step)
837 # Optimized version (no conversions, no type and shape checks, etc...)
838 nrowscopied = SizeType(0)
839 nbytes = 0
840 if not hasattr(self.atom, 'size'): # it is a pseudo-atom
841 atomsize = self.atom.base.size
842 else:
843 atomsize = self.atom.size
844 for start2 in range(start, stop, step * nrowsinbuf):
845 # Save the records on disk
846 stop2 = start2 + step * nrowsinbuf
847 if stop2 > stop:
848 stop2 = stop
849 nparr = self._read_array(start=start2, stop=stop2, step=step)[0]
850 nobjects = nparr.shape[0]
851 object._append(nparr, nobjects)
852 nbytes += nobjects * atomsize
853 nrowscopied += 1
854 object.nrows = nrowscopied
855 return (object, nbytes)
857 def __repr__(self):
858 """This provides more metainfo in addition to standard __str__"""
860 return f"""{self}
861 atom = {self.atom!r}
862 byteorder = {self.byteorder!r}
863 nrows = {self.nrows}
864 flavor = {self.flavor!r}"""