Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/vlarray.py: 14%

278 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Here is defined the VLArray class.""" 

2 

3import operator 

4import sys 

5import numpy as np 

6 

7from . import hdf5extension 

8from .atom import ObjectAtom, VLStringAtom, VLUnicodeAtom 

9from .flavor import internal_to_flavor 

10from .leaf import Leaf, calc_chunksize 

11from .utils import ( 

12 convert_to_np_atom, convert_to_np_atom2, idx2long, correct_byteorder, 

13 SizeType, is_idx, lazyattr) 

14 

15 

# Default on-disk format version written for new VLARRAY objects.
# Version history:
# obversion = "1.0"    # initial version
# obversion = "1.0"    # add support for complex datatypes
# obversion = "1.1"    # This adds support for time datatypes.
# obversion = "1.2"    # This adds support for enumerated datatypes.
# obversion = "1.3"    # Introduced 'PSEUDOATOM' attribute.
obversion = "1.4"    # Numeric and numarray flavors are gone.

23 

24 

25class VLArray(hdf5extension.VLArray, Leaf): 

26 """This class represents variable length (ragged) arrays in an HDF5 file. 

27 

28 Instances of this class represent array objects in the object tree 

29 with the property that their rows can have a *variable* number of 

30 homogeneous elements, called *atoms*. Like Table datasets (see 

31 :ref:`TableClassDescr`), variable length arrays can have only one 

32 dimension, and the elements (atoms) of their rows can be fully 

33 multidimensional. 

34 

35 When reading a range of rows from a VLArray, you will *always* get 

36 a Python list of objects of the current flavor (each of them for a 

37 row), which may have different lengths. 

38 

39 This class provides methods to write or read data to or from 

40 variable length array objects in the file. Note that it also 

41 inherits all the public attributes and methods that Leaf (see 

42 :ref:`LeafClassDescr`) already provides. 

43 

44 .. note:: 

45 

46 VLArray objects also support compression although compression 

47 is only performed on the data structures used internally by 

48 the HDF5 to take references of the location of the variable 

49 length data. Data itself (the raw data) are not compressed 

50 or filtered. 

51 

52 Please refer to the `VLTypes Technical Note 

53 <https://support.hdfgroup.org/HDF5/doc/TechNotes/VLTypes.html>`_ 

54 for more details on the topic. 

55 

56 Parameters 

57 ---------- 

58 parentnode 

59 The parent :class:`Group` object. 

60 name : str 

61 The name of this node in its parent group. 

62 atom 

63 An `Atom` instance representing the *type* and *shape* of the atomic 

64 objects to be saved. 

65 title 

66 A description for this node (it sets the ``TITLE`` HDF5 attribute on 

67 disk). 

68 filters 

69 An instance of the `Filters` class that provides information about the 

70 desired I/O filters to be applied during the life of this object. 

71 expectedrows 

72 A user estimate about the number of row elements that will 

73 be added to the growable dimension in the `VLArray` node. 

74 If not provided, the default value is ``EXPECTED_ROWS_VLARRAY`` 

75 (see ``tables/parameters.py``). If you plan to create either 

76 a much smaller or a much bigger `VLArray` try providing a guess; 

77 this will optimize the HDF5 B-Tree creation and management 

78 process time and the amount of memory used. 

79 

80 .. versionadded:: 3.0 

81 

82 chunkshape 

83 The shape of the data chunk to be read or written in a single HDF5 I/O 

84 operation. Filters are applied to those chunks of data. The 

85 dimensionality of `chunkshape` must be 1. If ``None``, a sensible 

86 value is calculated (which is recommended). 

87 byteorder 

88 The byteorder of the data *on disk*, specified as 'little' or 'big'. 

89 If this is not specified, the byteorder is that of the platform. 

90 

91 track_times 

92 Whether time data associated with the leaf are recorded (object 

93 access time, raw data modification time, metadata change time, object 

94 birth time); default True. Semantics of these times depend on their 

95 implementation in the HDF5 library: refer to documentation of the 

96 H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata 

97 change time) is implemented. 

98 

99 .. versionadded:: 3.4.3 

100 

101 

102 .. versionchanged:: 3.0 

103 *parentNode* renamed into *parentnode*. 

104 

105 .. versionchanged:: 3.0 

106 The *expectedsizeinMB* parameter has been replaced by *expectedrows*. 

107 

108 Examples 

109 -------- 

110 See below a small example of the use of the VLArray class. The code is 

111 available in :file:`examples/vlarray1.py`:: 

112 

113 import numpy as np 

114 import tables as tb 

115 

116 # Create a VLArray: 

117 fileh = tb.open_file('vlarray1.h5', mode='w') 

118 vlarray = fileh.create_vlarray( 

119 fileh.root, 

120 'vlarray1', 

121 tb.Int32Atom(shape=()), 

122 "ragged array of ints", 

123 filters=tb.Filters(1)) 

124 

125 # Append some (variable length) rows: 

126 vlarray.append(np.array([5, 6])) 

127 vlarray.append(np.array([5, 6, 7])) 

128 vlarray.append([5, 6, 9, 8]) 

129 

130 # Now, read it through an iterator: 

131 print('-->', vlarray.title) 

132 for x in vlarray: 

133 print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x)) 

134 

135 # Now, do the same with native Python strings. 

136 vlarray2 = fileh.create_vlarray( 

137 fileh.root, 

138 'vlarray2', 

139 tb.StringAtom(itemsize=2), 

140 "ragged array of strings", 

141 filters=tb.Filters(1)) 

142 vlarray2.flavor = 'python' 

143 

144 # Append some (variable length) rows: 

145 print('-->', vlarray2.title) 

146 vlarray2.append(['5', '66']) 

147 vlarray2.append(['5', '6', '77']) 

148 vlarray2.append(['5', '6', '9', '88']) 

149 

150 # Now, read it through an iterator: 

151 for x in vlarray2: 

152 print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x)) 

153 

154 # Close the file. 

155 fileh.close() 

156 

157 The output for the previous script is something like:: 

158 

159 --> ragged array of ints 

160 vlarray1[0]--> [5 6] 

161 vlarray1[1]--> [5 6 7] 

162 vlarray1[2]--> [5 6 9 8] 

163 --> ragged array of strings 

164 vlarray2[0]--> ['5', '66'] 

165 vlarray2[1]--> ['5', '6', '77'] 

166 vlarray2[2]--> ['5', '6', '9', '88'] 

167 

168 

169 .. rubric:: VLArray attributes 

170 

171 The instance variables below are provided in addition to those in 

172 Leaf (see :ref:`LeafClassDescr`). 

173 

174 .. attribute:: atom 

175 

176 An Atom (see :ref:`AtomClassDescr`) 

177 instance representing the *type* and 

178 *shape* of the atomic objects to be 

179 saved. You may use a *pseudo-atom* for 

180 storing a serialized object or variable length string per row. 

181 

182 .. attribute:: flavor 

183 

184 The type of data object read from this leaf. 

185 

186 Please note that when reading several rows of VLArray data, 

187 the flavor only applies to the *components* of the returned 

188 Python list, not to the list itself. 

189 

190 .. attribute:: nrow 

191 

192 On iterators, this is the index of the current row. 

193 

194 .. attribute:: nrows 

195 

196 The current number of rows in the array. 

197 

198 .. attribute:: extdim 

199 

200 The index of the enlargeable dimension (always 0 for vlarrays). 

201 

202 """ 

203 

    # Class identifier.  NOTE(review): presumably the tag used to map
    # HDF5 datasets back to this class on re-opening — confirm against
    # the node registry.
    _c_classid = 'VLARRAY'

206 

207 @lazyattr 

208 def dtype(self): 

209 """The NumPy ``dtype`` that most closely matches this array.""" 

210 return self.atom.dtype 

211 

212 @property 

213 def shape(self): 

214 """The shape of the stored array.""" 

215 return (self.nrows,) 

216 

217 @property 

218 def size_on_disk(self): 

219 """ 

220 The HDF5 library does not include a function to determine size_on_disk 

221 for variable-length arrays. Accessing this attribute will raise a 

222 NotImplementedError. 

223 """ 

224 raise NotImplementedError('size_on_disk not implemented for VLArrays') 

225 

226 @property 

227 def size_in_memory(self): 

228 """ 

229 The size of this array's data in bytes when it is fully loaded 

230 into memory. 

231 

232 .. note:: 

233 

234 When data is stored in a VLArray using the ObjectAtom type, 

235 it is first serialized using pickle, and then converted to 

236 a NumPy array suitable for storage in an HDF5 file. 

237 This attribute will return the size of that NumPy 

238 representation. If you wish to know the size of the Python 

239 objects after they are loaded from disk, you can use this 

240 `ActiveState recipe 

241 <http://code.activestate.com/recipes/577504/>`_. 

242 """ 

243 return self._get_memory_size() 

244 

    def __init__(self, parentnode, name, atom=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):
        """Create or open a VLArray node (see the class docstring for
        the meaning of the parameters).  Passing a non-None `atom`
        means a brand-new node is being created."""

        self._v_version = None
        """The object version of this array."""

        # A node is "new" exactly when an atom was supplied.
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""

        self._v_new_title = title
        """New title for this node."""

        self._v_new_filters = filters
        """New filter properties for this array."""

        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_VLARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array.

        .. versionadded:: 3.0

        """

        self._v_chunkshape = None
        """Private storage for the `chunkshape` property of Leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""

        self._stop = None
        """Stopping row for the current iteration."""

        self._step = None
        """Step size for the current iteration."""

        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""

        self._startb = None
        """Starting row for current buffer."""

        self._stopb = None
        """Stopping row for current buffer. """

        self._row = None
        """Current row in iterators (sentinel)."""

        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""

        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = atom
        """
        An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved. You may
        use a *pseudo-atom* for storing a serialized object or
        variable length string per row.
        """
        self.nrow = None
        """On iterators, this is the index of the current row."""

        self.nrows = None
        """The current number of rows in the array."""

        self.extdim = 0  # VLArray only have one dimension currently
        """The index of the enlargeable dimension (always 0 for vlarrays)."""

        # Check the chunkshape parameter (only meaningful at creation).
        if new and chunkshape is not None:
            # Accept a bare integer and normalize it to a 1-tuple.
            if isinstance(chunkshape, (int, np.integer)):
                chunkshape = (chunkshape,)
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r"
                                 % (chunkshape,))
            self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        super().__init__(parentnode, name, new, filters,
                         byteorder, _log, track_times)

336 

337 def _g_post_init_hook(self): 

338 super()._g_post_init_hook() 

339 self.nrowsinbuf = 100 # maybe enough for most applications 

340 

341 # This is too specific for moving it into Leaf 

342 def _calc_chunkshape(self, expectedrows): 

343 """Calculate the size for the HDF5 chunk.""" 

344 

345 # For computing the chunkshape for HDF5 VL types, we have to 

346 # choose the itemsize of the *each* element of the atom and 

347 # not the size of the entire atom. I don't know why this 

348 # should be like this, perhaps I should report this to the 

349 # HDF5 list. 

350 # F. Alted 2006-11-23 

351 # elemsize = self.atom.atomsize() 

352 elemsize = self._basesize 

353 

354 # AV 2013-05-03 

355 # This is just a quick workaround tha allows to change the API for 

356 # PyTables 3.0 release and remove the expected_mb parameter. 

357 # The algorithm for computing the chunkshape should be rewritten as 

358 # requested by gh-35. 

359 expected_mb = expectedrows * elemsize / 1024 ** 2 

360 

361 chunksize = calc_chunksize(expected_mb) 

362 

363 # Set the chunkshape 

364 chunkshape = chunksize // elemsize 

365 # Safeguard against itemsizes being extremely large 

366 if chunkshape == 0: 

367 chunkshape = 1 

368 return (SizeType(chunkshape),) 

369 

    def _g_create(self):
        """Create a variable length array (ragged array).

        Validates the atom, caches its per-element metadata, computes a
        chunkshape if none was given, creates the HDF5 dataset and
        returns its object id.
        """

        atom = self.atom
        self._v_version = obversion
        # Check for zero dims in atom shape (not allowed in VLArrays)
        zerodims = np.sum(np.array(atom.shape) == 0)
        if zerodims > 0:
            raise ValueError("When creating VLArrays, none of the dimensions "
                             "of the Atom instance can be zero.")

        # Pseudo-atoms carry their real storage description in `.base`;
        # plain atoms describe the storage themselves.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self._atomicdtype = atom.base.dtype
            self._atomicsize = atom.base.size
            self._basesize = atom.base.itemsize
        else:
            self._atomicdtype = atom.dtype
            self._atomicsize = atom.size
            self._basesize = atom.itemsize
        self._atomictype = atom.type
        self._atomicshape = atom.shape

        # Compute the optimal chunkshape, if needed
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(self._v_expectedrows)

        self.nrows = SizeType(0)  # No rows at creation time

        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(atom.type, sys.byteorder)

        # After creating the vlarray, ``self._v_objectid`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectid = self._create_array(self._v_new_title)

        # Add an attribute in case we have a pseudo-atom so that we
        # can retrieve the proper class after a re-opening operation.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self.attrs.PSEUDOATOM = atom.kind

        return self._v_objectid

412 

    def _g_open(self):
        """Get the metadata info for an array in file.

        Reads the object id, row count, chunkshape and atom from the
        HDF5 dataset, then restores any pseudo-atom recorded in the
        node attributes, and returns the object id.
        """

        self._v_objectid, self.nrows, self._v_chunkshape, atom = \
            self._open_array()

        # Check if the atom can be a PseudoAtom
        if "PSEUDOATOM" in self.attrs:
            kind = self.attrs.PSEUDOATOM
            if kind == 'vlstring':
                atom = VLStringAtom()
            elif kind == 'vlunicode':
                atom = VLUnicodeAtom()
            elif kind == 'object':
                atom = ObjectAtom()
            else:
                raise ValueError(
                    "pseudo-atom name ``%s`` not known." % kind)
        elif self._v_file.format_version[:1] == "1":
            # NOTE(review): files whose format version starts with "1"
            # apparently encoded pseudo-atoms in the FLAVOR attribute
            # instead — confirm against the 1.x format spec.
            flavor1x = self.attrs.FLAVOR
            if flavor1x == "VLString":
                atom = VLStringAtom()
            elif flavor1x == "Object":
                atom = ObjectAtom()

        self.atom = atom
        return self._v_objectid

440 

441 def _getnobjects(self, nparr): 

442 """Return the number of objects in a NumPy array.""" 

443 

444 # Check for zero dimensionality array 

445 zerodims = np.sum(np.array(nparr.shape) == 0) 

446 if zerodims > 0: 

447 # No objects to be added 

448 return 0 

449 shape = nparr.shape 

450 atom_shape = self.atom.shape 

451 shapelen = len(nparr.shape) 

452 if isinstance(atom_shape, tuple): 

453 atomshapelen = len(self.atom.shape) 

454 else: 

455 atom_shape = (self.atom.shape,) 

456 atomshapelen = 1 

457 diflen = shapelen - atomshapelen 

458 if shape == atom_shape: 

459 nobjects = 1 

460 elif (diflen == 1 and shape[diflen:] == atom_shape): 

461 # Check if the leading dimensions are all ones 

462 # if shape[:diflen-1] == (1,)*(diflen-1): 

463 # nobjects = shape[diflen-1] 

464 # shape = shape[diflen:] 

465 # It's better to accept only inputs with the exact dimensionality 

466 # i.e. a dimensionality only 1 element larger than atom 

467 nobjects = shape[0] 

468 shape = shape[1:] 

469 elif atom_shape == (1,) and shapelen == 1: 

470 # Case where shape = (N,) and shape_atom = 1 or (1,) 

471 nobjects = shape[0] 

472 else: 

473 raise ValueError("The object '%s' is composed of elements with " 

474 "shape '%s', which is not compatible with the " 

475 "atom shape ('%s')." % (nparr, shape, atom_shape)) 

476 return nobjects 

477 

478 def get_enum(self): 

479 """Get the enumerated type associated with this array. 

480 

481 If this array is of an enumerated type, the corresponding Enum instance 

482 (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated 

483 type, a TypeError is raised. 

484 

485 """ 

486 

487 if self.atom.kind != 'enum': 

488 raise TypeError("array ``%s`` is not of an enumerated type" 

489 % self._v_pathname) 

490 

491 return self.atom.enum 

492 

493 def append(self, sequence): 

494 """Add a sequence of data to the end of the dataset. 

495 

496 This method appends the objects in the sequence to a *single row* in 

497 this array. The type and shape of individual objects must be compliant 

498 with the atoms in the array. In the case of serialized objects and 

499 variable length strings, the object or string to append is itself the 

500 sequence. 

501 

502 """ 

503 

504 self._g_check_open() 

505 self._v_file._check_writable() 

506 

507 # Prepare the sequence to convert it into a NumPy object 

508 atom = self.atom 

509 if not hasattr(atom, 'size'): # it is a pseudo-atom 

510 sequence = atom.toarray(sequence) 

511 statom = atom.base 

512 else: 

513 try: # fastest check in most cases 

514 len(sequence) 

515 except TypeError: 

516 raise TypeError("argument is not a sequence") 

517 statom = atom 

518 

519 if len(sequence) > 0: 

520 # The sequence needs to be copied to make the operation safe 

521 # to in-place conversion. 

522 nparr = convert_to_np_atom2(sequence, statom) 

523 nobjects = self._getnobjects(nparr) 

524 else: 

525 nobjects = 0 

526 nparr = None 

527 

528 self._append(nparr, nobjects) 

529 self.nrows += 1 

530 

531 def iterrows(self, start=None, stop=None, step=None): 

532 """Iterate over the rows of the array. 

533 

534 This method returns an iterator yielding an object of the current 

535 flavor for each selected row in the array. 

536 

537 If a range is not supplied, *all the rows* in the array are iterated 

538 upon. You can also use the :meth:`VLArray.__iter__` special method for 

539 that purpose. If you only want to iterate over a given *range of rows* 

540 in the array, you may use the start, stop and step parameters. 

541 

542 Examples 

543 -------- 

544 

545 :: 

546 

547 for row in vlarray.iterrows(step=4): 

548 print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, row)) 

549 

550 .. versionchanged:: 3.0 

551 If the *start* parameter is provided and *stop* is None then the 

552 array is iterated from *start* to the last line. 

553 In PyTables < 3.0 only one element was returned. 

554 

555 """ 

556 

557 (self._start, self._stop, self._step) = self._process_range( 

558 start, stop, step) 

559 self._init_loop() 

560 return self 

561 

562 def __iter__(self): 

563 """Iterate over the rows of the array. 

564 

565 This is equivalent to calling :meth:`VLArray.iterrows` with default 

566 arguments, i.e. it iterates over *all the rows* in the array. 

567 

568 Examples 

569 -------- 

570 

571 :: 

572 

573 result = [row for row in vlarray] 

574 

575 Which is equivalent to:: 

576 

577 result = [row for row in vlarray.iterrows()] 

578 

579 """ 

580 

581 if not self._init: 

582 # If the iterator is called directly, assign default variables 

583 self._start = 0 

584 self._stop = self.nrows 

585 self._step = 1 

586 # and initialize the loop 

587 self._init_loop() 

588 

589 return self 

590 

591 def _init_loop(self): 

592 """Initialization for the __iter__ iterator.""" 

593 

594 self._nrowsread = self._start 

595 self._startb = self._start 

596 self._row = -1 # Sentinel 

597 self._init = True # Sentinel 

598 self.nrow = SizeType(self._start - self._step) # row number 

599 

    def __next__(self):
        """Get the next element of the array during an iteration.

        The element is returned as a list of objects of the current
        flavor.

        """

        if self._nrowsread >= self._stop:
            self._init = False
            raise StopIteration  # end of iteration
        else:
            # Refill the buffer when it is exhausted
            # (_row + 1 >= nrowsinbuf) or on the very first call
            # (_row < 0, set by _init_loop).
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                self.listarr = self.read(self._startb, self._stopb, self._step)
                self._row = -1
                self._startb = self._stopb
            # Advance the cursor and the public row counters in lockstep.
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            return self.listarr[self._row]

622 

623 def __getitem__(self, key): 

624 """Get a row or a range of rows from the array. 

625 

626 If key argument is an integer, the corresponding array row is returned 

627 as an object of the current flavor. If key is a slice, the range of 

628 rows determined by it is returned as a list of objects of the current 

629 flavor. 

630 

631 In addition, NumPy-style point selections are supported. In 

632 particular, if key is a list of row coordinates, the set of rows 

633 determined by it is returned. Furthermore, if key is an array of 

634 boolean values, only the coordinates where key is True are returned. 

635 Note that for the latter to work it is necessary that key list would 

636 contain exactly as many rows as the array has. 

637 

638 Examples 

639 -------- 

640 

641 :: 

642 

643 a_row = vlarray[4] 

644 a_list = vlarray[4:1000:2] 

645 a_list2 = vlarray[[0,2]] # get list of coords 

646 a_list3 = vlarray[[0,-2]] # negative values accepted 

647 a_list4 = vlarray[numpy.array([True,...,False])] # array of bools 

648 

649 """ 

650 

651 self._g_check_open() 

652 if is_idx(key): 

653 key = operator.index(key) 

654 

655 # Index out of range protection 

656 if key >= self.nrows: 

657 raise IndexError("Index out of range") 

658 if key < 0: 

659 # To support negative values 

660 key += self.nrows 

661 (start, stop, step) = self._process_range(key, key + 1, 1) 

662 return self.read(start, stop, step)[0] 

663 elif isinstance(key, slice): 

664 start, stop, step = self._process_range( 

665 key.start, key.stop, key.step) 

666 return self.read(start, stop, step) 

667 # Try with a boolean or point selection 

668 elif type(key) in (list, tuple) or isinstance(key, np.ndarray): 

669 coords = self._point_selection(key) 

670 return self._read_coordinates(coords) 

671 else: 

672 raise IndexError(f"Invalid index or slice: {key!r}") 

673 

674 def _assign_values(self, coords, values): 

675 """Assign the `values` to the positions stated in `coords`.""" 

676 

677 for nrow, value in zip(coords, values): 

678 if nrow >= self.nrows: 

679 raise IndexError("First index out of range") 

680 if nrow < 0: 

681 # To support negative values 

682 nrow += self.nrows 

683 object_ = value 

684 # Prepare the object to convert it into a NumPy object 

685 atom = self.atom 

686 if not hasattr(atom, 'size'): # it is a pseudo-atom 

687 object_ = atom.toarray(object_) 

688 statom = atom.base 

689 else: 

690 statom = atom 

691 value = convert_to_np_atom(object_, statom) 

692 nobjects = self._getnobjects(value) 

693 

694 # Get the previous value 

695 nrow = idx2long( 

696 nrow) # To convert any possible numpy scalar value 

697 nparr = self._read_array(nrow, nrow + 1, 1)[0] 

698 nobjects = len(nparr) 

699 if len(value) > nobjects: 

700 raise ValueError("Length of value (%s) is larger than number " 

701 "of elements in row (%s)" % (len(value), 

702 nobjects)) 

703 try: 

704 nparr[:] = value 

705 except Exception as exc: # XXX 

706 raise ValueError("Value parameter:\n'%r'\n" 

707 "cannot be converted into an array object " 

708 "compliant vlarray[%s] row: \n'%r'\n" 

709 "The error was: <%s>" % (value, nrow, 

710 nparr[:], exc)) 

711 

712 if nparr.size > 0: 

713 self._modify(nrow, nparr, nobjects) 

714 

715 def __setitem__(self, key, value): 

716 """Set a row, or set of rows, in the array. 

717 

718 It takes different actions depending on the type of the *key* 

719 parameter: if it is an integer, the corresponding table row is 

720 set to *value* (a record or sequence capable of being converted 

721 to the table structure). If *key* is a slice, the row slice 

722 determined by it is set to *value* (a record array or sequence 

723 of rows capable of being converted to the table structure). 

724 

725 In addition, NumPy-style point selections are supported. In 

726 particular, if key is a list of row coordinates, the set of rows 

727 determined by it is set to value. Furthermore, if key is an array of 

728 boolean values, only the coordinates where key is True are set to 

729 values from value. Note that for the latter to work it is necessary 

730 that key list would contain exactly as many rows as the table has. 

731 

732 .. note:: 

733 

734 When updating the rows of a VLArray object which uses a 

735 pseudo-atom, there is a problem: you can only update values 

736 with *exactly* the same size in bytes than the original row. 

737 This is very difficult to meet with object pseudo-atoms, 

738 because :mod:`pickle` applied on a Python object does not 

739 guarantee to return the same number of bytes than over another 

740 object, even if they are of the same class. 

741 This effectively limits the kinds of objects than can be 

742 updated in variable-length arrays. 

743 

744 Examples 

745 -------- 

746 

747 :: 

748 

749 vlarray[0] = vlarray[0] * 2 + 3 

750 vlarray[99] = arange(96) * 2 + 3 

751 

752 # Negative values for the index are supported. 

753 vlarray[-99] = vlarray[5] * 2 + 3 

754 vlarray[1:30:2] = list_of_rows 

755 vlarray[[1,3]] = new_1_and_3_rows 

756 

757 """ 

758 

759 self._g_check_open() 

760 self._v_file._check_writable() 

761 

762 if is_idx(key): 

763 # If key is not a sequence, convert to it 

764 coords = [key] 

765 value = [value] 

766 elif isinstance(key, slice): 

767 start, stop, step = self._process_range( 

768 key.start, key.stop, key.step) 

769 coords = range(start, stop, step) 

770 # Try with a boolean or point selection 

771 elif type(key) in (list, tuple) or isinstance(key, np.ndarray): 

772 coords = self._point_selection(key) 

773 else: 

774 raise IndexError(f"Invalid index or slice: {key!r}") 

775 

776 # Do the assignment row by row 

777 self._assign_values(coords, value) 

778 

779 # Accessor for the _read_array method in superclass 

780 def read(self, start=None, stop=None, step=1): 

781 """Get data in the array as a list of objects of the current flavor. 

782 

783 Please note that, as the lengths of the different rows are variable, 

784 the returned value is a *Python list* (not an array of the current 

785 flavor), with as many entries as specified rows in the range 

786 parameters. 

787 

788 The start, stop and step parameters can be used to select only a 

789 *range of rows* in the array. Their meanings are the same as in 

790 the built-in range() Python function, except that negative values 

791 of step are not allowed yet. Moreover, if only start is specified, 

792 then stop will be set to start + 1. If you do not specify neither 

793 start nor stop, then *all the rows* in the array are selected. 

794 

795 """ 

796 

797 self._g_check_open() 

798 start, stop, step = self._process_range_read(start, stop, step) 

799 if start == stop: 

800 listarr = [] 

801 else: 

802 listarr = self._read_array(start, stop, step) 

803 

804 atom = self.atom 

805 if not hasattr(atom, 'size'): # it is a pseudo-atom 

806 outlistarr = [atom.fromarray(arr) for arr in listarr] 

807 else: 

808 # Convert the list to the right flavor 

809 flavor = self.flavor 

810 outlistarr = [internal_to_flavor(arr, flavor) for arr in listarr] 

811 return outlistarr 

812 

813 def _read_coordinates(self, coords): 

814 """Read rows specified in `coords`.""" 

815 rows = [] 

816 for coord in coords: 

817 rows.append(self.read(int(coord), int(coord) + 1, 1)[0]) 

818 return rows 

819 

820 def _g_copy_with_stats(self, group, name, start, stop, step, 

821 title, filters, chunkshape, _log, **kwargs): 

822 """Private part of Leaf.copy() for each kind of leaf.""" 

823 

824 # Build the new VLArray object 

825 object = VLArray( 

826 group, name, self.atom, title=title, filters=filters, 

827 expectedrows=self._v_expectedrows, chunkshape=chunkshape, 

828 _log=_log) 

829 

830 # Now, fill the new vlarray with values from the old one 

831 # This is not buffered because we cannot forsee the length 

832 # of each record. So, the safest would be a copy row by row. 

833 # In the future, some analysis can be done in order to buffer 

834 # the copy process. 

835 nrowsinbuf = 1 

836 (start, stop, step) = self._process_range_read(start, stop, step) 

837 # Optimized version (no conversions, no type and shape checks, etc...) 

838 nrowscopied = SizeType(0) 

839 nbytes = 0 

840 if not hasattr(self.atom, 'size'): # it is a pseudo-atom 

841 atomsize = self.atom.base.size 

842 else: 

843 atomsize = self.atom.size 

844 for start2 in range(start, stop, step * nrowsinbuf): 

845 # Save the records on disk 

846 stop2 = start2 + step * nrowsinbuf 

847 if stop2 > stop: 

848 stop2 = stop 

849 nparr = self._read_array(start=start2, stop=stop2, step=step)[0] 

850 nobjects = nparr.shape[0] 

851 object._append(nparr, nobjects) 

852 nbytes += nobjects * atomsize 

853 nrowscopied += 1 

854 object.nrows = nrowscopied 

855 return (object, nbytes) 

856 

857 def __repr__(self): 

858 """This provides more metainfo in addition to standard __str__""" 

859 

860 return f"""{self} 

861 atom = {self.atom!r} 

862 byteorder = {self.byteorder!r} 

863 nrows = {self.nrows} 

864 flavor = {self.flavor!r}"""