Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/vlarray.py: 14%

278 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Here is defined the VLArray class.""" 

2 

3import operator 

4import sys 

5import numpy as np 

6 

7from . import hdf5extension 

8from .atom import ObjectAtom, VLStringAtom, VLUnicodeAtom 

9from .flavor import internal_to_flavor 

10from .leaf import Leaf, calc_chunksize 

11from .utils import ( 

12 convert_to_np_atom, convert_to_np_atom2, idx2long, correct_byteorder, 

13 SizeType, is_idx, lazyattr) 

14 

15 

# Default on-disk format version written for new VLARRAY objects.
# Version history:
# obversion = "1.0"    # initial version
# obversion = "1.0"    # add support for complex datatypes
# obversion = "1.1"    # This adds support for time datatypes.
# obversion = "1.2"    # This adds support for enumerated datatypes.
# obversion = "1.3"    # Introduced 'PSEUDOATOM' attribute.
obversion = "1.4"    # Numeric and numarray flavors are gone.

23 

24 

25class VLArray(hdf5extension.VLArray, Leaf): 

26 """This class represents variable length (ragged) arrays in an HDF5 file. 

27 

28 Instances of this class represent array objects in the object tree 

29 with the property that their rows can have a *variable* number of 

30 homogeneous elements, called *atoms*. Like Table datasets (see 

31 :ref:`TableClassDescr`), variable length arrays can have only one 

32 dimension, and the elements (atoms) of their rows can be fully 

33 multidimensional. 

34 

35 When reading a range of rows from a VLArray, you will *always* get 

36 a Python list of objects of the current flavor (each of them for a 

37 row), which may have different lengths. 

38 

39 This class provides methods to write or read data to or from 

40 variable length array objects in the file. Note that it also 

41 inherits all the public attributes and methods that Leaf (see 

42 :ref:`LeafClassDescr`) already provides. 

43 

44 .. note:: 

45 

46 VLArray objects also support compression although compression 

47 is only performed on the data structures used internally by 

48 the HDF5 to take references of the location of the variable 

49 length data. Data itself (the raw data) are not compressed 

50 or filtered. 

51 

52 Please refer to the `VLTypes Technical Note 

53 <https://support.hdfgroup.org/HDF5/doc/TechNotes/VLTypes.html>`_ 

54 for more details on the topic. 

55 

56 Parameters 

57 ---------- 

58 parentnode 

59 The parent :class:`Group` object. 

60 name : str 

61 The name of this node in its parent group. 

62 atom 

63 An `Atom` instance representing the *type* and *shape* of the atomic 

64 objects to be saved. 

65 title 

66 A description for this node (it sets the ``TITLE`` HDF5 attribute on 

67 disk). 

68 filters 

69 An instance of the `Filters` class that provides information about the 

70 desired I/O filters to be applied during the life of this object. 

71 expectedrows 

72 A user estimate about the number of row elements that will 

73 be added to the growable dimension in the `VLArray` node. 

74 If not provided, the default value is ``EXPECTED_ROWS_VLARRAY`` 

75 (see ``tables/parameters.py``). If you plan to create either 

76 a much smaller or a much bigger `VLArray` try providing a guess; 

77 this will optimize the HDF5 B-Tree creation and management 

78 process time and the amount of memory used. 

79 

80 .. versionadded:: 3.0 

81 

82 chunkshape 

83 The shape of the data chunk to be read or written in a single HDF5 I/O 

84 operation. Filters are applied to those chunks of data. The 

85 dimensionality of `chunkshape` must be 1. If ``None``, a sensible 

86 value is calculated (which is recommended). 

87 byteorder 

88 The byteorder of the data *on disk*, specified as 'little' or 'big'. 

89 If this is not specified, the byteorder is that of the platform. 

90 

91 track_times 

92 Whether time data associated with the leaf are recorded (object 

93 access time, raw data modification time, metadata change time, object 

94 birth time); default True. Semantics of these times depend on their 

95 implementation in the HDF5 library: refer to documentation of the 

96 H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata 

97 change time) is implemented. 

98 

99 .. versionadded:: 3.4.3 

100 

101 

102 .. versionchanged:: 3.0 

103 *parentNode* renamed into *parentnode*. 

104 

105 .. versionchanged:: 3.0 

106 The *expectedsizeinMB* parameter has been replaced by *expectedrows*. 

107 

108 Examples 

109 -------- 

110 See below a small example of the use of the VLArray class. The code is 

111 available in :file:`examples/vlarray1.py`:: 

112 

113 import numpy as np 

114 import tables as tb 

115 

116 # Create a VLArray: 

117 fileh = tb.open_file('vlarray1.h5', mode='w') 

118 vlarray = fileh.create_vlarray( 

119 fileh.root, 

120 'vlarray1', 

121 tb.Int32Atom(shape=()), 

122 "ragged array of ints", 

123 filters=tb.Filters(1)) 

124 

125 # Append some (variable length) rows: 

126 vlarray.append(np.array([5, 6])) 

127 vlarray.append(np.array([5, 6, 7])) 

128 vlarray.append([5, 6, 9, 8]) 

129 

130 # Now, read it through an iterator: 

131 print('-->', vlarray.title) 

132 for x in vlarray: 

133 print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, x)) 

134 

135 # Now, do the same with native Python strings. 

136 vlarray2 = fileh.create_vlarray( 

137 fileh.root, 

138 'vlarray2', 

139 tb.StringAtom(itemsize=2), 

140 "ragged array of strings", 

141 filters=tb.Filters(1)) 

142 vlarray2.flavor = 'python' 

143 

144 # Append some (variable length) rows: 

145 print('-->', vlarray2.title) 

146 vlarray2.append(['5', '66']) 

147 vlarray2.append(['5', '6', '77']) 

148 vlarray2.append(['5', '6', '9', '88']) 

149 

150 # Now, read it through an iterator: 

151 for x in vlarray2: 

152 print('%s[%d]--> %s' % (vlarray2.name, vlarray2.nrow, x)) 

153 

154 # Close the file. 

155 fileh.close() 

156 

157 The output for the previous script is something like:: 

158 

159 --> ragged array of ints 

160 vlarray1[0]--> [5 6] 

161 vlarray1[1]--> [5 6 7] 

162 vlarray1[2]--> [5 6 9 8] 

163 --> ragged array of strings 

164 vlarray2[0]--> ['5', '66'] 

165 vlarray2[1]--> ['5', '6', '77'] 

166 vlarray2[2]--> ['5', '6', '9', '88'] 

167 

168 

169 .. rubric:: VLArray attributes 

170 

171 The instance variables below are provided in addition to those in 

172 Leaf (see :ref:`LeafClassDescr`). 

173 

174 .. attribute:: atom 

175 

176 An Atom (see :ref:`AtomClassDescr`) 

177 instance representing the *type* and 

178 *shape* of the atomic objects to be 

179 saved. You may use a *pseudo-atom* for 

180 storing a serialized object or variable length string per row. 

181 

182 .. attribute:: flavor 

183 

184 The type of data object read from this leaf. 

185 

186 Please note that when reading several rows of VLArray data, 

187 the flavor only applies to the *components* of the returned 

188 Python list, not to the list itself. 

189 

190 .. attribute:: nrow 

191 

192 On iterators, this is the index of the current row. 

193 

194 .. attribute:: nrows 

195 

196 The current number of rows in the array. 

197 

198 .. attribute:: extdim 

199 

200 The index of the enlargeable dimension (always 0 for vlarrays). 

201 

202 """ 

203 

    # Class identifier.  NOTE(review): presumably the tag used to map
    # HDF5 datasets back to this class on re-opening — confirm against
    # the node registry.
    _c_classid = 'VLARRAY'

206 

207 @lazyattr 

208 def dtype(self): 

209 """The NumPy ``dtype`` that most closely matches this array.""" 

210 return self.atom.dtype 

211 

212 @property 

213 def shape(self): 

214 """The shape of the stored array.""" 

215 return (self.nrows,) 

216 

217 @property 

218 def size_on_disk(self): 

219 """ 

220 The HDF5 library does not include a function to determine size_on_disk 

221 for variable-length arrays. Accessing this attribute will raise a 

222 NotImplementedError. 

223 """ 

224 raise NotImplementedError('size_on_disk not implemented for VLArrays') 

225 

226 @property 

227 def size_in_memory(self): 

228 """ 

229 The size of this array's data in bytes when it is fully loaded 

230 into memory. 

231 

232 .. note:: 

233 

234 When data is stored in a VLArray using the ObjectAtom type, 

235 it is first serialized using pickle, and then converted to 

236 a NumPy array suitable for storage in an HDF5 file. 

237 This attribute will return the size of that NumPy 

238 representation. If you wish to know the size of the Python 

239 objects after they are loaded from disk, you can use this 

240 `ActiveState recipe 

241 <http://code.activestate.com/recipes/577504/>`_. 

242 """ 

243 return self._get_memory_size() 

244 

    def __init__(self, parentnode, name, atom=None, title="",
                 filters=None, expectedrows=None,
                 chunkshape=None, byteorder=None,
                 _log=True, track_times=True):
        """Create or open a VLArray node (see the class docstring for
        the meaning of the parameters).  Passing a non-None `atom`
        means a brand-new node is being created."""

        self._v_version = None
        """The object version of this array."""

        # A node is "new" exactly when an atom was supplied.
        self._v_new = new = atom is not None
        """Is this the first time the node has been created?"""

        self._v_new_title = title
        """New title for this node."""

        self._v_new_filters = filters
        """New filter properties for this array."""

        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_VLARRAY']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the array.

        .. versionadded:: 3.0

        """

        self._v_chunkshape = None
        """Private storage for the `chunkshape` property of Leaf."""

        # Miscellaneous iteration rubbish.
        self._start = None
        """Starting row for the current iteration."""

        self._stop = None
        """Stopping row for the current iteration."""

        self._step = None
        """Step size for the current iteration."""

        self._nrowsread = None
        """Number of rows read up to the current state of iteration."""

        self._startb = None
        """Starting row for current buffer."""

        self._stopb = None
        """Stopping row for current buffer. """

        self._row = None
        """Current row in iterators (sentinel)."""

        self._init = False
        """Whether we are in the middle of an iteration or not (sentinel)."""

        self.listarr = None
        """Current buffer in iterators."""

        # Documented (*public*) attributes.
        self.atom = atom
        """
        An Atom (see :ref:`AtomClassDescr`) instance representing the
        *type* and *shape* of the atomic objects to be saved. You may
        use a *pseudo-atom* for storing a serialized object or
        variable length string per row.
        """
        self.nrow = None
        """On iterators, this is the index of the current row."""

        self.nrows = None
        """The current number of rows in the array."""

        self.extdim = 0  # VLArray only have one dimension currently
        """The index of the enlargeable dimension (always 0 for vlarrays)."""

        # Check the chunkshape parameter (only meaningful at creation).
        if new and chunkshape is not None:
            # Accept a bare integer and normalize it to a 1-tuple.
            if isinstance(chunkshape, (int, np.integer)):
                chunkshape = (chunkshape,)
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r"
                                 % (chunkshape,))
            self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        super().__init__(parentnode, name, new, filters,
                         byteorder, _log, track_times)

336 

337 def _g_post_init_hook(self): 

338 super()._g_post_init_hook() 

339 self.nrowsinbuf = 100 # maybe enough for most applications 

340 

341 # This is too specific for moving it into Leaf 

342 def _calc_chunkshape(self, expectedrows): 

343 """Calculate the size for the HDF5 chunk.""" 

344 

345 # For computing the chunkshape for HDF5 VL types, we have to 

346 # choose the itemsize of the *each* element of the atom and 

347 # not the size of the entire atom. I don't know why this 

348 # should be like this, perhaps I should report this to the 

349 # HDF5 list. 

350 # F. Alted 2006-11-23 

351 # elemsize = self.atom.atomsize() 

352 elemsize = self._basesize 

353 

354 # AV 2013-05-03 

355 # This is just a quick workaround tha allows to change the API for 

356 # PyTables 3.0 release and remove the expected_mb parameter. 

357 # The algorithm for computing the chunkshape should be rewritten as 

358 # requested by gh-35. 

359 expected_mb = expectedrows * elemsize / 1024 ** 2 

360 

361 chunksize = calc_chunksize(expected_mb) 

362 

363 # Set the chunkshape 

364 chunkshape = chunksize // elemsize 

365 # Safeguard against itemsizes being extremely large 

366 if chunkshape == 0: 

367 chunkshape = 1 

368 return (SizeType(chunkshape),) 

369 

    def _g_create(self):
        """Create a variable length array (ragged array).

        Validates the atom, caches its per-element metadata, computes a
        chunkshape if none was given, creates the HDF5 dataset and
        returns its object id.
        """

        atom = self.atom
        self._v_version = obversion
        # Check for zero dims in atom shape (not allowed in VLArrays)
        zerodims = np.sum(np.array(atom.shape) == 0)
        if zerodims > 0:
            raise ValueError("When creating VLArrays, none of the dimensions "
                             "of the Atom instance can be zero.")

        # Pseudo-atoms carry their real storage description in `.base`;
        # plain atoms describe the storage themselves.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self._atomicdtype = atom.base.dtype
            self._atomicsize = atom.base.size
            self._basesize = atom.base.itemsize
        else:
            self._atomicdtype = atom.dtype
            self._atomicsize = atom.size
            self._basesize = atom.itemsize
        self._atomictype = atom.type
        self._atomicshape = atom.shape

        # Compute the optimal chunkshape, if needed
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(self._v_expectedrows)

        self.nrows = SizeType(0)  # No rows at creation time

        # Correct the byteorder if needed
        if self.byteorder is None:
            self.byteorder = correct_byteorder(atom.type, sys.byteorder)

        # After creating the vlarray, ``self._v_objectid`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectid = self._create_array(self._v_new_title)

        # Add an attribute in case we have a pseudo-atom so that we
        # can retrieve the proper class after a re-opening operation.
        if not hasattr(atom, 'size'):  # it is a pseudo-atom
            self.attrs.PSEUDOATOM = atom.kind

        return self._v_objectid

412 

    def _g_open(self):
        """Get the metadata info for an array in file.

        Reads the object id, row count, chunkshape and atom from the
        HDF5 dataset, then restores any pseudo-atom recorded in the
        node attributes, and returns the object id.
        """

        self._v_objectid, self.nrows, self._v_chunkshape, atom = \
            self._open_array()

        # Check if the atom can be a PseudoAtom
        if "PSEUDOATOM" in self.attrs:
            kind = self.attrs.PSEUDOATOM
            if kind == 'vlstring':
                atom = VLStringAtom()
            elif kind == 'vlunicode':
                atom = VLUnicodeAtom()
            elif kind == 'object':
                atom = ObjectAtom()
            else:
                raise ValueError(
                    "pseudo-atom name ``%s`` not known." % kind)
        elif self._v_file.format_version[:1] == "1":
            # NOTE(review): files whose format version starts with "1"
            # apparently encoded pseudo-atoms in the FLAVOR attribute
            # instead — confirm against the 1.x format spec.
            flavor1x = self.attrs.FLAVOR
            if flavor1x == "VLString":
                atom = VLStringAtom()
            elif flavor1x == "Object":
                atom = ObjectAtom()

        self.atom = atom
        return self._v_objectid

440 

441 def _getnobjects(self, nparr): 

442 """Return the number of objects in a NumPy array.""" 

443 

444 # Check for zero dimensionality array 

445 zerodims = np.sum(np.array(nparr.shape) == 0) 

446 if zerodims > 0: 

447 # No objects to be added 

448 return 0 

449 shape = nparr.shape 

450 atom_shape = self.atom.shape 

451 shapelen = len(nparr.shape) 

452 if isinstance(atom_shape, tuple): 

453 atomshapelen = len(self.atom.shape) 

454 else: 

455 atom_shape = (self.atom.shape,) 

456 atomshapelen = 1 

457 diflen = shapelen - atomshapelen 

458 if shape == atom_shape: 

459 nobjects = 1 

460 elif (diflen == 1 and shape[diflen:] == atom_shape): 

461 # Check if the leading dimensions are all ones 

462 # if shape[:diflen-1] == (1,)*(diflen-1): 

463 # nobjects = shape[diflen-1] 

464 # shape = shape[diflen:] 

465 # It's better to accept only inputs with the exact dimensionality 

466 # i.e. a dimensionality only 1 element larger than atom 

467 nobjects = shape[0] 

468 shape = shape[1:] 

469 elif atom_shape == (1,) and shapelen == 1: 

470 # Case where shape = (N,) and shape_atom = 1 or (1,) 

471 nobjects = shape[0] 

472 else: 

473 raise ValueError("The object '%s' is composed of elements with " 

474 "shape '%s', which is not compatible with the " 

475 "atom shape ('%s')." % (nparr, shape, atom_shape)) 

476 return nobjects 

477 

478 def get_enum(self): 

479 """Get the enumerated type associated with this array. 

480 

481 If this array is of an enumerated type, the corresponding Enum instance 

482 (see :ref:`EnumClassDescr`) is returned. If it is not of an enumerated 

483 type, a TypeError is raised. 

484 

485 """ 

486 

487 if self.atom.kind != 'enum': 

488 raise TypeError("array ``%s`` is not of an enumerated type" 

489 % self._v_pathname) 

490 

491 return self.atom.enum 

492 

493 def append(self, sequence): 

494 """Add a sequence of data to the end of the dataset. 

495 

496 This method appends the objects in the sequence to a *single row* in 

497 this array. The type and shape of individual objects must be compliant 

498 with the atoms in the array. In the case of serialized objects and 

499 variable length strings, the object or string to append is itself the 

500 sequence. 

501 

502 """ 

503 

504 self._g_check_open() 

505 self._v_file._check_writable() 

506 

507 # Prepare the sequence to convert it into a NumPy object 

508 atom = self.atom 

509 if not hasattr(atom, 'size'): # it is a pseudo-atom 

510 sequence = atom.toarray(sequence) 

511 statom = atom.base 

512 else: 

513 try: # fastest check in most cases 

514 len(sequence) 

515 except TypeError: 

516 raise TypeError("argument is not a sequence") 

517 statom = atom 

518 

519 if len(sequence) > 0: 

520 # The sequence needs to be copied to make the operation safe 

521 # to in-place conversion. 

522 nparr = convert_to_np_atom2(sequence, statom) 

523 nobjects = self._getnobjects(nparr) 

524 else: 

525 nobjects = 0 

526 nparr = None 

527 

528 self._append(nparr, nobjects) 

529 self.nrows += 1 

530 

531 def iterrows(self, start=None, stop=None, step=None): 

532 """Iterate over the rows of the array. 

533 

534 This method returns an iterator yielding an object of the current 

535 flavor for each selected row in the array. 

536 

537 If a range is not supplied, *all the rows* in the array are iterated 

538 upon. You can also use the :meth:`VLArray.__iter__` special method for 

539 that purpose. If you only want to iterate over a given *range of rows* 

540 in the array, you may use the start, stop and step parameters. 

541 

542 Examples 

543 -------- 

544 

545 :: 

546 

547 for row in vlarray.iterrows(step=4): 

548 print('%s[%d]--> %s' % (vlarray.name, vlarray.nrow, row)) 

549 

550 .. versionchanged:: 3.0 

551 If the *start* parameter is provided and *stop* is None then the 

552 array is iterated from *start* to the last line. 

553 In PyTables < 3.0 only one element was returned. 

554 

555 """ 

556 

557 (self._start, self._stop, self._step) = self._process_range( 

558 start, stop, step) 

559 self._init_loop() 

560 return self 

561 

562 def __iter__(self): 

563 """Iterate over the rows of the array. 

564 

565 This is equivalent to calling :meth:`VLArray.iterrows` with default 

566 arguments, i.e. it iterates over *all the rows* in the array. 

567 

568 Examples 

569 -------- 

570 

571 :: 

572 

573 result = [row for row in vlarray] 

574 

575 Which is equivalent to:: 

576 

577 result = [row for row in vlarray.iterrows()] 

578 

579 """ 

580 

581 if not self._init: 

582 # If the iterator is called directly, assign default variables 

583 self._start = 0 

584 self._stop = self.nrows 

585 self._step = 1 

586 # and initialize the loop 

587 self._init_loop() 

588 

589 return self 

590 

591 def _init_loop(self): 

592 """Initialization for the __iter__ iterator.""" 

593 

594 self._nrowsread = self._start 

595 self._startb = self._start 

596 self._row = -1 # Sentinel 

597 self._init = True # Sentinel 

598 self.nrow = SizeType(self._start - self._step) # row number 

599 

    def __next__(self):
        """Get the next element of the array during an iteration.

        The element is returned as a list of objects of the current
        flavor.

        """

        if self._nrowsread >= self._stop:
            self._init = False
            raise StopIteration  # end of iteration
        else:
            # Refill the buffer when it is exhausted
            # (_row + 1 >= nrowsinbuf) or on the very first call
            # (_row < 0, set by _init_loop).
            if self._row + 1 >= self.nrowsinbuf or self._row < 0:
                self._stopb = self._startb + self._step * self.nrowsinbuf
                self.listarr = self.read(self._startb, self._stopb, self._step)
                self._row = -1
                self._startb = self._stopb
            # Advance the cursor and the public row counters in lockstep.
            self._row += 1
            self.nrow += self._step
            self._nrowsread += self._step
            return self.listarr[self._row]

622 

623 def __getitem__(self, key): 

624 """Get a row or a range of rows from the array. 

625 

626 If key argument is an integer, the corresponding array row is returned 

627 as an object of the current flavor. If key is a slice, the range of 

628 rows determined by it is returned as a list of objects of the current 

629 flavor. 

630 

631 In addition, NumPy-style point selections are supported. In 

632 particular, if key is a list of row coordinates, the set of rows 

633 determined by it is returned. Furthermore, if key is an array of 

634 boolean values, only the coordinates where key is True are returned. 

635 Note that for the latter to work it is necessary that key list would 

636 contain exactly as many rows as the array has. 

637 

638 Examples 

639 -------- 

640 

641 :: 

642 

643 a_row = vlarray[4] 

644 a_list = vlarray[4:1000:2] 

645 a_list2 = vlarray[[0,2]] # get list of coords 

646 a_list3 = vlarray[[0,-2]] # negative values accepted 

647 a_list4 = vlarray[numpy.array([True,...,False])] # array of bools 

648 

649 """ 

650 

651 self._g_check_open() 

652 if is_idx(key): 

653 key = operator.index(key) 

654 

655 # Index out of range protection 

656 if key >= self.nrows: 

657 raise IndexError("Index out of range") 

658 if key < 0: 

659 # To support negative values 

660 key += self.nrows 

661 (start, stop, step) = self._process_range(key, key + 1, 1) 

662 return self.read(start, stop, step)[0] 

663 elif isinstance(key, slice): 

664 start, stop, step = self._process_range( 

665 key.start, key.stop, key.step) 

666 return self.read(start, stop, step) 

667 # Try with a boolean or point selection 

668 elif type(key) in (list, tuple) or isinstance(key, np.ndarray): 

669 coords = self._point_selection(key) 

670 return self._read_coordinates(coords) 

671 else: 

672 raise IndexError(f"Invalid index or slice: {key!r}") 

673 

674 def _assign_values(self, coords, values): 

675 """Assign the `values` to the positions stated in `coords`.""" 

676 

677 for nrow, value in zip(coords, values): 

678 if nrow >= self.nrows: 

679 raise IndexError("First index out of range") 

680 if nrow < 0: 

681 # To support negative values 

682 nrow += self.nrows 

683 object_ = value 

684 # Prepare the object to convert it into a NumPy object 

685 atom = self.atom 

686 if not hasattr(atom, 'size'): # it is a pseudo-atom 

687 object_ = atom.toarray(object_) 

688 statom = atom.base 

689 else: 

690 statom = atom 

691 value = convert_to_np_atom(object_, statom) 

692 nobjects = self._getnobjects(value) 

693 

694 # Get the previous value 

695 nrow = idx2long( 

696 nrow) # To convert any possible numpy scalar value 

697 nparr = self._read_array(nrow, nrow + 1, 1)[0] 

698 nobjects = len(nparr) 

699 if len(value) > nobjects: 

700 raise ValueError("Length of value (%s) is larger than number " 

701 "of elements in row (%s)" % (len(value), 

702 nobjects)) 

703 try: 

704 nparr[:] = value 

705 except Exception as exc: # XXX 

706 raise ValueError("Value parameter:\n'%r'\n" 

707 "cannot be converted into an array object " 

708 "compliant vlarray[%s] row: \n'%r'\n" 

709 "The error was: <%s>" % (value, nrow, 

710 nparr[:], exc)) 

711 

712 if nparr.size > 0: 

713 self._modify(nrow, nparr, nobjects) 

714 

715 def __setitem__(self, key, value): 

716 """Set a row, or set of rows, in the array. 

717 

718 It takes different actions depending on the type of the *key* 

719 parameter: if it is an integer, the corresponding table row is 

720 set to *value* (a record or sequence capable of being converted 

721 to the table structure). If *key* is a slice, the row slice 

722 determined by it is set to *value* (a record array or sequence 

723 of rows capable of being converted to the table structure). 

724 

725 In addition, NumPy-style point selections are supported. In 

726 particular, if key is a list of row coordinates, the set of rows 

727 determined by it is set to value. Furthermore, if key is an array of 

728 boolean values, only the coordinates where key is True are set to 

729 values from value. Note that for the latter to work it is necessary 

730 that key list would contain exactly as many rows as the table has. 

731 

732 .. note:: 

733 

734 When updating the rows of a VLArray object which uses a 

735 pseudo-atom, there is a problem: you can only update values 

736 with *exactly* the same size in bytes than the original row. 

737 This is very difficult to meet with object pseudo-atoms, 

738 because :mod:`pickle` applied on a Python object does not 

739 guarantee to return the same number of bytes than over another 

740 object, even if they are of the same class. 

741 This effectively limits the kinds of objects than can be 

742 updated in variable-length arrays. 

743 

744 Examples 

745 -------- 

746 

747 :: 

748 

749 vlarray[0] = vlarray[0] * 2 + 3 

750 vlarray[99] = arange(96) * 2 + 3 

751 

752 # Negative values for the index are supported. 

753 vlarray[-99] = vlarray[5] * 2 + 3 

754 vlarray[1:30:2] = list_of_rows 

755 vlarray[[1,3]] = new_1_and_3_rows 

756 

757 """ 

758 

759 self._g_check_open() 

760 self._v_file._check_writable() 

761 

762 if is_idx(key): 

763 # If key is not a sequence, convert to it 

764 coords = [key] 

765 value = [value] 

766 elif isinstance(key, slice): 

767 start, stop, step = self._process_range( 

768 key.start, key.stop, key.step) 

769 coords = range(start, stop, step) 

770 # Try with a boolean or point selection 

771 elif type(key) in (list, tuple) or isinstance(key, np.ndarray): 

772 coords = self._point_selection(key) 

773 else: 

774 raise IndexError(f"Invalid index or slice: {key!r}") 

775 

776 # Do the assignment row by row 

777 self._assign_values(coords, value) 

778 

779 # Accessor for the _read_array method in superclass 

780 def read(self, start=None, stop=None, step=1): 

781 """Get data in the array as a list of objects of the current flavor. 

782 

783 Please note that, as the lengths of the different rows are variable, 

784 the returned value is a *Python list* (not an array of the current 

785 flavor), with as many entries as specified rows in the range 

786 parameters. 

787 

788 The start, stop and step parameters can be used to select only a 

789 *range of rows* in the array. Their meanings are the same as in 

790 the built-in range() Python function, except that negative values 

791 of step are not allowed yet. Moreover, if only start is specified, 

792 then stop will be set to start + 1. If you do not specify neither 

793 start nor stop, then *all the rows* in the array are selected. 

794 

795 """ 

796 

797 self._g_check_open() 

798 start, stop, step = self._process_range_read(start, stop, step) 

799 if start == stop: 

800 listarr = [] 

801 else: 

802 listarr = self._read_array(start, stop, step) 

803 

804 atom = self.atom 

805 if not hasattr(atom, 'size'): # it is a pseudo-atom 

806 outlistarr = [atom.fromarray(arr) for arr in listarr] 

807 else: 

808 # Convert the list to the right flavor 

809 flavor = self.flavor 

810 outlistarr = [internal_to_flavor(arr, flavor) for arr in listarr] 

811 return outlistarr 

812 

813 def _read_coordinates(self, coords): 

814 """Read rows specified in `coords`.""" 

815 rows = [] 

816 for coord in coords: 

817 rows.append(self.read(int(coord), int(coord) + 1, 1)[0]) 

818 return rows 

819 

820 def _g_copy_with_stats(self, group, name, start, stop, step, 

821 title, filters, chunkshape, _log, **kwargs): 

822 """Private part of Leaf.copy() for each kind of leaf.""" 

823 

824 # Build the new VLArray object 

825 object = VLArray( 

826 group, name, self.atom, title=title, filters=filters, 

827 expectedrows=self._v_expectedrows, chunkshape=chunkshape, 

828 _log=_log) 

829 

830 # Now, fill the new vlarray with values from the old one 

831 # This is not buffered because we cannot forsee the length 

832 # of each record. So, the safest would be a copy row by row. 

833 # In the future, some analysis can be done in order to buffer 

834 # the copy process. 

835 nrowsinbuf = 1 

836 (start, stop, step) = self._process_range_read(start, stop, step) 

837 # Optimized version (no conversions, no type and shape checks, etc...) 

838 nrowscopied = SizeType(0) 

839 nbytes = 0 

840 if not hasattr(self.atom, 'size'): # it is a pseudo-atom 

841 atomsize = self.atom.base.size 

842 else: 

843 atomsize = self.atom.size 

844 for start2 in range(start, stop, step * nrowsinbuf): 

845 # Save the records on disk 

846 stop2 = start2 + step * nrowsinbuf 

847 if stop2 > stop: 

848 stop2 = stop 

849 nparr = self._read_array(start=start2, stop=stop2, step=step)[0] 

850 nobjects = nparr.shape[0] 

851 object._append(nparr, nobjects) 

852 nbytes += nobjects * atomsize 

853 nrowscopied += 1 

854 object.nrows = nrowscopied 

855 return (object, nbytes) 

856 

857 def __repr__(self): 

858 """This provides more metainfo in addition to standard __str__""" 

859 

860 return f"""{self} 

861 atom = {self.atom!r} 

862 byteorder = {self.byteorder!r} 

863 nrows = {self.nrows} 

864 flavor = {self.flavor!r}"""