
1"""Here is defined the Leaf class.""" 

2 

3import warnings 

4import math 

5 

6import numpy as np 

7try: 

8 import cpuinfo 

9 cpu_info = cpuinfo.get_cpu_info() 

10except ImportError: 

11 cpu_info = {} 

12 

13from .flavor import (check_flavor, internal_flavor, toarray, 

14 alias_map as flavor_alias_map) 

15from .node import Node 

16from .filters import Filters 

17from .utils import byteorders, lazyattr, SizeType 

18from .exceptions import PerformanceWarning 

19 

20 

def csformula(expected_mb):
    """Return the fitted chunksize for expected_mb."""

    # For a basesize of 8 KB, this will return:
    # 8 KB for datasets <= 1 MB
    # 1 MB for datasets >= 10 TB
    basesize = 8 * 1024  # 8 KB is a good minimum
    return basesize * int(2**math.log10(expected_mb))
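

# A minimal sanity sketch (not part of the original module) restating the
# comment above as checks; csformula() is the only name it relies on.
def _csformula_sketch():
    assert csformula(1) == 8 * 1024          # 8 KB for a 1 MB dataset
    assert csformula(10**3) == 64 * 1024     # 64 KB for a 1 GB dataset
    assert csformula(10**7) == 1024 * 1024   # 1 MB for a 10 TB dataset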


def limit_es(expected_mb):
    """Protection against creating too small or too large chunks."""

    if expected_mb < 1:        # < 1 MB
        expected_mb = 1
    elif expected_mb > 10**7:  # > 10 TB
        expected_mb = 10**7
    return expected_mb


def calc_chunksize(expected_mb):
    """Compute the optimum HDF5 chunksize for I/O purposes.

    Rationale: HDF5 takes the data in bunches of chunksize length to
    write them on disk. A B-tree in memory is used to map structures on
    disk. The more chunks that are allocated for a dataset, the larger
    the B-tree. Large B-trees take memory and cause file storage
    overhead as well as more disk I/O and higher contention for the
    metadata cache. You have to balance between memory and I/O overhead
    (small B-trees) and time to access data (big B-trees).

    The tuning of the chunksize parameter affects the performance and
    the memory consumed. This is based on my own experiments and, as
    always, your mileage may vary.

    """

    expected_mb = limit_es(expected_mb)
    zone = int(math.log10(expected_mb))
    expected_mb = 10**zone
    chunksize = csformula(expected_mb)
    # XXX: Multiplying by 8 seems optimal for sequential access
    return chunksize * 8
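

# A worked example (hypothetical, not in the original module) of the
# pipeline above: the expected size is clamped by limit_es(), rounded down
# to a power of ten, passed through csformula(), then scaled by 8.
def _calc_chunksize_sketch():
    expected_mb = limit_es(2500)         # within bounds, so stays 2500
    zone = int(math.log10(expected_mb))  # 3, i.e. rounds down to 1000 MB
    assert calc_chunksize(2500) == csformula(10**zone) * 8  # 512 KB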


class Leaf(Node):
    """Abstract base class for all PyTables leaves.

    A leaf is a node (see the Node class in :class:`Node`) which hangs from a
    group (see the Group class in :class:`Group`) but, unlike a group, it
    cannot have any further children below it (i.e. it is an end node).

    This definition includes all nodes which contain actual data (datasets
    handled by the Table - see :ref:`TableClassDescr`, Array -
    see :ref:`ArrayClassDescr`, CArray - see :ref:`CArrayClassDescr`, EArray -
    see :ref:`EArrayClassDescr`, and VLArray - see :ref:`VLArrayClassDescr`
    classes) and unsupported nodes (the UnImplemented
    class - :ref:`UnImplementedClassDescr`); all these classes do in fact
    inherit from Leaf.

    .. rubric:: Leaf attributes

    These instance variables are provided in addition to those in Node
    (see :ref:`NodeClassDescr`):

    .. attribute:: byteorder

        The byte ordering of the leaf data *on disk*. It will be either
        ``little`` or ``big``.

    .. attribute:: dtype

        The NumPy dtype that most closely matches this leaf type.

    .. attribute:: extdim

        The index of the enlargeable dimension (-1 if none).

    .. attribute:: nrows

        The length of the main dimension of the leaf data.

    .. attribute:: nrowsinbuf

        The number of rows that fit in internal input buffers.

        You can change this to fine-tune the speed or memory
        requirements of your application.

    .. attribute:: shape

        The shape of data in the leaf.

    """

    # These are a little hard to override, but so are properties.
    attrs = Node._v_attrs
    """The associated AttributeSet instance - see :ref:`AttributeSetClassDescr`
    (This is an easier-to-write alias of :attr:`Node._v_attrs`)."""
    title = Node._v_title
    """A description for this node
    (This is an easier-to-write alias of :attr:`Node._v_title`)."""

    @property
    def name(self):
        """The name of this node in its parent group (This is an
        easier-to-write alias of :attr:`Node._v_name`)."""
        return self._v_name

    @property
    def chunkshape(self):
        """The HDF5 chunk size for chunked leaves (a tuple).

        This is read-only because you cannot change the chunk size of a
        leaf once it has been created.
        """
        return getattr(self, '_v_chunkshape', None)

    @property
    def object_id(self):
        """A node identifier, which may change from run to run.
        (This is an easier-to-write alias of :attr:`Node._v_objectid`).

        .. versionchanged:: 3.0
           The *objectID* property has been renamed into *object_id*.

        """
        return self._v_objectid

    @property
    def ndim(self):
        """The number of dimensions of the leaf data.

        .. versionadded:: 2.4"""
        return len(self.shape)

    @lazyattr
    def filters(self):
        """Filter properties for this leaf.

        See Also
        --------
        Filters

        """

        return Filters._from_leaf(self)

    @property
    def track_times(self):
        """Whether timestamps for the leaf are recorded.

        If the leaf is not a dataset, this will fail with HDF5ExtError.

        The track times dataset creation property does not seem to
        survive closing and reopening as of HDF5 1.8.17. Currently,
        it may be more accurate to test whether the ctime for the
        dataset is 0:
        track_times = (leaf._get_obj_timestamps().ctime == 0)
        """
        return self._get_obj_track_times()

    @property
    def maindim(self):
        """The dimension along which iterators work.

        Its value is 0 (i.e. the first dimension) when the dataset is not
        extendable, and self.extdim (where available) for extendable ones.
        """

        if self.extdim < 0:
            return 0  # choose the first dimension
        return self.extdim

    @property
    def flavor(self):
        """The type of data object read from this leaf.

        It can be either 'numpy' or 'python'.

        You can (and are encouraged to) use this property to get, set
        and delete the FLAVOR HDF5 attribute of the leaf. When the leaf
        has no such attribute, the default flavor is used.
        """

        return self._flavor

    @flavor.setter
    def flavor(self, flavor):
        self._v_file._check_writable()
        check_flavor(flavor)
        self._v_attrs.FLAVOR = self._flavor = flavor  # logs the change

    @flavor.deleter
    def flavor(self):
        del self._v_attrs.FLAVOR
        self._flavor = internal_flavor
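
    # Hypothetical usage (not in the original module): switching a leaf to
    # return plain Python objects instead of NumPy arrays, then reverting:
    #   leaf.flavor = 'python'   # sets the FLAVOR attribute on disk
    #   del leaf.flavor          # falls back to the internal default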

    @property
    def size_on_disk(self):
        """
        The size of this leaf's data in bytes as it is stored on disk. If the
        data is compressed, this shows the compressed size. In the case of
        uncompressed, chunked data, this may be slightly larger than the
        amount of data, due to partially filled chunks.
        """
        return self._get_storage_size()

    def __init__(self, parentnode, name,
                 new=False, filters=None,
                 byteorder=None, _log=True,
                 track_times=True):
        self._v_new = new
        """Is this the first time the node has been created?"""
        self.nrowsinbuf = None
        """
        The number of rows that fit in internal input buffers.

        You can change this to fine-tune the speed or memory
        requirements of your application.
        """
        self._flavor = None
        """Private storage for the `flavor` property."""

        if new:
            # Get filter properties from parent group if not given.
            if filters is None:
                filters = parentnode._v_filters
            self.__dict__['filters'] = filters  # bypass the property

            if byteorder not in (None, 'little', 'big'):
                raise ValueError(
                    "the byteorder can only take 'little' or 'big' values "
                    "and you passed: %s" % byteorder)
            self.byteorder = byteorder
            """The byte ordering of the leaf data *on disk*."""

        self._want_track_times = track_times

        # Existing filters need not be read since `filters`
        # is a lazy property that automatically handles their loading.

        super().__init__(parentnode, name, _log)

    def __len__(self):
        """Return the length of the main dimension of the leaf data.

        Please note that this may raise an OverflowError on 32-bit platforms
        for datasets having more than 2**31-1 rows. This is a limitation of
        Python that you can work around by using the nrows or shape
        attributes.

        """

        return self.nrows

    def __str__(self):
        """The string representation for this object is its pathname in the
        HDF5 object tree plus some additional metainfo."""

        filters = []
        if self.filters.fletcher32:
            filters.append("fletcher32")
        if self.filters.complevel:
            if self.filters.shuffle:
                filters.append("shuffle")
            if self.filters.bitshuffle:
                filters.append("bitshuffle")
            filters.append(f"{self.filters.complib}({self.filters.complevel})")
        return (f"{self._v_pathname} ({self.__class__.__name__}"
                f"{self.shape}{', '.join(filters)}) {self._v_title!r}")

    def _g_post_init_hook(self):
        """Code to be run after node creation and before creation logging.

        This method gets or sets the flavor of the leaf.

        """

        super()._g_post_init_hook()
        if self._v_new:  # set flavor of new node
            if self._flavor is None:
                self._flavor = internal_flavor
            else:  # flavor set at creation time, do not log
                if self._v_file.params['PYTABLES_SYS_ATTRS']:
                    self._v_attrs._g__setattr('FLAVOR', self._flavor)
        else:  # get flavor of existing node (if any)
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                flavor = getattr(self._v_attrs, 'FLAVOR', internal_flavor)
                self._flavor = flavor_alias_map.get(flavor, flavor)
            else:
                self._flavor = internal_flavor

    def _calc_chunkshape(self, expectedrows, rowsize, itemsize):
        """Calculate the shape for the HDF5 chunk."""

        # In case of a scalar shape, return the unit chunksize
        if self.shape == ():
            return (SizeType(1),)

        # Compute the chunksize
        MB = 1024 * 1024
        expected_mb = (expectedrows * rowsize) // MB
        chunksize = calc_chunksize(expected_mb)
        complib = self.filters.complib
        if (complib is not None and
                complib.startswith("blosc2") and
                self._c_classid == 'TABLE'):
            # Blosc2 can introspect into blocks, so we can increase the
            # chunksize to improve HDF5 performance for its internal B-tree.
            # For the time being, this has been implemented efficiently
            # just for tables, but in the future *Array objects could also
            # be included.
            # In Blosc2, the role of the HDF5 chunksize could be played by
            # the Blosc2 blocksize...
            # self._v_blocksize = chunksize
            # but let's let the internal machinery in Blosc2 decide the
            # actual blocksize.
            self._v_blocksize = 0
            # Use a decent default value for chunksize
            chunksize *= 16
            # Now, go explore the L3 size and try to find a smarter chunksize
            if 'l3_cache_size' in cpu_info:
                # In general, it is a good idea to set the chunksize equal
                # to the L3 size.
                l3_cache_size = cpu_info['l3_cache_size']
                # cpuinfo sometimes returns cache sizes as strings (like,
                # "4096 KB"), so refuse the temptation to guess and use the
                # value only when it is an actual int.
                # Also, sometimes cpuinfo does not return a correct L3 size;
                # so in general, enforcing L3 > L2 is a good sanity check.
                l2_cache_size = cpu_info.get('l2_cache_size', "Not found")
                if (type(l3_cache_size) is int and
                        type(l2_cache_size) is int and
                        l3_cache_size > l2_cache_size):
                    chunksize = l3_cache_size
            # In Blosc2, the chunksize cannot be larger than
            # BLOSC2_MAX_BUFFERSIZE (2 GB - 32 bytes).
            if chunksize > 2**31 - 32:
                chunksize = 2**31 - 32

        maindim = self.maindim
        # Compute the chunknitems
        chunknitems = chunksize // itemsize
        # Safeguard against itemsizes being extremely large
        if chunknitems == 0:
            chunknitems = 1
        chunkshape = list(self.shape)
        # Check whether trimming the main dimension is enough
        chunkshape[maindim] = 1
        newchunknitems = np.prod(chunkshape, dtype=SizeType)
        if newchunknitems <= chunknitems:
            chunkshape[maindim] = chunknitems // newchunknitems
        else:
            # No, so start trimming other dimensions as well
            for j in range(len(chunkshape)):
                # Check whether trimming this dimension is enough
                chunkshape[j] = 1
                newchunknitems = np.prod(chunkshape, dtype=SizeType)
                if newchunknitems <= chunknitems:
                    chunkshape[j] = chunknitems // newchunknitems
                    break
            else:
                # Oops, we exited the loop without a break.
                # Set the last dimension to chunknitems.
                chunkshape[-1] = chunknitems

        return tuple(SizeType(s) for s in chunkshape)
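
    # A hypothetical walk-through (numbers are illustrative, not from the
    # original module) of the trimming above: for shape (0, 500, 400) with
    # maindim == 0 and a budget of 131072 items (a 1 MB chunk of 8-byte
    # items), setting the main dimension to 1 leaves 500 * 400 = 200000
    # items, still over budget. The loop then sets dimension 0 to 1 (no
    # change) and dimension 1 to 1, leaving 400 <= 131072 items, so
    # dimension 1 becomes 131072 // 400 == 327 and the final chunkshape
    # is (1, 327, 400).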

    def _calc_nrowsinbuf(self):
        """Calculate the number of rows that fit in a PyTables buffer."""

        params = self._v_file.params
        # Compute the nrowsinbuf
        rowsize = self.rowsize
        buffersize = params['IO_BUFFER_SIZE']
        if rowsize != 0:
            nrowsinbuf = buffersize // rowsize
        else:
            nrowsinbuf = 1

        # Safeguard against row sizes being extremely large
        if nrowsinbuf == 0:
            nrowsinbuf = 1
            # If rowsize is too large, issue a Performance warning
            maxrowsize = params['BUFFER_TIMES'] * buffersize
            if rowsize > maxrowsize:
                warnings.warn("""\
The Leaf ``%s`` is exceeding the maximum recommended rowsize (%d bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O. You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leaf. Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it."""
                              % (self._v_pathname, maxrowsize),
                              PerformanceWarning)
        return nrowsinbuf
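
    # For instance (hypothetical numbers, not from the original module):
    # with an IO_BUFFER_SIZE of 1 MB and a 64-byte row, the buffer holds
    # 1048576 // 64 == 16384 rows, so iterators move 16384 rows per I/O
    # operation.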

    # This method is appropriate for calls to __getitem__() methods
    def _process_range(self, start, stop, step, dim=None, warn_negstep=True):
        if dim is None:
            nrows = self.nrows  # self.shape[self.maindim]
        else:
            nrows = self.shape[dim]

        if warn_negstep and step and step < 0:
            raise ValueError("slice step cannot be negative")

        # if start is not None: start = long(start)
        # if stop is not None: stop = long(stop)
        # if step is not None: step = long(step)

        return slice(start, stop, step).indices(int(nrows))
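
    # A quick sketch (not in the original module) of how slice().indices
    # normalizes the range above against nrows:
    #   slice(None, None, None).indices(100)  ->  (0, 100, 1)
    #   slice(-10, None, 2).indices(100)      ->  (90, 100, 2)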

    # This method is appropriate for calls to read() methods
    def _process_range_read(self, start, stop, step, warn_negstep=True):
        nrows = self.nrows
        if start is not None and stop is None and step is None:
            # Protection against start greater than available records
            # nrows == 0 is a special case for empty objects
            if 0 < nrows <= start:
                raise IndexError("start of range (%s) is greater than "
                                 "number of rows (%s)" % (start, nrows))
            step = 1
            if start == -1:  # corner case
                stop = nrows
            else:
                stop = start + 1
        # Finally, get the correct values (over the main dimension)
        start, stop, step = self._process_range(start, stop, step,
                                                warn_negstep=warn_negstep)
        return (start, stop, step)
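
    # For example (not in the original module): with nrows == 100, a call
    # with start=5 and stop=step=None selects exactly one row, returning
    # (5, 6, 1); the corner case start=-1 returns (99, 100, 1), i.e. the
    # last row.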

    def _g_copy(self, newparent, newname, recursive, _log=True, **kwargs):
        # Compute default arguments.
        start = kwargs.pop('start', None)
        stop = kwargs.pop('stop', None)
        step = kwargs.pop('step', None)
        title = kwargs.pop('title', self._v_title)
        filters = kwargs.pop('filters', self.filters)
        chunkshape = kwargs.pop('chunkshape', self.chunkshape)
        copyuserattrs = kwargs.pop('copyuserattrs', True)
        stats = kwargs.pop('stats', None)
        if chunkshape == 'keep':
            chunkshape = self.chunkshape  # Keep the original chunkshape
        elif chunkshape == 'auto':
            chunkshape = None  # Will recompute chunkshape

        # Fix arguments with explicit None values for backwards compatibility.
        if title is None:
            title = self._v_title
        if filters is None:
            filters = self.filters

        # Create a copy of the object.
        (new_node, bytes) = self._g_copy_with_stats(
            newparent, newname, start, stop, step,
            title, filters, chunkshape, _log, **kwargs)

        # Copy user attributes if requested (or the flavor at least).
        if copyuserattrs:
            self._v_attrs._g_copy(new_node._v_attrs, copyclass=True)
        elif 'FLAVOR' in self._v_attrs:
            if self._v_file.params['PYTABLES_SYS_ATTRS']:
                new_node._v_attrs._g__setattr('FLAVOR', self._flavor)
        new_node._flavor = self._flavor  # update cached value

        # Update statistics if needed.
        if stats is not None:
            stats['leaves'] += 1
            stats['bytes'] += bytes

        return new_node

    def _g_fix_byteorder_data(self, data, dbyteorder):
        """Fix the byteorder of data passed in constructors."""
        dbyteorder = byteorders[dbyteorder]
        # If self.byteorder has not been passed as an argument of
        # the constructor, then set it to the same value of data.
        if self.byteorder is None:
            self.byteorder = dbyteorder
        # Do an additional in-place byteswap of data if the in-memory
        # byteorder doesn't match that of the on-disk. This is the only
        # place where we have to do the conversion manually. In all the
        # other cases, HDF5 itself is responsible for doing the byteswap
        # properly.
        if dbyteorder in ['little', 'big']:
            if dbyteorder != self.byteorder:
                # if data is not writeable, do a copy first
                if not data.flags.writeable:
                    data = data.copy()
                data.byteswap(True)
        else:
            # Fix the byteorder again, no matter which byteorder the
            # user specified in the constructor.
            self.byteorder = "irrelevant"
        return data

    def _point_selection(self, key):
        """Perform a point-wise selection.

        `key` can be any of the following items:

        * A boolean array with the same shape as self. Those positions
          with True values will signal the coordinates to be returned.

        * A numpy array (or list or tuple) with the point coordinates.
          This has to be a two-dimensional array of size len(self.shape)
          by num_elements containing a list of zero-based values
          specifying the coordinates in the dataset of the selected
          elements. The order of the element coordinates in the array
          specifies the order in which the array elements are iterated
          through when I/O is performed. Duplicate coordinate locations
          are not checked for.

        Return the coordinates array. If this is not possible, raise a
        `TypeError` so that the next selection method can be tried out.

        This is useful for any `Leaf` instance implementing point-wise
        selection.

        """
        input_key = key
        if type(key) in (list, tuple):
            if isinstance(key, tuple) and len(key) > len(self.shape):
                raise IndexError(f"Invalid index or slice: {key!r}")
            # Try to convert key to a numpy array. If not possible,
            # a TypeError will be issued (to be caught later on).
            try:
                key = toarray(key)
            except ValueError:
                raise TypeError(f"Invalid index or slice: {key!r}")
        elif not isinstance(key, np.ndarray):
            raise TypeError(f"Invalid index or slice: {key!r}")

        # Protection against empty keys
        if len(key) == 0:
            return np.array([], dtype="i8")

        if key.dtype.kind == 'b':
            if not key.shape == self.shape:
                raise IndexError(
                    "Boolean indexing array has incompatible shape")
            # Get the True coordinates (64-bit indices!)
            coords = np.asarray(key.nonzero(), dtype='i8')
            coords = np.transpose(coords)
        elif key.dtype.kind == 'i' or key.dtype.kind == 'u':
            if len(key.shape) > 2:
                raise IndexError(
                    "Coordinate indexing array has incompatible shape")
            elif len(key.shape) == 2:
                if key.shape[0] != len(self.shape):
                    raise IndexError(
                        "Coordinate indexing array has incompatible shape")
                coords = np.asarray(key, dtype="i8")
                coords = np.transpose(coords)
            else:
                # For 1-dimensional datasets
                coords = np.asarray(key, dtype="i8")

            # handle negative indices
            base = coords if coords.base is None else coords.base
            if base is input_key:
                # never modify the original "key" data
                coords = coords.copy()

            idx = coords < 0
            coords[idx] = (coords + self.shape)[idx]

            # bounds check
            if np.any(coords < 0) or np.any(coords >= self.shape):
                raise IndexError("Index out of bounds")
        else:
            raise TypeError("Only integer coordinates allowed.")
        # We absolutely need a contiguous array
        if not coords.flags.contiguous:
            coords = coords.copy()
        return coords
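
    # Hypothetical examples (not in the original module) of keys accepted
    # above for a leaf of shape (4, 5):
    #   * a boolean mask of shape (4, 5), whose True positions are turned
    #     into coordinates via nonzero();
    #   * a 2 x N coordinate array such as [[0, 1, 3], [2, 2, 4]], which
    #     selects elements (0, 2), (1, 2) and (3, 4) after transposition.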

    # Tree manipulation
    def remove(self):
        """Remove this node from the hierarchy.

        This method has the behavior described
        in :meth:`Node._f_remove`. Please note that there is no recursive
        flag since leaves do not have child nodes.

        """

        self._f_remove(False)

    def rename(self, newname):
        """Rename this node in place.

        This method has the behavior described in :meth:`Node._f_rename`.

        """

        self._f_rename(newname)

    def move(self, newparent=None, newname=None,
             overwrite=False, createparents=False):
        """Move or rename this node.

        This method has the behavior described in :meth:`Node._f_move`.

        """

        self._f_move(newparent, newname, overwrite, createparents)


    def copy(self, newparent=None, newname=None,
             overwrite=False, createparents=False, **kwargs):
        """Copy this node and return the new one.

        This method has the behavior described in :meth:`Node._f_copy`.
        Please note that there is no recursive flag since leaves do not
        have child nodes.

        .. warning::

            Note that unknown parameters passed to this method will be
            ignored, so you may want to double check the spelling of
            these (i.e. if you write them incorrectly, they will most
            probably be ignored).

        Parameters
        ----------
        title
            The new title for the destination. If omitted or None, the
            original title is used.
        filters : Filters
            Specifying this parameter overrides the original filter
            properties in the source node. If specified, it must be an
            instance of the Filters class (see :ref:`FiltersClassDescr`).
            The default is to copy the filter properties from the source
            node.
        copyuserattrs
            You can prevent the user attributes from being copied by
            setting this parameter to False. The default is to copy them.
        start, stop, step : int
            Specify the range of rows to be copied; the default is to
            copy all the rows.
        stats
            This argument may be used to collect statistics on the copy
            process. When used, it should be a dictionary with keys
            'groups', 'leaves' and 'bytes' having a numeric value. Their
            values will be incremented to reflect the number of groups,
            leaves and bytes, respectively, that have been copied during
            the operation.
        chunkshape
            The chunkshape of the new leaf. It supports a couple of
            special values. A value of 'keep' means that the chunkshape
            will be the same as in the original leaf (this is the
            default). A value of 'auto' means that a new shape will be
            computed automatically in order to ensure best performance
            when accessing the dataset through the main dimension. Any
            other value should be an integer or a tuple matching the
            dimensions of the leaf.

        """

        return self._f_copy(
            newparent, newname, overwrite, createparents, **kwargs)
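
    # A hedged usage sketch (not in the original module; ``h5file`` and the
    # node path are hypothetical):
    #   leaf = h5file.root.group.table
    #   leaf.copy(h5file.root, 'table_copy', chunkshape='auto',
    #             start=0, stop=1000)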

    def truncate(self, size):
        """Truncate the main dimension to be size rows.

        If the main dimension previously was larger than this size, the
        extra data is lost. If the main dimension previously was shorter,
        it is extended, and the extended part is filled with the default
        values.

        The truncation operation can only be applied to *enlargeable*
        datasets, else a TypeError will be raised.

        """

        # Non-enlargeable arrays (Array, CArray) cannot be truncated
        if self.extdim < 0:
            raise TypeError("non-enlargeable datasets cannot be truncated")
        self._g_truncate(size)

    def isvisible(self):
        """Is this node visible?

        This method has the behavior described in :meth:`Node._f_isvisible`.

        """

        return self._f_isvisible()

    # Attribute handling
    def get_attr(self, name):
        """Get a PyTables attribute from this node.

        This method has the behavior described in :meth:`Node._f_getattr`.

        """

        return self._f_getattr(name)

    def set_attr(self, name, value):
        """Set a PyTables attribute for this node.

        This method has the behavior described in :meth:`Node._f_setattr`.

        """

        self._f_setattr(name, value)

    def del_attr(self, name):
        """Delete a PyTables attribute from this node.

        This method has the behavior described in :meth:`Node._f_delattr`.

        """

        self._f_delattr(name)

    # Data handling
    def flush(self):
        """Flush pending data to disk.

        Saves whatever remaining buffered data to disk. It also releases
        I/O buffers, so if you are filling many datasets in the same
        PyTables session, please call flush() frequently so as to help
        PyTables keep memory requirements low.

        """

        self._g_flush()

    def _f_close(self, flush=True):
        """Close this node in the tree.

        This method has the behavior described in :meth:`Node._f_close`.
        Besides that, the optional argument flush tells whether to flush
        pending data to disk or not before closing.

        """

        if not self._v_isopen:
            return  # the node is already closed or not initialized

        # Only do a flush in case the leaf has an IO buffer. The
        # internal buffers of HDF5 will be flushed afterwards during the
        # self._g_close() call. Avoiding an unnecessary flush()
        # operation accelerates closing for unbuffered leaves.
        if flush and hasattr(self, "_v_iobuf"):
            self.flush()

        # Close the dataset and release resources
        self._g_close()

        # Close myself as a node.
        super()._f_close()


    def close(self, flush=True):
        """Close this node in the tree.

        This method is completely equivalent to :meth:`Leaf._f_close`.

        """

        self._f_close(flush)