Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/table.py: 14% (1343 statements)

1"""Here is defined the Table class.""" 

2 

3import functools 

4import math 

5import operator 

6import platform 

7import sys 

8import warnings 

9from pathlib import Path 

10 

11from time import perf_counter as clock 

12 

13import numexpr as ne 

14import numpy as np 

15 

16from . import tableextension 

17from .lrucacheextension import ObjectCache, NumCache 

18from .atom import Atom 

19from .conditions import compile_condition 

20from .flavor import flavor_of, array_as_internal, internal_to_flavor 

21from .utils import is_idx, lazyattr, SizeType, NailedDict as CacheDict 

22from .leaf import Leaf 

23from .description import (IsDescription, Description, Col, descr_from_dtype) 

24from .exceptions import ( 

25 NodeError, HDF5ExtError, PerformanceWarning, OldIndexWarning, 

26 NoSuchNodeError) 

27from .utilsextension import get_nested_field 

28 

29from .path import join_path, split_path 

30from .index import ( 

31 OldIndex, default_index_filters, default_auto_index, Index, IndexesDescG, 

32 IndexesTableG) 

33 

34 

profile = False
# profile = True  # Uncomment for profiling
if profile:
    from .utils import show_stats


# 2.2: Added support for complex types. Introduced in version 0.9.
# 2.2.1: Added support for time types.
# 2.3: Changed the indexes naming schema.
# 2.4: Changed indexes naming schema (again).
# 2.5: Added the FIELD_%d_FILL attributes.
# 2.6: Added the FLAVOR attribute (optional).
# 2.7: Numeric and numarray flavors are gone.
obversion = "2.7"  # The Table VERSION number

# Maps NumPy types to the types used by Numexpr.
_nxtype_from_nptype = {
    np.bool_: bool,
    np.int8: ne.necompiler.int_,
    np.int16: ne.necompiler.int_,
    np.int32: ne.necompiler.int_,
    np.int64: ne.necompiler.long_,
    np.uint8: ne.necompiler.int_,
    np.uint16: ne.necompiler.int_,
    np.uint32: ne.necompiler.long_,
    np.uint64: ne.necompiler.long_,
    np.float32: float,
    np.float64: ne.necompiler.double,
    np.complex64: complex,
    np.complex128: complex,
    np.bytes_: bytes,
}

_nxtype_from_nptype[np.str_] = str

if hasattr(np, 'float16'):
    _nxtype_from_nptype[np.float16] = float  # XXX: check
if hasattr(np, 'float96'):
    _nxtype_from_nptype[np.float96] = ne.necompiler.double  # XXX: check
if hasattr(np, 'float128'):
    _nxtype_from_nptype[np.float128] = ne.necompiler.double  # XXX: check
if hasattr(np, 'complex192'):
    _nxtype_from_nptype[np.complex192] = complex  # XXX: check
if hasattr(np, 'complex256'):
    _nxtype_from_nptype[np.complex256] = complex  # XXX: check


# The NumPy scalar type corresponding to `SizeType`.
_npsizetype = np.array(SizeType(0)).dtype.type

def _index_name_of(node):
    return '_i_%s' % node._v_name


def _index_pathname_of(node):
    nodeParentPath = split_path(node._v_pathname)[0]
    return join_path(nodeParentPath, _index_name_of(node))


def _index_pathname_of_column(table, colpathname):
    return join_path(_index_pathname_of(table), colpathname)


# The next are versions that work with just paths (i.e. we don't need
# a node instance for using them, which can be critical in certain
# situations).


def _index_name_of_(nodeName):
    return '_i_%s' % nodeName


def _index_pathname_of_(nodePath):
    nodeParentPath, nodeName = split_path(nodePath)
    return join_path(nodeParentPath, _index_name_of_(nodeName))


def _index_pathname_of_column_(tablePath, colpathname):
    return join_path(_index_pathname_of_(tablePath), colpathname)

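# A quick sketch of the naming scheme the helpers above implement; the table
# path and column name here are hypothetical:
#
#   >>> _index_pathname_of_('/detector/readout')
#   '/detector/_i_readout'
#   >>> _index_pathname_of_column_('/detector/readout', 'energy')
#   '/detector/_i_readout/energy'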
def restorecache(self):
    # Define a cache for sparse table reads
    params = self._v_file.params
    chunksize = self._v_chunkshape[0]
    nslots = params['TABLE_MAX_SIZE'] / (chunksize * self._v_dtype.itemsize)
    self._chunkcache = NumCache((nslots, chunksize), self._v_dtype,
                                'table chunk cache')
    self._seqcache = ObjectCache(params['ITERSEQ_MAX_SLOTS'],
                                 params['ITERSEQ_MAX_SIZE'],
                                 'Iter sequence cache')
    self._dirtycache = False

def _table__where_indexed(self, compiled, condition, condvars,
                          start, stop, step):
    if profile:
        tref = clock()
    if profile:
        show_stats("Entering table_whereIndexed", tref)
    self._use_index = True
    # Clean the table caches for indexed queries if needed
    if self._dirtycache:
        restorecache(self)

    # Get the values in expression that are not columns
    values = []
    for key, value in condvars.items():
        if isinstance(value, np.ndarray):
            values.append((key, value.item()))
    # Build a key for the sequence cache
    seqkey = (condition, tuple(values), (start, stop, step))
    # Do a lookup in sequential cache for this query
    nslot = self._seqcache.getslot(seqkey)
    if nslot >= 0:
        # Get the row sequence from the cache
        seq = self._seqcache.getitem(nslot)
        if len(seq) == 0:
            return iter([])
        # seq is a list.
        seq = np.array(seq, dtype='int64')
        # Correct the ranges in cached sequence
        if (start, stop, step) != (0, self.nrows, 1):
            seq = seq[(seq >= start) & (
                seq < stop) & ((seq - start) % step == 0)]
        return self.itersequence(seq)
    else:
        # No luck. self._seqcache will be populated
        # in the iterator if possible. (Row._finish_riterator)
        self._seqcache_key = seqkey

    # Compute the chunkmap for every index in indexed expression
    idxexprs = compiled.index_expressions
    strexpr = compiled.string_expression
    cmvars = {}
    tcoords = 0
    for i, idxexpr in enumerate(idxexprs):
        var, ops, lims = idxexpr
        col = condvars[var]
        index = col.index
        assert index is not None, "the chosen column is not indexed"
        assert not index.dirty, "the chosen column has a dirty index"

        # Get the number of rows that the indexed condition yields.
        range_ = index.get_lookup_range(ops, lims)
        ncoords = index.search(range_)
        tcoords += ncoords
        if index.reduction == 1 and ncoords == 0:
            # No values from index condition, thus the chunkmap should be
            # empty
            nrowsinchunk = self.chunkshape[0]
            nchunks = math.ceil(self.nrows / nrowsinchunk)
            chunkmap = np.zeros(shape=nchunks, dtype="bool")
        else:
            # Get the chunkmap from the index
            chunkmap = index.get_chunkmap()
        # Assign the chunkmap to the cmvars dictionary
        cmvars["e%d" % i] = chunkmap

    if index.reduction == 1 and tcoords == 0:
        # No candidates found in any indexed expression component, so
        # leave now
        self._seqcache.setitem(seqkey, [], 1)
        return iter([])

    # Compute the final chunkmap
    chunkmap = ne.evaluate(strexpr, cmvars)
    if not chunkmap.any():
        # The chunkmap is all False, so the result is empty
        self._seqcache.setitem(seqkey, [], 1)
        return iter([])

    if profile:
        show_stats("Exiting table_whereIndexed", tref)
    return chunkmap

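# A minimal sketch of the chunkmap combination step above: each indexed
# subexpression contributes a boolean map of table chunks ("e0", "e1", ...)
# and Numexpr simply combines them.  The arrays here are made up:
#
#   >>> import numexpr as ne
#   >>> import numpy as np
#   >>> e0 = np.array([True, False, True])   # chunks matching ``col1 > 0``
#   >>> e1 = np.array([True, True, False])   # chunks matching ``col2 < 5``
#   >>> ne.evaluate('e0 & e1', {'e0': e0, 'e1': e1})
#   array([ True, False, False])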
def create_indexes_table(table):
    itgroup = IndexesTableG(
        table._v_parent, _index_name_of(table),
        "Indexes container for table " + table._v_pathname, new=True)
    return itgroup


def create_indexes_descr(igroup, dname, iname, filters):
    idgroup = IndexesDescG(
        igroup, iname,
        "Indexes container for sub-description " + dname,
        filters=filters, new=True)
    return idgroup

def _column__create_index(self, optlevel, kind, filters, tmp_dir,
                          blocksizes, verbose):
    name = self.name
    table = self.table
    dtype = self.dtype
    descr = self.descr
    index = self.index
    get_node = table._v_file._get_node

    # Fail if the index already exists
    if index:
        raise ValueError("%s for column '%s' already exists. If you want to "
                         "re-create it, please use the reindex() method "
                         "instead" % (str(index), str(self.pathname)))

    # Check that the datatype is indexable.
    if dtype.str[1:] == 'u8':
        raise NotImplementedError(
            "indexing 64-bit unsigned integer columns "
            "is not supported yet, sorry")
    if dtype.kind == 'c':
        raise TypeError("complex columns can not be indexed")
    if dtype.shape != ():
        raise TypeError("multidimensional columns can not be indexed")

    # Get the indexes group for table, and if it does not exist, create it
    try:
        itgroup = get_node(_index_pathname_of(table))
    except NoSuchNodeError:
        itgroup = create_indexes_table(table)

    # Create the necessary intermediate groups for descriptors
    idgroup = itgroup
    dname = ""
    pathname = descr._v_pathname
    if pathname != '':
        inames = pathname.split('/')
        for iname in inames:
            if dname == '':
                dname = iname
            else:
                dname += '/' + iname
            try:
                idgroup = get_node(f'{itgroup._v_pathname}/{dname}')
            except NoSuchNodeError:
                idgroup = create_indexes_descr(idgroup, dname, iname, filters)

    # Create the atom
    assert dtype.shape == ()
    atom = Atom.from_dtype(np.dtype((dtype, (0,))))

    # Protection on tables larger than the expected rows (perhaps the
    # user forgot to pass this parameter to the Table constructor?)
    expectedrows = table._v_expectedrows
    if table.nrows > expectedrows:
        expectedrows = table.nrows

    # Create the index itself
    index = Index(
        idgroup, name, atom=atom,
        title="Index for %s column" % name,
        kind=kind,
        optlevel=optlevel,
        filters=filters,
        tmp_dir=tmp_dir,
        expectedrows=expectedrows,
        byteorder=table.byteorder,
        blocksizes=blocksizes)

    table._set_column_indexing(self.pathname, True)

    # Feed the index with values

    # Add rows to the index if necessary
    if table.nrows > 0:
        indexedrows = table._add_rows_to_index(
            self.pathname, 0, table.nrows, lastrow=True, update=False)
    else:
        indexedrows = 0
    index.dirty = False
    table._indexedrows = indexedrows
    table._unsaved_indexedrows = table.nrows - indexedrows

    # Optimize the index that has been already filled up
    index.optimize(verbose=verbose)

    # We cannot do a flush here because when reindexing during a
    # flush, the indexes are created anew, and that creates a nested
    # call to flush().
    # table.flush()

    return indexedrows

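# The helper above backs the public ``Column.create_index()`` API.  A usage
# sketch (the table and column names are hypothetical, and the returned row
# count is made up):
#
#   >>> tbl = h5file.root.detector.readout
#   >>> tbl.cols.energy.create_index(optlevel=6, kind='medium')
#   1000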
class _ColIndexes(dict):
    """Provides a nice representation of column indexes."""

    def __repr__(self):
        """Gives a detailed column index representation."""

        rep = [f'  "{k}": {v}' for k, v in self.items()]
        return '{\n  %s}' % (',\n  '.join(rep))

class Table(tableextension.Table, Leaf):
    """This class represents heterogeneous datasets in an HDF5 file.

    Tables are leaves (see the Leaf class in :ref:`LeafClassDescr`) whose data
    consists of a unidimensional sequence of *rows*, where each row contains
    one or more *fields*. Fields have an associated unique *name* and
    *position*, with the first field having position 0. All rows have the same
    fields, which are arranged in *columns*.

    Fields can have any type supported by the Col class (see
    :ref:`ColClassDescr`) and its descendants, which support multidimensional
    data. Moreover, a field can be *nested* (to an arbitrary depth), meaning
    that it includes further fields inside. A field named x inside a nested
    field a in a table can be accessed as the field a/x (its *path name*) from
    the table.

    The structure of a table is declared by its description, which is made
    available in the Table.description attribute (see :class:`Table`).

    This class provides new methods to read, write and search table data
    efficiently. It also provides special Python methods to allow accessing
    the table as a normal sequence or array (with extended slicing supported).

    PyTables supports *in-kernel* searches working simultaneously on several
    columns using complex conditions. These are faster than selections using
    Python expressions. See the :meth:`Table.where` method for more
    information on in-kernel searches.

    Non-nested columns can be *indexed*. Searching an indexed column can be
    several times faster than searching a non-indexed one. Search methods
    automatically take advantage of indexing where available.

    When iterating a table, an object from the Row (see :ref:`RowClassDescr`)
    class is used. This object allows reading and writing data one row at a
    time, as well as performing queries which are not supported by in-kernel
    syntax (at a much lower speed, of course).

    Objects of this class support access to individual columns via *natural
    naming* through the :attr:`Table.cols` accessor. Nested columns are
    mapped to Cols instances, and non-nested ones to Column instances.
    See the Column class in :ref:`ColumnClassDescr` for examples of this
    feature.

    Parameters
    ----------
    parentnode
        The parent :class:`Group` object.

        .. versionchanged:: 3.0
           Renamed from *parentNode* to *parentnode*.

    name : str
        The name of this node in its parent group.
    description
        An IsDescription subclass or a dictionary where the keys are the field
        names, and the values the type definitions. In addition, a pure NumPy
        dtype is accepted. If None, the table metadata is read from disk;
        otherwise, it is taken from the previous parameters.
    title
        Sets a TITLE attribute on the HDF5 table entity.
    filters : Filters
        An instance of the Filters class that provides information about the
        desired I/O filters to be applied during the life of this object.
    expectedrows
        A user estimate of the number of rows that will be in the table. If
        not provided, the default value is ``EXPECTED_ROWS_TABLE`` (see
        ``tables/parameters.py``). If you plan to save bigger tables, try
        providing a guess; this will optimize the HDF5 B-Tree creation and
        management process time and memory used.
    chunkshape
        The shape of the data chunk to be read or written as a single HDF5 I/O
        operation. The filters are applied to those chunks of data. Its rank
        for tables has to be 1. If ``None``, a sensible value is calculated
        based on the `expectedrows` parameter (which is recommended).
    byteorder
        The byteorder of the data *on-disk*, specified as 'little' or 'big'.
        If this is not specified, the byteorder is that of the platform,
        unless you passed a recarray as the `description`, in which case the
        recarray byteorder will be chosen.
    track_times
        Whether time data associated with the leaf are recorded (object
        access time, raw data modification time, metadata change time, object
        birth time); default True. Semantics of these times depend on their
        implementation in the HDF5 library: refer to documentation of the
        H5O_info_t data structure. As of HDF5 1.8.15, only ctime (metadata
        change time) is implemented.

        .. versionadded:: 3.4.3

    Notes
    -----
    The instance variables below are provided in addition to those in
    Leaf (see :ref:`LeafClassDescr`). Please note that there are several
    col* dictionaries to ease retrieving information about a column
    directly by its path name, avoiding the need to walk through
    Table.description or Table.cols.


    .. rubric:: Table attributes

    .. attribute:: coldescrs

        Maps the name of a column to its Col description (see
        :ref:`ColClassDescr`).

    .. attribute:: coldflts

        Maps the name of a column to its default value.

    .. attribute:: coldtypes

        Maps the name of a column to its NumPy data type.

    .. attribute:: colindexed

        Is the column whose name is used as a key indexed?

    .. attribute:: colinstances

        Maps the name of a column to its Column (see
        :ref:`ColumnClassDescr`) or Cols (see :ref:`ColsClassDescr`)
        instance.

    .. attribute:: colnames

        A list containing the names of *top-level* columns in the table.

    .. attribute:: colpathnames

        A list containing the pathnames of *bottom-level* columns in
        the table.

        These are the leaf columns obtained when walking the table
        description left-to-right, bottom-first. Columns inside a
        nested column have slashes (/) separating name components in
        their pathname.

    .. attribute:: cols

        A Cols instance that provides *natural naming* access to
        non-nested (Column, see :ref:`ColumnClassDescr`) and nested
        (Cols, see :ref:`ColsClassDescr`) columns.

    .. attribute:: coltypes

        Maps the name of a column to its PyTables data type.

    .. attribute:: description

        A Description instance (see :ref:`DescriptionClassDescr`)
        reflecting the structure of the table.

    .. attribute:: extdim

        The index of the enlargeable dimension (always 0 for tables).

    .. attribute:: indexed

        Does this table have any indexed columns?

    .. attribute:: nrows

        The current number of rows in the table.

    """

    # Class identifier.
    _c_classid = 'TABLE'

    @lazyattr
    def row(self):
        """The associated Row instance (see :ref:`RowClassDescr`)."""

        return tableextension.Row(self)

    @lazyattr
    def dtype(self):
        """The NumPy ``dtype`` that most closely matches this table."""

        return self.description._v_dtype

    @property
    def shape(self):
        """The shape of this table."""
        return (self.nrows,)

    @property
    def rowsize(self):
        """The size in bytes of each row in the table."""
        return self.description._v_dtype.itemsize

    @property
    def size_in_memory(self):
        """The size of this table's data in bytes when it is fully loaded
        into memory. This may be used in combination with size_on_disk to
        calculate the compression ratio of the data."""
        return self.nrows * self.rowsize

    @lazyattr
    def _v_iobuf(self):
        """A buffer for doing I/O."""

        return self._get_container(self.nrowsinbuf)

    @lazyattr
    def _v_wdflts(self):
        """The defaults for writing in recarray format."""

        # First, do a check to see whether we need to set default values
        # different from 0 or not.
        for coldflt in self.coldflts.values():
            if isinstance(coldflt, np.ndarray) or coldflt:
                break
        else:
            # No default different from 0 found. Returning None.
            return None
        wdflts = self._get_container(1)
        for colname, coldflt in self.coldflts.items():
            ra = get_nested_field(wdflts, colname)
            ra[:] = coldflt
        return wdflts

    @lazyattr
    def _colunaligned(self):
        """The pathnames of unaligned, *unidimensional* columns."""
        colunaligned, rarr = [], self._get_container(0)
        for colpathname in self.colpathnames:
            carr = get_nested_field(rarr, colpathname)
            if not carr.flags.aligned and carr.ndim == 1:
                colunaligned.append(colpathname)
        return frozenset(colunaligned)

    # **************** WARNING! ***********************
    # This function can be called during the destruction time of a table,
    # so measures have been taken so that it doesn't have to revive
    # another node (which can fool the LRU cache). The solution devised
    # has been to add a cache for autoindex (Table._autoindex), populate
    # it in creation time of the cache (which is a safe period) and then
    # update the cache whenever it changes.
    # This solves the error when running test_indexes.py ManyNodesTestCase.
    # F. Alted 2007-04-20
    # **************************************************

    @property
    def autoindex(self):
        """Automatically keep column indexes up to date?

        Setting this value states whether existing indexes should be
        automatically updated after an append operation or recomputed
        after an index-invalidating operation (i.e. removal and
        modification of rows). The default is true.

        This value gets into effect whenever a column is altered. If you
        don't have automatic indexing activated and you want to do an
        immediate update, use `Table.flush_rows_to_index()`; for an
        immediate reindexing of invalidated indexes, use
        `Table.reindex_dirty()`.

        This value is persistent.

        .. versionchanged:: 3.0
           The *autoIndex* property has been renamed into *autoindex*.
        """

        if self._autoindex is None:
            try:
                indexgroup = self._v_file._get_node(_index_pathname_of(self))
            except NoSuchNodeError:
                self._autoindex = default_auto_index  # update cache
                return self._autoindex
            else:
                self._autoindex = indexgroup.auto  # update cache
                return self._autoindex
        else:
            # The value is in cache, return it
            return self._autoindex

    @autoindex.setter
    def autoindex(self, auto):
        auto = bool(auto)
        try:
            indexgroup = self._v_file._get_node(_index_pathname_of(self))
        except NoSuchNodeError:
            indexgroup = create_indexes_table(self)
        indexgroup.auto = auto
        # Update the cache in table instance as well
        self._autoindex = auto

    @property
    def indexedcolpathnames(self):
        """List of pathnames of indexed columns in the table."""
        return [_colpname
                for _colpname in self.colpathnames
                if self.colindexed[_colpname]]

    @property
    def colindexes(self):
        """A dictionary with the indexes of the indexed columns."""
        return _ColIndexes((_colpname, self.cols._f_col(_colpname).index)
                           for _colpname in self.colpathnames
                           if self.colindexed[_colpname])

    @property
    def _dirtyindexes(self):
        """Whether some index in table is dirty."""
        return self._condition_cache._nailcount > 0

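    # A usage sketch for the ``autoindex`` property above (deferred index
    # maintenance; the table is hypothetical):
    #
    #   >>> table.autoindex = False      # persisted in the indexes group
    #   >>> table.append(rows)           # indexes now lag behind the data
    #   >>> table.flush_rows_to_index()  # bring them up to date explicitly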
    def __init__(self, parentnode, name,
                 description=None, title="", filters=None,
                 expectedrows=None, chunkshape=None,
                 byteorder=None, _log=True, track_times=True):

        self._v_new = new = description is not None
        """Is this the first time the node has been created?"""
        self._v_new_title = title
        """New title for this node."""
        self._v_new_filters = filters
        """New filter properties for this node."""
        self.extdim = 0  # Tables only have one dimension currently
        """The index of the enlargeable dimension (always 0 for tables)."""
        self._v_recarray = None
        """A structured array to be stored in the table."""
        self._rabyteorder = None
        """The computed byteorder of the self._v_recarray."""
        if expectedrows is None:
            expectedrows = parentnode._v_file.params['EXPECTED_ROWS_TABLE']
        self._v_expectedrows = expectedrows
        """The expected number of rows to be stored in the table."""
        self.nrows = SizeType(0)
        """The current number of rows in the table."""
        self.description = None
        """A Description instance (see :ref:`DescriptionClassDescr`)
        reflecting the structure of the table."""
        self._time64colnames = []
        """The names of ``Time64`` columns."""
        self._strcolnames = []
        """The names of ``String`` columns."""
        self._colenums = {}
        """Maps the name of an enumerated column to its ``Enum`` instance."""
        self._v_chunkshape = None
        """Private storage for the `chunkshape` property of the leaf."""

        self.indexed = False
        """Does this table have any indexed columns?"""
        self._indexedrows = 0
        """Number of rows indexed on disk."""
        self._unsaved_indexedrows = 0
        """Number of rows indexed in memory but still not on disk."""
        self._listoldindexes = []
        """The list of columns with old indexes."""
        self._autoindex = None
        """Private variable that caches the value for autoindex."""

        self.colnames = []
        """A list containing the names of *top-level* columns in the
        table."""
        self.colpathnames = []
        """A list containing the pathnames of *bottom-level* columns in the
        table.

        These are the leaf columns obtained when walking the
        table description left-to-right, bottom-first. Columns inside a
        nested column have slashes (/) separating name components in
        their pathname.
        """
        self.colinstances = {}
        """Maps the name of a column to its Column (see
        :ref:`ColumnClassDescr`) or Cols (see :ref:`ColsClassDescr`)
        instance."""
        self.coldescrs = {}
        """Maps the name of a column to its Col description (see
        :ref:`ColClassDescr`)."""
        self.coltypes = {}
        """Maps the name of a column to its PyTables data type."""
        self.coldtypes = {}
        """Maps the name of a column to its NumPy data type."""
        self.coldflts = {}
        """Maps the name of a column to its default value."""
        self.colindexed = {}
        """Is the column whose name is used as a key indexed?"""

        self._use_index = False
        """Whether an index can be used or not in a search. Boolean."""
        self._where_condition = None
        """Condition function and argument list for selection of values."""
        self._seqcache_key = None
        """The key under which to save a query's results (list of row
        indexes) or None to not save."""
        max_slots = parentnode._v_file.params['COND_CACHE_SLOTS']
        self._condition_cache = CacheDict(max_slots)
        """Cache of already compiled conditions."""
        self._exprvars_cache = {}
        """Cache of variables participating in numexpr expressions."""
        self._enabled_indexing_in_queries = True
        """Is indexing enabled in queries? *Use only for testing.*"""
        self._empty_array_cache = {}
        """Cache of empty arrays."""

        self._v_dtype = None
        """The NumPy datatype for this table."""
        self.cols = None
        """
        A Cols instance that provides *natural naming* access to non-nested
        (Column, see :ref:`ColumnClassDescr`) and nested (Cols, see
        :ref:`ColsClassDescr`) columns.
        """
        self._dirtycache = True
        """Whether the data caches are dirty or not. Initially set to yes."""
        self._descflavor = None
        """Temporarily keeps the flavor of a description with data."""

        # Initialize this object in case it is a new Table

        # Try purely descriptive description objects.
        if new and isinstance(description, dict):
            # Dictionary case
            self.description = Description(description,
                                           ptparams=parentnode._v_file.params)
        elif new and (type(description) == type(IsDescription)
                      and issubclass(description, IsDescription)):
            # IsDescription subclass case
            descr = description()
            self.description = Description(descr.columns,
                                           ptparams=parentnode._v_file.params)
        elif new and isinstance(description, Description):
            # It is a Description instance already
            self.description = description

        # No description yet?
        if new and self.description is None:
            # Try NumPy dtype instances
            if isinstance(description, np.dtype):
                tup = descr_from_dtype(description,
                                       ptparams=parentnode._v_file.params)
                self.description, self._rabyteorder = tup

        # No description yet?
        if new and self.description is None:
            # Try structured array description objects.
            try:
                self._descflavor = flavor = flavor_of(description)
            except TypeError:  # probably not an array
                pass
            else:
                if flavor == 'python':
                    nparray = np.rec.array(description)
                else:
                    nparray = array_as_internal(description, flavor)
                self.nrows = nrows = SizeType(nparray.size)
                # If `self._v_recarray` is set, it will be used as the
                # initial buffer.
                if nrows > 0:
                    self._v_recarray = nparray
                tup = descr_from_dtype(nparray.dtype,
                                       ptparams=parentnode._v_file.params)
                self.description, self._rabyteorder = tup

        # No description yet?
        if new and self.description is None:
            raise TypeError(
                "the ``description`` argument is not of a supported type: "
                "``IsDescription`` subclass, ``Description`` instance, "
                "dictionary, or structured array")

        # Check the chunkshape parameter
        if new and chunkshape is not None:
            if isinstance(chunkshape, (int, np.integer)):
                chunkshape = (chunkshape,)
            try:
                chunkshape = tuple(chunkshape)
            except TypeError:
                raise TypeError(
                    "`chunkshape` parameter must be an integer or sequence "
                    "and you passed a %s" % type(chunkshape))
            if len(chunkshape) != 1:
                raise ValueError("`chunkshape` rank (length) must be 1: %r"
                                 % (chunkshape,))
            self._v_chunkshape = tuple(SizeType(s) for s in chunkshape)

        super().__init__(parentnode, name, new, filters, byteorder, _log,
                         track_times)

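    # A construction sketch via the public API (file, group and field names
    # are hypothetical); ``File.create_table()`` ends up calling this
    # constructor:
    #
    #   >>> class Particle(IsDescription):
    #   ...     name = StringCol(16)
    #   ...     energy = Float64Col()
    #   >>> tbl = h5file.create_table('/', 'readout', Particle,
    #   ...                           expectedrows=1_000_000)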
    def _g_post_init_hook(self):
        # We are putting here the index-related issues
        # as well as filling general info for table.
        # This is needed because we need the index objects created first.

        # First, get back the flavor of input data (if any) for
        # `Leaf._g_post_init_hook()`.
        self._flavor, self._descflavor = self._descflavor, None
        super()._g_post_init_hook()

        self.blosc2_support_write = (
            (self.byteorder == sys.byteorder) and
            (self.filters.complib is not None) and
            (self.filters.complib.startswith("blosc2")))
        # For reading, Windows does not support re-opening a file twice
        # in not read-only mode (for good reason), so we cannot use the
        # blosc2 optimization
        self.blosc2_support_read = (
            self.blosc2_support_write and
            ((platform.system().lower() != 'windows') or
             (self._v_file.mode == 'r'))
        )

        # Create a cols accessor.
        self.cols = Cols(self, self.description)

        # Place the `Cols` and `Column` objects into `self.colinstances`.
        colinstances, cols = self.colinstances, self.cols
        for colpathname in self.description._v_pathnames:
            colinstances[colpathname] = cols._g_col(colpathname)

        if self._v_new:
            # Columns are never indexed on creation.
            self.colindexed = {cpn: False for cpn in self.colpathnames}
            return

        # The following code is only for opened tables.

        # Does the indexes group exist?
        indexesgrouppath = _index_pathname_of(self)
        igroup = indexesgrouppath in self._v_file
        oldindexes = False
        for colobj in self.description._f_walk(type="Col"):
            colname = colobj._v_pathname
            # Is this column indexed?
            if igroup:
                indexname = _index_pathname_of_column(self, colname)
                indexed = indexname in self._v_file
                self.colindexed[colname] = indexed
                if indexed:
                    column = self.cols._g_col(colname)
                    indexobj = column.index
                    if isinstance(indexobj, OldIndex):
                        indexed = False  # not a valid index
                        oldindexes = True
                        self._listoldindexes.append(colname)
                    else:
                        # Tell the condition cache about columns with dirty
                        # indexes.
                        if indexobj.dirty:
                            self._condition_cache.nail()
            else:
                indexed = False
                self.colindexed[colname] = False
            if indexed:
                self.indexed = True

        if oldindexes:  # this should only appear under 2.x Pro
            warnings.warn(
                "table ``%s`` has column indexes with PyTables 1.x format. "
                "Unfortunately, this format is not supported in "
                "PyTables 2.x series. Note that you can use the "
                "``ptrepack`` utility in order to recreate the indexes. "
                "The 1.x indexed columns found are: %s" %
                (self._v_pathname, self._listoldindexes),
                OldIndexWarning)

        # It does not matter to which column 'indexobj' belongs,
        # since their respective index objects share
        # the same number of elements.
        if self.indexed:
            self._indexedrows = indexobj.nelements
            self._unsaved_indexedrows = self.nrows - self._indexedrows
            # Put the autoindex value in a cache variable
            self._autoindex = self.autoindex

    def _calc_nrowsinbuf(self):
        """Calculate the number of rows that fit in a PyTables buffer."""

        params = self._v_file.params
        # Compute the nrowsinbuf
        rowsize = self.rowsize
        buffersize = params['IO_BUFFER_SIZE']
        if rowsize != 0:
            nrowsinbuf = buffersize // rowsize
            # The number of rows in buffer needs to be an exact multiple of
            # chunkshape[0] for queries using indexed columns.
            # Fixes #319 and probably #409 too.
            nrowsinbuf -= nrowsinbuf % self.chunkshape[0]
        else:
            nrowsinbuf = 1

        # tableextension.pyx performs an assertion
        # to make sure nrowsinbuf is greater than or
        # equal to the chunksize.
        # See gh-206 and gh-238
        if self.chunkshape is not None:
            if nrowsinbuf < self.chunkshape[0]:
                nrowsinbuf = self.chunkshape[0]

        # Safeguard against row sizes being extremely large
        if nrowsinbuf == 0:
            nrowsinbuf = 1
        # If rowsize is too large, issue a PerformanceWarning
        maxrowsize = params['BUFFER_TIMES'] * buffersize
        if rowsize > maxrowsize:
            warnings.warn("""\
The Table ``%s`` is exceeding the maximum recommended rowsize (%d bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O. You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leaf. Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it."""
                          % (self._v_pathname, maxrowsize),
                          PerformanceWarning)
        return nrowsinbuf

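    # Worked example of the buffer sizing above (all numbers made up): with
    # IO_BUFFER_SIZE = 1 MiB, rowsize = 24 bytes and chunkshape = (512,),
    # buffersize // rowsize = 43690 rows, which is then rounded down to the
    # nearest multiple of 512: nrowsinbuf = 43690 - (43690 % 512) = 43520.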
    def _getemptyarray(self, dtype):
        # Acts as a cache for empty arrays
        key = dtype
        if key in self._empty_array_cache:
            return self._empty_array_cache[key]
        else:
            self._empty_array_cache[
                key] = arr = np.empty(shape=0, dtype=key)
            return arr

    def _get_container(self, shape):
        """Get the appropriate buffer for data depending on table
        nestedness."""

        # This is *much* faster than the numpy.rec.array counterpart
        return np.empty(shape=shape, dtype=self._v_dtype)

    def _get_type_col_names(self, type_):
        """Returns a list containing 'type_' column names."""

        return [colobj._v_pathname
                for colobj in self.description._f_walk('Col')
                if colobj.type == type_]

    def _get_enum_map(self):
        """Return mapping from enumerated column names to `Enum` instances."""

        enumMap = {}
        for colobj in self.description._f_walk('Col'):
            if colobj.kind == 'enum':
                enumMap[colobj._v_pathname] = colobj.enum
        return enumMap

    def _g_create(self):
        """Create a new table on disk."""

        # Warn against assigning too many columns...
        # F. Alted 2005-06-05
        maxColumns = self._v_file.params['MAX_COLUMNS']
        if (len(self.description._v_names) > maxColumns):
            warnings.warn(
                "table ``%s`` is exceeding the recommended "
                "maximum number of columns (%d); "
                "be ready to see PyTables asking for *lots* of memory "
                "and possibly slow I/O" % (self._v_pathname, maxColumns),
                PerformanceWarning)

        # 1. Create the HDF5 table (some parameters need to be computed).

        # Fix the byteorder of the recarray and update the number of
        # expected rows if necessary
        if self._v_recarray is not None:
            self._v_recarray = self._g_fix_byteorder_data(self._v_recarray,
                                                          self._rabyteorder)
            if len(self._v_recarray) > self._v_expectedrows:
                self._v_expectedrows = len(self._v_recarray)
        # Compute a sensible chunkshape
        if self._v_chunkshape is None:
            self._v_chunkshape = self._calc_chunkshape(
                self._v_expectedrows, self.rowsize, self.rowsize)
        # Correct the byteorder, if still needed
        if self.byteorder is None:
            self.byteorder = sys.byteorder

        # Cache some data which is already in the description.
        # This needs to happen before creation time in order
        # to be able to populate the self._v_wdflts
        self._cache_description_data()

        # After creating the table, ``self._v_objectid`` needs to be
        # set because it is needed for setting attributes afterwards.
        self._v_objectid = self._create_table(
            self._v_new_title, self.filters.complib or '', obversion)
        self._v_recarray = None  # not useful anymore
        self._rabyteorder = None  # not useful anymore

        # 2. Compute or get chunk shape and buffer size parameters.
        self.nrowsinbuf = self._calc_nrowsinbuf()

        # 3. Get field fill attributes from the table description and
        #    set them on disk.
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            set_attr = self._v_attrs._g__setattr
            for i, colobj in enumerate(self.description._f_walk(type="Col")):
                fieldname = "FIELD_%d_FILL" % i
                set_attr(fieldname, colobj.dflt)

        return self._v_objectid

    def _g_open(self):
        """Open a table from disk and read the metadata on it.

        Creates a user description on the fly to ease access to
        the actual data.

        """

        # 1. Open the HDF5 table and get some data from it.
        self._v_objectid, description, chunksize = self._get_info()
        self._v_expectedrows = self.nrows  # the actual number of rows

        # 2. Create an instance description to host the record fields.
        validate = not self._v_file._isPTFile  # only for non-PyTables files
        self.description = Description(description, validate=validate,
                                       ptparams=self._v_file.params)

        # 3. Compute or get chunk shape and buffer size parameters.
        if chunksize == 0:
            self._v_chunkshape = self._calc_chunkshape(
                self._v_expectedrows, self.rowsize, self.rowsize)
        else:
            self._v_chunkshape = (chunksize,)
        self.nrowsinbuf = self._calc_nrowsinbuf()

        # 4. If there are field fill attributes, get them from disk and
        #    set them in the table description.
        if self._v_file.params['PYTABLES_SYS_ATTRS']:
            if "FIELD_0_FILL" in self._v_attrs._f_list("sys"):
                i = 0
                get_attr = self._v_attrs.__getattr__
                for objcol in self.description._f_walk(type="Col"):
                    colname = objcol._v_pathname
                    # Get the default values for each column
                    fieldname = "FIELD_%s_FILL" % i
                    defval = get_attr(fieldname)
                    if defval is not None:
                        objcol.dflt = defval
                    else:
                        warnings.warn("could not load default value "
                                      "for the ``%s`` column of table "
                                      "``%s``; using ``%r`` instead"
                                      % (colname, self._v_pathname,
                                         objcol.dflt))
                        defval = objcol.dflt
                    i += 1

                # Set also the correct value in the desc._v_dflts dictionary
                for descr in self.description._f_walk(type="Description"):
                    for name in descr._v_names:
                        objcol = descr._v_colobjects[name]
                        if isinstance(objcol, Col):
                            descr._v_dflts[objcol._v_name] = objcol.dflt

        # 5. Cache some data which is already in the description.
        self._cache_description_data()

        return self._v_objectid

    def _cache_description_data(self):
        """Cache some data which is already in the description.

        Some information is extracted from `self.description` to build
        some useful (but redundant) structures:

        * `self.colnames`
        * `self.colpathnames`
        * `self.coldescrs`
        * `self.coltypes`
        * `self.coldtypes`
        * `self.coldflts`
        * `self._v_dtype`
        * `self._time64colnames`
        * `self._strcolnames`
        * `self._colenums`

        """

        self.colnames = list(self.description._v_names)
        self.colpathnames = [
            col._v_pathname for col in self.description._f_walk()
            if not hasattr(col, '_v_names')]  # bottom-level

        # Find ``time64`` column names.
        self._time64colnames = self._get_type_col_names('time64')
        # Find ``string`` column names.
        self._strcolnames = self._get_type_col_names('string')
        # Get a mapping of enumerated columns to their `Enum` instances.
        self._colenums = self._get_enum_map()

        # Get info about columns
        for colobj in self.description._f_walk(type="Col"):
            colname = colobj._v_pathname
            # Get the column descriptions, types and defaults
            self.coldescrs[colname] = colobj
            self.coltypes[colname] = colobj.type
            self.coldtypes[colname] = colobj.dtype
            self.coldflts[colname] = colobj.dflt

        # Assign _v_dtype for this table
        self._v_dtype = self.description._v_dtype

    def _get_column_instance(self, colpathname):
        """Get the instance of the column with the given `colpathname`.

        If the column does not exist in the table, a `KeyError` is
        raised.

        """

        try:
            return functools.reduce(
                getattr, colpathname.split('/'), self.description)
        except AttributeError:
            raise KeyError("table ``%s`` does not have a column named ``%s``"
                           % (self._v_pathname, colpathname))

    _check_column = _get_column_instance

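    # The ``functools.reduce`` call above just walks natural-naming
    # attributes, so for a hypothetical nested column 'a/x':
    #
    #   >>> functools.reduce(getattr, 'a/x'.split('/'), table.description)
    #
    # is equivalent to ``table.description.a.x``.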
    def _disable_indexing_in_queries(self):
        """Force queries not to use indexing.

        *Use only for testing.*

        """

        if not self._enabled_indexing_in_queries:
            return  # already disabled
        # The nail avoids setting/getting compiled conditions in/from
        # the cache where indexing is used.
        self._condition_cache.nail()
        self._enabled_indexing_in_queries = False

    def _enable_indexing_in_queries(self):
        """Allow queries to use indexing.

        *Use only for testing.*

        """

        if self._enabled_indexing_in_queries:
            return  # already enabled
        self._condition_cache.unnail()
        self._enabled_indexing_in_queries = True

    def _required_expr_vars(self, expression, uservars, depth=1):
        """Get the variables required by the `expression`.

        A new dictionary defining the variables used in the `expression`
        is returned. Required variables are first looked up in the
        `uservars` mapping, then in the set of top-level columns of the
        table. Unknown variables cause a `NameError` to be raised.

        When `uservars` is `None`, the local and global namespace where
        the API callable which uses this method is called is sought
        instead. This mechanism will not work as expected if this
        method is not used *directly* from an API callable. To disable
        this mechanism, just specify a mapping as `uservars`.

        Nested columns and columns from other tables are not allowed
        (`TypeError` and `ValueError` are raised, respectively). Also,
        non-column variable values are converted to NumPy arrays.

        `depth` specifies the depth of the frame in order to reach local
        or global variables.

        """

        # Get the names of variables used in the expression.
        exprvarscache = self._exprvars_cache
        if expression not in exprvarscache:
            # Protection against growing the cache too much
            if len(exprvarscache) > 256:
                # Remove 10 (arbitrary) elements from the cache
                for k in list(exprvarscache)[:10]:
                    del exprvarscache[k]
            cexpr = compile(expression, '<string>', 'eval')
            exprvars = [var for var in cexpr.co_names
                        if var not in ['None', 'False', 'True']
                        and var not in ne.expressions.functions]
            exprvarscache[expression] = exprvars
        else:
            exprvars = exprvarscache[expression]

        # Get the local and global variable mappings of the user frame
        # if no mapping has been explicitly given for user variables.
        user_locals, user_globals = {}, {}
        if uservars is None:
            # We use specified depth to get the frame where the API
            # callable using this method is called. For instance:
            #
            # * ``table._required_expr_vars()`` (depth 0) is called by
            # * ``table._where()`` (depth 1) is called by
            # * ``table.where()`` (depth 2) is called by
            # * user-space functions (depth 3)
            user_frame = sys._getframe(depth)
            user_locals = user_frame.f_locals
            user_globals = user_frame.f_globals

        colinstances = self.colinstances
        tblfile, tblpath = self._v_file, self._v_pathname
        # Look for the required variables first among the ones
        # explicitly provided by the user, then among implicit columns,
        # then among external variables (only if no explicit variables).
        reqvars = {}
        for var in exprvars:
            # Get the value.
            if uservars is not None and var in uservars:
                val = uservars[var]
            elif var in colinstances:
                val = colinstances[var]
            elif uservars is None and var in user_locals:
                val = user_locals[var]
            elif uservars is None and var in user_globals:
                val = user_globals[var]
            else:
                raise NameError("name ``%s`` is not defined" % var)

            # Check the value.
            if hasattr(val, 'pathname'):  # non-nested column
                if val.shape[1:] != ():
                    raise NotImplementedError(
                        "variable ``%s`` refers to "
                        "a multidimensional column, "
                        "not yet supported in conditions, sorry" % var)
                if (val._table_file is not tblfile or
                        val._table_path != tblpath):
                    raise ValueError("variable ``%s`` refers to a column "
                                     "which is not part of table ``%s``"
                                     % (var, tblpath))
                if val.dtype.str[1:] == 'u8':
                    raise NotImplementedError(
                        "variable ``%s`` refers to "
                        "a 64-bit unsigned integer column, "
                        "not yet supported in conditions, sorry; "
                        "please use regular Python selections" % var)
            elif hasattr(val, '_v_colpathnames'):  # nested column
                raise TypeError(
                    "variable ``%s`` refers to a nested column, "
                    "not allowed in conditions" % var)
            else:  # only non-column values are converted to arrays
                # XXX: not 100% sure about this
                if isinstance(val, str):
                    val = np.asarray(val.encode('ascii'))
                else:
                    val = np.asarray(val)
            reqvars[var] = val
        return reqvars

    def _get_condition_key(self, condition, condvars):
        """Get the condition cache key for `condition` with `condvars`.

        Currently, the key is a tuple of `condition`, column variable
        names, normal variable names, column paths and variable types
        (all are tuples).

        """

        # Variable names for column and normal variables.
        colnames, varnames = [], []
        # Column paths and types for each of the previous variables.
        colpaths, vartypes = [], []
        for (var, val) in condvars.items():
            if hasattr(val, 'pathname'):  # column
                colnames.append(var)
                colpaths.append(val.pathname)
            else:  # array
                try:
                    varnames.append(var)
                    vartypes.append(ne.necompiler.getType(val))  # expensive
                except ValueError:
                    # This is clearer than the error given by Numexpr.
                    raise TypeError("variable ``%s`` has data type ``%s``, "
                                    "not allowed in conditions"
                                    % (var, val.dtype.name))
        colnames, varnames = tuple(colnames), tuple(varnames)
        colpaths, vartypes = tuple(colpaths), tuple(vartypes)
        condkey = (condition, colnames, varnames, colpaths, vartypes)
        return condkey

    def _compile_condition(self, condition, condvars):
        """Compile the `condition` and extract usable index conditions.

        This method returns an instance of ``CompiledCondition``. See
        the ``compile_condition()`` function in the ``conditions``
        module for more information about the compilation process.

        This method makes use of the condition cache when possible.

        """

        # Look up the condition in the condition cache.
        condcache = self._condition_cache
        condkey = self._get_condition_key(condition, condvars)
        compiled = condcache.get(condkey)
        if compiled:
            return compiled.with_replaced_vars(condvars)  # bingo!

        # Bad luck, the condition must be parsed and compiled.
        # Fortunately, the key provides some valuable information. ;)
        (condition, colnames, varnames, colpaths, vartypes) = condkey

        # Extract more information from referenced columns.

        # start with normal variables
        typemap = dict(list(zip(varnames, vartypes)))
        indexedcols = []
        for colname in colnames:
            col = condvars[colname]

            # Extract types from *all* the given variables.
            coltype = col.dtype.type
            typemap[colname] = _nxtype_from_nptype[coltype]

            # Get the set of columns with usable indexes.
            if (self._enabled_indexing_in_queries  # no in-kernel searches
                    and self.colindexed[col.pathname]
                    and not col.index.dirty):
                indexedcols.append(colname)

        indexedcols = frozenset(indexedcols)
        # Now let ``compile_condition()`` do the Numexpr-related job.
        compiled = compile_condition(condition, typemap, indexedcols)

        # Check that there actually are columns in the condition.
        if not set(compiled.parameters).intersection(set(colnames)):
            raise ValueError("there are no columns taking part "
                             "in condition ``%s``" % (condition,))

        # Store the compiled condition in the cache and return it.
        condcache[condkey] = compiled
        return compiled.with_replaced_vars(condvars)

    def will_query_use_indexing(self, condition, condvars=None):
        """Will a query for the condition use indexing?

        The meaning of the condition and *condvars* arguments is the same
        as in the :meth:`Table.where` method. If condition can use
        indexing, this method returns a frozenset with the path names of
        the columns whose index is usable. Otherwise, it returns an empty
        frozenset.

        This method is mainly intended for testing. Keep in mind that
        changing the set of indexed columns or their dirtiness may make
        this method return different values for the same arguments at
        different times.

        """

        # Compile the condition and extract usable index conditions.
        condvars = self._required_expr_vars(condition, condvars, depth=2)
        compiled = self._compile_condition(condition, condvars)
        # Return the columns in indexed expressions
        idxcols = [condvars[var].pathname for var in compiled.index_variables]
        return frozenset(idxcols)

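    # A quick sketch (the column names are hypothetical; assumes only
    # ``col1`` has a clean index):
    #
    #   >>> table.will_query_use_indexing('(col1 > 0) & (col2 < 5)')
    #   frozenset({'col1'})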
    def where(self, condition, condvars=None,
              start=None, stop=None, step=None):
        r"""Iterate over values fulfilling a condition.

        This method returns a Row iterator (see :ref:`RowClassDescr`) which
        only selects rows in the table that satisfy the given condition (an
        expression-like string).

        The condvars mapping may be used to define the variable names
        appearing in the condition. condvars should consist of
        identifier-like strings pointing to Column (see
        :ref:`ColumnClassDescr`) instances *of this table*, or to other
        values (which will be converted to arrays). A default set of
        condition variables is provided where each top-level, non-nested
        column with an identifier-like name appears. Variables in condvars
        override the default ones.

        When condvars is not provided or None, the current local and global
        namespace is sought instead of condvars. The previous mechanism is
        mostly intended for interactive usage. To disable it, just specify
        a (maybe empty) mapping as condvars.

        If a range is supplied (by setting some of the start, stop or step
        parameters), only the rows in that range fulfilling the condition
        are used. The meaning of the start, stop and step parameters is the
        same as for Python slices.

        When possible, indexed columns participating in the condition will
        be used to speed up the search. It is recommended that you place
        the indexed columns as far to the left in the condition as
        possible. In any case, this method always performs better than
        regular Python selections on the table.

        You can mix this method with regular Python selections in order to
        support even more complex queries. It is strongly recommended that
        you pass the most restrictive condition as the parameter to this
        method if you want to achieve maximum performance.

        .. warning::

            When in the middle of a table row iterator, you should not
            use methods that can change the number of rows in the table
            (like :meth:`Table.append` or :meth:`Table.remove_rows`) or
            unexpected errors will happen.

        Examples
        --------

        ::

            passvalues = [ row['col3'] for row in
                           table.where('(col1 > 0) & (col2 <= 20)', step=5)
                           if your_function(row['col2']) ]
            print("Values that pass the cuts:", passvalues)

        .. note::

            Special care should be taken when the query condition includes
            string literals.

            Let's assume that the table ``table`` has the following
            structure::

                class Record(IsDescription):
                    col1 = StringCol(4)  # 4-character String of bytes
                    col2 = IntCol()
                    col3 = FloatCol()

            The type of "col1" corresponds to strings of bytes.

            Any condition involving "col1" should be written using the
            appropriate type for string literals in order to avoid
            :exc:`TypeError`\ s.

            The code below will fail with a :exc:`TypeError`::

                condition = 'col1 == "AAAA"'
                for record in table.where(condition):  # TypeError in Python3
                    # do something with "record"

            The reason is that in Python 3 "condition" implies a comparison
            between a string of bytes ("col1" contents) and a unicode
            literal ("AAAA").

            The correct way to write the condition is::

                condition = 'col1 == b"AAAA"'

        .. versionchanged:: 3.0
           The start, stop and step parameters now behave like in slice.

        """

        return self._where(condition, condvars, start, stop, step)

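    # A condvars usage sketch (hypothetical table and threshold):
    #
    #   >>> lim = 20
    #   >>> rows = [r['col3'] for r in
    #   ...         table.where('col2 <= limit', condvars={'limit': lim})]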
    def _where(self, condition, condvars, start=None, stop=None, step=None):
        """Low-level counterpart of `self.where()`."""

        if profile:
            tref = clock()
        if profile:
            show_stats("Entering table._where", tref)
        # Adjust the slice to be used.
        (start, stop, step) = self._process_range_read(start, stop, step)
        if start >= stop:  # empty range, reset conditions
            self._use_index = False
            self._where_condition = None
            return iter([])

        # Compile the condition and extract usable index conditions.
        condvars = self._required_expr_vars(condition, condvars, depth=3)
        compiled = self._compile_condition(condition, condvars)

        # Can we use indexes?
        if compiled.index_expressions:
            chunkmap = _table__where_indexed(
                self, compiled, condition, condvars, start, stop, step)
            if not isinstance(chunkmap, np.ndarray):
                # If it is not a NumPy array it should be an iterator
                # Reset conditions
                self._use_index = False
                self._where_condition = None
                # ...and return the iterator
                return chunkmap
        else:
            chunkmap = None  # default to an in-kernel query

        args = [condvars[param] for param in compiled.parameters]
        self._where_condition = (compiled.function, args, compiled.kwargs)
        row = tableextension.Row(self)
        if profile:
            show_stats("Exiting table._where", tref)
        return row._iter(start, stop, step, chunkmap=chunkmap)

1513 def read_where(self, condition, condvars=None, field=None, 

1514 start=None, stop=None, step=None): 

1515 """Read table data fulfilling the given *condition*. 

1516 

1517 This method is similar to :meth:`Table.read`; their common 

1518 arguments and return values have the same meanings. However, only the rows 

1519 fulfilling the *condition* are included in the result. 

1520 

1521 The meaning of the other arguments is the same as in the 

1522 :meth:`Table.where` method. 

1523 

1524 """ 

1525 

1526 self._g_check_open() 

1527 coords = [p.nrow for p in 

1528 self._where(condition, condvars, start, stop, step)] 

1529 self._where_condition = None # reset the conditions 

1530 if len(coords) > 1: 

1531 cstart, cstop = coords[0], coords[-1] + 1 

1532 if cstop - cstart == len(coords): 

1533 # Chances for monotonically increasing row values. Refine. 

1534 inc_seq = np.all( 

1535 np.arange(cstart, cstop) == np.array(coords)) 

1536 if inc_seq: 

1537 return self.read(cstart, cstop, field=field) 

1538 return self.read_coordinates(coords, field) 
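
A minimal sketch of read_where(); the file and column names are hypothetical::

    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        # Only the 'col2' values of the matching rows, as a NumPy array.
        values = table.read_where('(col1 > 0) & (col2 < 1.5)', field='col2')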

1539 

1540 def append_where(self, dstTable, condition=None, condvars=None, 

1541 start=None, stop=None, step=None): 

1542 """Append rows fulfilling the condition to the dstTable table. 

1543 

1544 dstTable must be capable of taking the rows resulting from the query, 

1545 i.e. it must have columns with the expected names and compatible 

1546 types. The meaning of the other arguments is the same as in the 

1547 :meth:`Table.where` method. 

1548 

1549 The number of rows appended to dstTable is returned as a result. 

1550 

1551 .. versionchanged:: 3.0 

1552 The *whereAppend* method has been renamed to *append_where*. 

1553 

1554 """ 

1555 

1556 self._g_check_open() 

1557 

1558 # Check that the destination file is not in read-only mode. 

1559 dstTable._v_file._check_writable() 

1560 

1561 # Row objects do not support nested columns, so we must iterate 

1562 # over the flat column paths. When rows support nesting, 

1563 # ``self.colnames`` can be directly iterated upon. 

1564 colNames = list(self.colpathnames) 

1565 dstRow = dstTable.row 

1566 nrows = 0 

1567 if condition is not None: 

1568 srcRows = self._where(condition, condvars, start, stop, step) 

1569 else: 

1570 srcRows = self.iterrows(start, stop, step) 

1571 for srcRow in srcRows: 

1572 for colName in colNames: 

1573 dstRow[colName] = srcRow[colName] 

1574 dstRow.append() 

1575 nrows += 1 

1576 dstTable.flush() 

1577 return nrows 
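
A minimal sketch of append_where(); the destination table reuses the source description, so its columns are compatible by construction (file and node names are hypothetical)::

    import tables as tb

    with tb.open_file('data.h5', mode='a') as h5:
        src = h5.root.table
        dst = h5.create_table('/', 'selection', src.description)
        nrows = src.append_where(dst, 'col1 > 10')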

1578 

1579 def get_where_list(self, condition, condvars=None, sort=False, 

1580 start=None, stop=None, step=None): 

1581 """Get the row coordinates fulfilling the given condition. 

1582 

1583 The coordinates are returned as a list of the current flavor. If sort 

1584 is true, the coordinates are returned in ascending order. The default 

1585 is to leave them unsorted. 

1586 

1587 The meaning of the other arguments is the same as in the 

1588 :meth:`Table.where` method. 

1589 

1590 """ 

1591 

1592 self._g_check_open() 

1593 

1594 coords = [p.nrow for p in 

1595 self._where(condition, condvars, start, stop, step)] 

1596 coords = np.array(coords, dtype=SizeType) 

1597 # Reset the conditions 

1598 self._where_condition = None 

1599 if sort: 

1600 coords = np.sort(coords) 

1601 return internal_to_flavor(coords, self.flavor) 
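
A minimal sketch of get_where_list() with sorted output (hypothetical names)::

    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        # Row numbers of the matching rows, in ascending order.
        coords = table.get_where_list('col1 > 10', sort=True)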

1602 

1603 def itersequence(self, sequence): 

1604 """Iterate over a sequence of row coordinates.""" 

1605 

1606 if not hasattr(sequence, '__getitem__'): 

1607 raise TypeError("Wrong 'sequence' parameter type. Only sequences " 

1608 "are suported.") 

1609 # start, stop and step are necessary for the new iterator for 

1610 # coordinates, and perhaps it would be useful to add them as 

1611 # parameters in the future (not now, because I've just removed 

1612 # the `sort` argument for 2.1). 

1613 # 

1614 # *Important note*: Negative values for step are not supported 

1615 # for the general case, but only for the itersorted() and 

1616 # read_sorted() purposes! The self._process_range_read will raise 

1617 an appropriate error. 

1618 # F. Alted 2008-09-18 

1619 # A.V. 20130513: _process_range_read --> _process_range 

1620 (start, stop, step) = self._process_range(None, None, None) 

1621 if (start > stop) or (len(sequence) == 0): 

1622 return iter([]) 

1623 row = tableextension.Row(self) 

1624 return row._iter(start, stop, step, coords=sequence) 
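
A minimal sketch of itersequence(), visiting an explicit set of row numbers (hypothetical names)::

    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        for row in table.itersequence([2, 5, 11]):
            print(row['col1'])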

1625 

1626 def _check_sortby_csi(self, sortby, checkCSI): 

1627 if isinstance(sortby, Column): 

1628 icol = sortby 

1629 elif isinstance(sortby, str): 

1630 icol = self.cols._f_col(sortby) 

1631 else: 

1632 raise TypeError( 

1633 "`sortby` can only be a `Column` or string object, " 

1634 "but you passed an object of type: %s" % type(sortby)) 

1635 if icol.is_indexed and icol.index.kind == "full": 

1636 if checkCSI and not icol.index.is_csi: 

1637 # The index exists, but it is not a CSI one. 

1638 raise ValueError( 

1639 "Field `%s` must have associated a CSI index " 

1640 "in table `%s`, but the existing one is not. " 

1641 % (sortby, self)) 

1642 return icol.index 

1643 else: 

1644 raise ValueError( 

1645 "Field `%s` must have associated a 'full' index " 

1646 "in table `%s`." % (sortby, self)) 

1647 

1648 def itersorted(self, sortby, checkCSI=False, 

1649 start=None, stop=None, step=None): 

1650 """Iterate table data following the order of the index of sortby 

1651 column. 

1652 

1653 The sortby column must have an associated full index. If you want to 

1654 ensure a fully sorted order, the index must be a CSI one. You may want 

1655 to use the checkCSI argument in order to explicitly check for the 

1656 existence of a CSI index. 

1657 

1658 The meaning of the start, stop and step arguments is the same as in 

1659 :meth:`Table.read`. 

1660 

1661 .. versionchanged:: 3.0 

1662 If the *start* parameter is provided and *stop* is None then the 

1663 table is iterated from *start* to the last row. 

1664 In PyTables < 3.0 only one element was returned. 

1665 

1666 """ 

1667 

1668 index = self._check_sortby_csi(sortby, checkCSI) 

1669 # Adjust the slice to be used. 

1670 (start, stop, step) = self._process_range(start, stop, step, 

1671 warn_negstep=False) 

1672 if (start > stop and 0 < step) or (start < stop and 0 > step): 

1673 # Fall-back action is to return an empty iterator 

1674 return iter([]) 

1675 row = tableextension.Row(self) 

1676 return row._iter(start, stop, step, coords=index) 
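
A minimal sketch of itersorted(); a CSI index on the sort column guarantees a fully sorted iteration order (file and column names are hypothetical)::

    import tables as tb

    with tb.open_file('data.h5', mode='a') as h5:
        table = h5.root.table
        if not table.cols.col1.is_indexed:
            table.cols.col1.create_csindex()  # full, completely sorted index
        for row in table.itersorted('col1', checkCSI=True):
            print(row['col1'])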

1677 

1678 def read_sorted(self, sortby, checkCSI=False, field=None, 

1679 start=None, stop=None, step=None): 

1680 """Read table data following the order of the index of sortby column. 

1681 

1682 The sortby column must have an associated full index. If you want to 

1683 ensure a fully sorted order, the index must be a CSI one. You may want 

1684 to use the checkCSI argument in order to explicitly check for the 

1685 existence of a CSI index. 

1686 

1687 If field is supplied only the named column will be selected. If the 

1688 column is not nested, an *array* of the current flavor will be 

1689 returned; if it is, a *structured array* will be used instead. If no 

1690 field is specified, all the columns will be returned in a structured 

1691 array of the current flavor. 

1692 

1693 The meaning of the start, stop and step arguments is the same as in 

1694 :meth:`Table.read`. 

1695 

1696 .. versionchanged:: 3.0 

1697 The start, stop and step parameters now behave like in a slice. 

1698 

1699 """ 

1700 

1701 self._g_check_open() 

1702 index = self._check_sortby_csi(sortby, checkCSI) 

1703 coords = index[start:stop:step] 

1704 return self.read_coordinates(coords, field) 
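
A minimal sketch of read_sorted(), reusing the CSI index from the itersorted() sketch above (hypothetical names)::

    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        # 'col2' values ordered by the indexed 'col1' column.
        ordered = table.read_sorted('col1', checkCSI=True, field='col2')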

1705 

1706 def iterrows(self, start=None, stop=None, step=None): 

1707 """Iterate over the table using a Row instance. 

1708 

1709 If a range is not supplied, *all the rows* in the table are iterated 

1710 upon - you can also use the :meth:`Table.__iter__` special method for 

1711 that purpose. If you want to iterate over a given *range of rows* in 

1712 the table, you may use the start, stop and step parameters. 

1713 

1714 .. warning:: 

1715 

1716 When in the middle of a table row iterator, you should not 

1717 use methods that can change the number of rows in the table 

1718 (like :meth:`Table.append` or :meth:`Table.remove_rows`) or 

1719 unexpected errors will happen. 

1720 

1721 See Also 

1722 -------- 

1723 tableextension.Row : the table row iterator and field accessor 

1724 

1725 Examples 

1726 -------- 

1727 

1728 :: 

1729 

1730 result = [ row['var2'] for row in table.iterrows(step=5) 

1731 if row['var1'] <= 20 ] 

1732 

1733 .. versionchanged:: 3.0 

1734 If the *start* parameter is provided and *stop* is None then the 

1735 table is iterated from *start* to the last row. 

1736 In PyTables < 3.0 only one element was returned. 

1737 

1738 """ 

1739 (start, stop, step) = self._process_range(start, stop, step, 

1740 warn_negstep=False) 

1741 if (start > stop and 0 < step) or (start < stop and 0 > step): 

1742 # Fall-back action is to return an empty iterator 

1743 return iter([]) 

1744 row = tableextension.Row(self) 

1745 return row._iter(start, stop, step) 

1746 

1747 def __iter__(self): 

1748 """Iterate over the table using a Row instance. 

1749 

1750 This is equivalent to calling :meth:`Table.iterrows` with default 

1751 arguments, i.e. it iterates over *all the rows* in the table. 

1752 

1753 See Also 

1754 -------- 

1755 tableextension.Row : the table row iterator and field accessor 

1756 

1757 Examples 

1758 -------- 

1759 

1760 :: 

1761 

1762 result = [ row['var2'] for row in table if row['var1'] <= 20 ] 

1763 

1764 Which is equivalent to:: 

1765 

1766 result = [ row['var2'] for row in table.iterrows() 

1767 if row['var1'] <= 20 ] 

1768 

1769 """ 

1770 

1771 return self.iterrows() 

1772 

1773 def _read(self, start, stop, step, field=None, out=None): 

1774 """Read a range of rows and return an in-memory object.""" 

1775 

1776 select_field = None 

1777 if field: 

1778 if field not in self.coldtypes: 

1779 if field in self.description._v_names: 

1780 # Remember to select this field 

1781 select_field = field 

1782 field = None 

1783 else: 

1784 raise KeyError(("Field {} not found in table " 

1785 "{}").format(field, self)) 

1786 else: 

1787 # The column hangs directly from the top 

1788 dtype_field = self.coldtypes[field] 

1789 

1790 # Return a rank-0 array if start > stop 

1791 if (start >= stop and 0 < step) or (start <= stop and 0 > step): 

1792 if field is None: 

1793 nra = self._get_container(0) 

1794 return nra 

1795 return np.empty(shape=0, dtype=dtype_field) 

1796 

1797 nrows = len(range(start, stop, step)) 

1798 

1799 if out is None: 

1800 # Compute the shape of the resulting column object 

1801 if field: 

1802 # Create a container for the results 

1803 result = np.empty(shape=nrows, dtype=dtype_field) 

1804 else: 

1805 # Recarray case 

1806 result = self._get_container(nrows) 

1807 else: 

1808 # there is no fast way to byteswap, since different columns may 

1809 # have different byteorders 

1810 if not out.dtype.isnative: 

1811 raise ValueError("output array must be in system's byteorder " 

1812 "or results will be incorrect") 

1813 if field: 

1814 bytes_required = dtype_field.itemsize * nrows 

1815 else: 

1816 bytes_required = self.rowsize * nrows 

1817 if bytes_required != out.nbytes: 

1818 raise ValueError(f'output array size invalid, got {out.nbytes}' 

1819 f' bytes, need {bytes_required} bytes') 

1820 if not out.flags['C_CONTIGUOUS']: 

1821 raise ValueError('output array not C contiguous') 

1822 result = out 

1823 

1824 # Call the routine to fill-up the resulting array 

1825 if step == 1 and not field: 

1826 # This optimization works three times faster than 

1827 # the row._fill_col method (up to 170 MB/s on a pentium IV @ 2GHz) 

1828 self._read_records(start, stop - start, result) 

1829 # Warning!: _read_field_name should not be used until 

1830 # H5TBread_fields_name in tableextension will be finished 

1831 # F. Alted 2005/05/26 

1832 # XXX Shall we implement this for PyTables 2.0? 

1833 elif field and step > 15 and 0: 

1834 # For step>15, this seems to always work faster than row._fill_col. 

1835 self._read_field_name(result, start, stop, step, field) 

1836 else: 

1837 self.row._fill_col(result, start, stop, step, field) 

1838 

1839 if select_field: 

1840 return result[select_field] 

1841 else: 

1842 return result 

1843 

1844 def read(self, start=None, stop=None, step=None, field=None, out=None): 

1845 """Get data in the table as a (record) array. 

1846 

1847 The start, stop and step parameters can be used to select only 

1848 a *range of rows* in the table. Their meanings are the same as 

1849 in the built-in Python slices. 

1850 

1851 If field is supplied only the named column will be selected. 

1852 If the column is not nested, an *array* of the current flavor 

1853 will be returned; if it is, a *structured array* will be used 

1854 instead. If no field is specified, all the columns will be 

1855 returned in a structured array of the current flavor. 

1856 

1857 Columns under a nested column can be specified in the field 

1858 parameter by using a slash character (/) as a separator (e.g. 

1859 'position/x'). 

1860 

1861 The out parameter may be used to specify a NumPy array to 

1862 receive the output data. Note that the array must have the 

1863 same size as the data selected with the other parameters. 

1864 Note that the array's datatype is not checked and no type 

1865 casting is performed, so if it does not match the datatype on 

1866 disk, the output will not be correct. 

1867 

1868 When specifying a single nested column with the field parameter, 

1869 and supplying an output buffer with the out parameter, the 

1870 output buffer must contain all columns in the table. 

1871 The data in all columns will be read into the output buffer. 

1872 However, only the specified nested column will be returned from 

1873 the method call. 

1874 

1875 When data is read from disk in NumPy format, the output will be 

1876 in the current system's byteorder, regardless of how it is 

1877 stored on disk. If the out parameter is specified, the output 

1878 array also must be in the current system's byteorder. 

1879 

1880 .. versionchanged:: 3.0 

1881 Added the *out* parameter. Also the start, stop and step 

1882 parameters now behave like in a slice. 

1883 

1884 Examples 

1885 -------- 

1886 

1887 Reading the entire table:: 

1888 

1889 t.read() 

1890 

1891 Reading record n. 6:: 

1892 

1893 t.read(6, 7) 

1894 

1895 Reading from record n. 6 to the end of the table:: 

1896 

1897 t.read(6) 

1898 

1899 """ 

1900 

1901 self._g_check_open() 

1902 

1903 if field: 

1904 self._check_column(field) 

1905 

1906 if out is not None and self.flavor != 'numpy': 

1907 msg = ("Optional 'out' argument may only be supplied if array " 

1908 "flavor is 'numpy', currently is {}").format(self.flavor) 

1909 raise TypeError(msg) 

1910 

1911 start, stop, step = self._process_range(start, stop, step, 

1912 warn_negstep=False) 

1913 

1914 arr = self._read(start, stop, step, field, out) 

1915 return internal_to_flavor(arr, self.flavor) 
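
A minimal sketch of reading into a preallocated buffer via the out parameter; as explained above, the buffer must match the selection size exactly, be C-contiguous and use the system byteorder (names are hypothetical)::

    import numpy as np
    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        buf = np.empty(100, dtype=table.dtype)  # reusable record buffer
        table.read(start=0, stop=100, out=buf)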

1916 

1917 def _read_coordinates(self, coords, field=None): 

1918 """Private part of `read_coordinates()` with no flavor conversion.""" 

1919 

1920 coords = self._point_selection(coords) 

1921 

1922 ncoords = len(coords) 

1923 # Create a read buffer only if needed 

1924 if field is None or ncoords > 0: 

1925 # Doing a copy is faster when ncoords is small (<1000) 

1926 if ncoords < min(1000, self.nrowsinbuf): 

1927 result = self._v_iobuf[:ncoords].copy() 

1928 else: 

1929 result = self._get_container(ncoords) 

1930 

1931 # Do the real read 

1932 if ncoords > 0: 

1933 # Turn coords into an array of coordinate indexes, if necessary 

1934 if not (isinstance(coords, np.ndarray) and 

1935 coords.dtype.type is _npsizetype and 

1936 coords.flags.contiguous and 

1937 coords.flags.aligned): 

1938 # Get a contiguous and aligned coordinate array 

1939 coords = np.array(coords, dtype=SizeType) 

1940 self._read_elements(coords, result) 

1941 

1942 # Do the final conversions, if needed 

1943 if field: 

1944 if ncoords > 0: 

1945 result = get_nested_field(result, field) 

1946 else: 

1947 # Get an empty array from the cache 

1948 result = self._getemptyarray(self.coldtypes[field]) 

1949 return result 

1950 

1951 def read_coordinates(self, coords, field=None): 

1952 """Get a set of rows given their indexes as a (record) array. 

1953 

1954 This method works much like the :meth:`Table.read` method, but it uses 

1955 a sequence (coords) of row indexes to select the wanted columns, 

1956 instead of a column range. 

1957 

1958 The selected rows are returned in an array or structured array of the 

1959 current flavor. 

1960 

1961 """ 

1962 

1963 self._g_check_open() 

1964 result = self._read_coordinates(coords, field) 

1965 return internal_to_flavor(result, self.flavor) 
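
A minimal sketch of read_coordinates(), gathering scattered rows in one call instead of many single-row reads (hypothetical names)::

    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        recs = table.read_coordinates([0, 7, 42])
        col2 = table.read_coordinates([0, 7, 42], field='col2')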

1966 

1967 def get_enum(self, colname): 

1968 """Get the enumerated type associated with the named column. 

1969 

1970 If the column named colname (a string) exists and is of an enumerated 

1971 type, the corresponding Enum instance (see :ref:`EnumClassDescr`) is 

1972 returned. If it is not of an enumerated type, a TypeError is raised. If 

1973 the column does not exist, a KeyError is raised. 

1974 

1975 """ 

1976 

1977 self._check_column(colname) 

1978 

1979 try: 

1980 return self._colenums[colname] 

1981 except KeyError: 

1982 raise TypeError( 

1983 "column ``%s`` of table ``%s`` is not of an enumerated type" 

1984 % (colname, self._v_pathname)) 
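
A minimal sketch of get_enum(), assuming a hypothetical enumerated column named 'status'; the returned Enum instance is callable and maps stored codes back to their names::

    import tables as tb

    with tb.open_file('data.h5', mode='r') as h5:
        table = h5.root.table
        status = table.get_enum('status')
        names = [status(code) for code in table.col('status')]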

1985 

1986 def col(self, name): 

1987 """Get a column from the table. 

1988 

1989 If a column called name exists in the table, it is read and returned as 

1990 a NumPy object. If it does not exist, a KeyError is raised. 

1991 

1992 Examples 

1993 -------- 

1994 

1995 :: 

1996 

1997 narray = table.col('var2') 

1998 

1999 That statement is equivalent to:: 

2000 

2001 narray = table.read(field='var2') 

2002 

2003 Here you can see how this method can be used as a shorthand for the 

2004 :meth:`Table.read` method. 

2005 

2006 """ 

2007 

2008 return self.read(field=name) 

2009 

2010 def __getitem__(self, key): 

2011 """Get a row or a range of rows from the table. 

2012 

2013 If key argument is an integer, the corresponding table row is returned 

2014 as a record of the current flavor. If key is a slice, the range of rows 

2015 determined by it is returned as a structured array of the current 

2016 flavor. 

2017 

2018 In addition, NumPy-style point selections are supported. In 

2019 particular, if key is a list of row coordinates, the set of rows 

2020 determined by it is returned. Furthermore, if key is an array of 

2021 boolean values, only the coordinates where key is True are returned. 

2022 Note that for the latter to work, the key list must contain exactly 

2023 as many values as the table has rows. 

2024 

2025 Examples 

2026 -------- 

2027 

2028 :: 

2029 

2030 record = table[4] 

2031 recarray = table[4:1000:2] 

2032 recarray = table[[4,1000]] # only retrieves rows 4 and 1000 

2033 recarray = table[[True, False, ..., True]] 

2034 

2035 Those statements are equivalent to:: 

2036 

2037 record = table.read(start=4)[0] 

2038 recarray = table.read(start=4, stop=1000, step=2) 

2039 recarray = table.read_coordinates([4,1000]) 

2040 recarray = table.read_coordinates([True, False, ..., True]) 

2041 

2042 Here, you can see how indexing can be used as a shorthand for the 

2043 :meth:`Table.read` and :meth:`Table.read_coordinates` methods. 

2044 

2045 """ 

2046 

2047 self._g_check_open() 

2048 

2049 if is_idx(key): 

2050 key = operator.index(key) 

2051 

2052 # Index out of range protection 

2053 if key >= self.nrows: 

2054 raise IndexError("Index out of range") 

2055 if key < 0: 

2056 # To support negative values 

2057 key += self.nrows 

2058 (start, stop, step) = self._process_range(key, key + 1, 1) 

2059 return self.read(start, stop, step)[0] 

2060 elif isinstance(key, slice): 

2061 (start, stop, step) = self._process_range( 

2062 key.start, key.stop, key.step) 

2063 return self.read(start, stop, step) 

2064 # Try with a boolean or point selection 

2065 elif type(key) in (list, tuple) or isinstance(key, np.ndarray): 

2066 return self._read_coordinates(key, None) 

2067 else: 

2068 raise IndexError(f"Invalid index or slice: {key!r}") 

2069 

2070 def __setitem__(self, key, value): 

2071 """Set a row or a range of rows in the table. 

2072 

2073 It takes different actions depending on the type of the *key* 

2074 parameter: if it is an integer, the corresponding table row is 

2075 set to *value* (a record or sequence capable of being converted 

2076 to the table structure). If *key* is a slice, the row slice 

2077 determined by it is set to *value* (a record array or sequence 

2078 capable of being converted to the table structure). 

2079 

2080 In addition, NumPy-style point selections are supported. In 

2081 particular, if key is a list of row coordinates, the set of rows 

2082 determined by it is set to value. Furthermore, if key is an array of 

2083 boolean values, only the coordinates where key is True are set to 

2084 values from value. Note that for the latter to work, the key list 

2085 must contain exactly as many values as the table has rows. 

2086 

2087 Examples 

2088 -------- 

2089 

2090 :: 

2091 

2092 # Modify just one existing row 

2093 table[2] = [456,'db2',1.2] 

2094 

2095 # Modify two existing rows 

2096 rows = numpy.rec.array([[457,'db1',1.2],[6,'de2',1.3]], 

2097 formats='i4,a3,f8') 

2098 table[1:30:2] = rows # modify a table slice 

2099 table[[1,3]] = rows # only modifies rows 1 and 3 

2100 table[[True,False,True]] = rows # only modifies rows 0 and 2 

2101 

2102 Which is equivalent to:: 

2103 

2104 table.modify_rows(start=2, rows=[456,'db2',1.2]) 

2105 rows = numpy.rec.array([[457,'db1',1.2],[6,'de2',1.3]], 

2106 formats='i4,a3,f8') 

2107 table.modify_rows(start=1, stop=3, step=2, rows=rows) 

2108 table.modify_coordinates([1,3], rows) 

2109 table.modify_coordinates([True, False, True], rows) 

2110 

2111 Here, you can see how indexing can be used as a shorthand for the 

2112 :meth:`Table.modify_rows` and :meth:`Table.modify_coordinates` 

2113 methods. 

2114 

2115 """ 

2116 

2117 self._g_check_open() 

2118 self._v_file._check_writable() 

2119 

2120 if is_idx(key): 

2121 key = operator.index(key) 

2122 

2123 # Index out of range protection 

2124 if key >= self.nrows: 

2125 raise IndexError("Index out of range") 

2126 if key < 0: 

2127 # To support negative values 

2128 key += self.nrows 

2129 return self.modify_rows(key, key + 1, 1, [value]) 

2130 elif isinstance(key, slice): 

2131 (start, stop, step) = self._process_range( 

2132 key.start, key.stop, key.step) 

2133 return self.modify_rows(start, stop, step, value) 

2134 # Try with a boolean or point selection 

2135 elif type(key) in (list, tuple) or isinstance(key, np.ndarray): 

2136 return self.modify_coordinates(key, value) 

2137 else: 

2138 raise IndexError(f"Invalid index or slice: {key!r}") 

2139 

2140 def _save_buffered_rows(self, wbufRA, lenrows): 

2141 """Update the indexes after a flushing of rows.""" 

2142 

2143 self._open_append(wbufRA) 

2144 self._append_records(lenrows) 

2145 self._close_append() 

2146 if self.indexed: 

2147 self._unsaved_indexedrows += lenrows 

2148 # The table caches for indexed queries are dirty now 

2149 self._dirtycache = True 

2150 if self.autoindex: 

2151 # Flush the unindexed rows 

2152 self.flush_rows_to_index(_lastrow=False) 

2153 else: 

2154 # All the columns are dirty now 

2155 self._mark_columns_as_dirty(self.colpathnames) 

2156 

2157 def append(self, rows): 

2158 """Append a sequence of rows to the end of the table. 

2159 

2160 The rows argument may be any object which can be converted to 

2161 a structured array compliant with the table structure 

2162 (otherwise, a ValueError is raised). This includes NumPy 

2163 structured arrays, lists of tuples or array records, and a 

2164 string or Python buffer. 

2165 

2166 Examples 

2167 -------- 

2168 

2169 :: 

2170 

2171 import tables as tb 

2172 

2173 class Particle(tb.IsDescription): 

2174 name = tb.StringCol(16, pos=1) # 16-character String 

2175 lati = tb.IntCol(pos=2) # integer 

2176 longi = tb.IntCol(pos=3) # integer 

2177 pressure = tb.Float32Col(pos=4) # float (single-precision) 

2178 temperature = tb.FloatCol(pos=5) # double (double-precision) 

2179 

2180 fileh = tb.open_file('test4.h5', mode='w') 

2181 table = fileh.create_table(fileh.root, 'table', Particle, 

2182 "A table") 

2183 

2184 # Append several rows in only one call 

2185 table.append([("Particle: 10", 10, 0, 10 * 10, 10**2), 

2186 ("Particle: 11", 11, -1, 11 * 11, 11**2), 

2187 ("Particle: 12", 12, -2, 12 * 12, 12**2)]) 

2188 fileh.close() 

2189 

2190 """ 

2191 

2192 self._g_check_open() 

2193 self._v_file._check_writable() 

2194 

2195 if not self._chunked: 

2196 raise HDF5ExtError( 

2197 "You cannot append rows to a non-chunked table.", h5bt=False) 

2198 

2199 if (hasattr(rows, "dtype") and 

2200 not self.description._v_is_nested and 

2201 rows.dtype == self.dtype): 

2202 # Shortcut for compliant arrays 

2203 # (for some reason, not valid for nested types) 

2204 wbufRA = rows 

2205 else: 

2206 # Try to convert the object into a recarray compliant with table 

2207 try: 

2208 iflavor = flavor_of(rows) 

2209 if iflavor != 'python': 

2210 rows = array_as_internal(rows, iflavor) 

2211 # Works for Python structures and always copies the original, 

2212 # so the resulting object is safe for in-place conversion. 

2213 wbufRA = np.rec.array(rows, dtype=self._v_dtype) 

2214 except Exception as exc: # XXX 

2215 raise ValueError("rows parameter cannot be converted into a " 

2216 "recarray object compliant with table '%s'. " 

2217 "The error was: <%s>" % (str(self), exc)) 

2218 lenrows = wbufRA.shape[0] 

2219 # If the number of rows to append is zero, don't do anything else 

2220 if lenrows > 0: 

2221 # Save write buffer to disk 

2222 self._save_buffered_rows(wbufRA, lenrows) 

2223 

2224 def _conv_to_recarr(self, obj): 

2225 """Try to convert the object into a recarray.""" 

2226 

2227 try: 

2228 iflavor = flavor_of(obj) 

2229 if iflavor != 'python': 

2230 obj = array_as_internal(obj, iflavor) 

2231 if hasattr(obj, "shape") and obj.shape == (): 

2232 # To allow conversion of scalars (void type) into arrays. 

2233 # See http://projects.scipy.org/scipy/numpy/ticket/315 

2234 # for discussion on how to pass buffers to constructors 

2235 # See also http://projects.scipy.org/scipy/numpy/ticket/348 

2236 recarr = np.array([obj], dtype=self._v_dtype) 

2237 else: 

2238 # Works for Python structures and always copies the original, 

2239 # so the resulting object is safe for in-place conversion. 

2240 recarr = np.rec.array(obj, dtype=self._v_dtype) 

2241 except Exception as exc: # XXX 

2242 raise ValueError("Object cannot be converted into a recarray " 

2243 "object compliant with table format '%s'. " 

2244 "The error was: <%s>" % 

2245 (self.description._v_nested_descr, exc)) 

2246 

2247 return recarr 

2248 

2249 def modify_coordinates(self, coords, rows): 

2250 """Modify a series of rows in positions specified in coords. 

2251 

2252 The values in the selected rows will be modified with the data given in 

2253 rows. This method returns the number of rows modified. 

2254 

2255 The possible values for the rows argument are the same as in 

2256 :meth:`Table.append`. 

2257 

2258 """ 

2259 

2260 if rows is None: # Nothing to be done 

2261 return SizeType(0) 

2262 

2263 # Convert the coordinates to something expected by HDF5 

2264 coords = self._point_selection(coords) 

2265 

2266 lcoords = len(coords) 

2267 if len(rows) < lcoords: 

2268 raise ValueError("The value has not enough elements to fill-in " 

2269 "the specified range") 

2270 

2271 # Convert rows into a recarray 

2272 recarr = self._conv_to_recarr(rows) 

2273 

2274 if len(coords) > 0: 

2275 # Do the actual update of rows 

2276 self._update_elements(lcoords, coords, recarr) 

2277 

2278 # Redo the index if needed 

2279 self._reindex(self.colpathnames) 

2280 

2281 return SizeType(lcoords) 
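
A minimal sketch of modify_coordinates(), overwriting two scattered rows of a hypothetical two-column table; the rows argument follows the same rules as in Table.append()::

    import tables as tb

    with tb.open_file('data.h5', mode='a') as h5:
        table = h5.root.table
        table.modify_coordinates([1, 3], [(10, 1.0), (30, 3.0)])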

2282 

2283 def modify_rows(self, start=None, stop=None, step=None, rows=None): 

2284 """Modify a series of rows in the slice [start:stop:step]. 

2285 

2286 The values in the selected rows will be modified with the data given in 

2287 rows. This method returns the number of rows modified. Should the 

2288 modification exceed the length of the table, an IndexError is raised 

2289 before changing data. 

2290 

2291 The possible values for the rows argument are the same as in 

2292 :meth:`Table.append`. 

2293 

2294 """ 

2295 

2296 if step is None: 

2297 step = 1 

2298 if rows is None: # Nothing to be done 

2299 return SizeType(0) 

2300 if start is None: 

2301 start = 0 

2302 

2303 if start < 0: 

2304 raise ValueError("'start' must have a positive value.") 

2305 if step < 1: 

2306 raise ValueError( 

2307 "'step' must have a value greater or equal than 1.") 

2308 if stop is None: 

2309 # compute the stop value. start + len(rows)*step does not work 

2310 stop = start + (len(rows) - 1) * step + 1 

2311 

2312 (start, stop, step) = self._process_range(start, stop, step) 

2313 if stop > self.nrows: 

2314 raise IndexError("This modification will exceed the length of " 

2315 "the table. Giving up.") 

2316 # Compute the number of rows to read. 

2317 nrows = len(range(start, stop, step)) 

2318 if len(rows) != nrows: 

2319 raise ValueError("The value has different elements than the " 

2320 "specified range") 

2321 

2322 # Convert rows into a recarray 

2323 recarr = self._conv_to_recarr(rows) 

2324 

2325 lenrows = len(recarr) 

2326 if start + lenrows > self.nrows: 

2327 raise IndexError("This modification will exceed the length of the " 

2328 "table. Giving up.") 

2329 

2330 # Do the actual update 

2331 self._update_records(start, stop, step, recarr) 

2332 

2333 # Redo the index if needed 

2334 self._reindex(self.colpathnames) 

2335 

2336 return SizeType(lenrows) 

2337 

2338 def modify_column(self, start=None, stop=None, step=None, 

2339 column=None, colname=None): 

2340 """Modify one single column in the row slice [start:stop:step]. 

2341 

2342 The colname argument specifies the name of the column in the 

2343 table to be modified with the data given in column. This 

2344 method returns the number of rows modified. Should the 

2345 modification exceed the length of the table, an IndexError is 

2346 raised before changing data. 

2347 

2348 The *column* argument may be any object which can be converted 

2349 to a (record) array compliant with the structure of the column 

2350 to be modified (otherwise, a ValueError is raised). This 

2351 includes NumPy (record) arrays, lists of scalars, tuples or 

2352 array records, and a string or Python buffer. 

2353 

2354 """ 

2355 if step is None: 

2356 step = 1 

2357 if not isinstance(colname, str): 

2358 raise TypeError("The 'colname' parameter must be a string.") 

2359 self._v_file._check_writable() 

2360 

2361 if column is None: # Nothing to be done 

2362 return SizeType(0) 

2363 if start is None: 

2364 start = 0 

2365 

2366 if start < 0: 

2367 raise ValueError("'start' must have a positive value.") 

2368 if step < 1: 

2369 raise ValueError( 

2370 "'step' must have a value greater or equal than 1.") 

2371 # Get the column format to be modified: 

2372 objcol = self._get_column_instance(colname) 

2373 descr = [objcol._v_parent._v_nested_descr[objcol._v_pos]] 

2374 # Try to convert the column object into a NumPy ndarray 

2375 try: 

2376 # If the column is a recarray (or kind of), convert into ndarray 

2377 if hasattr(column, 'dtype') and column.dtype.kind == 'V': 

2378 column = np.rec.array(column, dtype=descr).field(0) 

2379 else: 

2380 # Make sure the result is always a *copy* of the original, 

2381 # so the resulting object is safe for in-place conversion. 

2382 iflavor = flavor_of(column) 

2383 column = array_as_internal(column, iflavor) 

2384 except Exception as exc: # XXX 

2385 raise ValueError("column parameter cannot be converted into a " 

2386 "ndarray object compliant with specified column " 

2387 "'%s'. The error was: <%s>" % (str(column), exc)) 

2388 

2389 # Get rid of singleton dimensions 

2390 column = column.squeeze() 

2391 if column.shape == (): 

2392 # Oops, stripped off too many dimensions 

2393 column.shape = (1,) 

2394 

2395 if stop is None: 

2396 # compute the stop value. start + len(rows)*step does not work 

2397 stop = start + (len(column) - 1) * step + 1 

2398 (start, stop, step) = self._process_range(start, stop, step) 

2399 if stop > self.nrows: 

2400 raise IndexError("This modification will exceed the length of " 

2401 "the table. Giving up.") 

2402 # Compute the number of rows to read. 

2403 nrows = len(range(start, stop, step)) 

2404 if len(column) < nrows: 

2405 raise ValueError("The value has not enough elements to fill-in " 

2406 "the specified range") 

2407 # Now, read the original values: 

2408 mod_recarr = self._read(start, stop, step) 

2409 # Modify the appropriate column in the original recarray 

2410 mod_col = get_nested_field(mod_recarr, colname) 

2411 mod_col[:] = column 

2412 # save this modified rows in table 

2413 self._update_records(start, stop, step, mod_recarr) 

2414 # Redo the index if needed 

2415 self._reindex([colname]) 

2416 

2417 return SizeType(nrows) 
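
A minimal sketch of modify_column(), rewriting the first three values of a single column while leaving the other columns untouched (hypothetical names)::

    import tables as tb

    with tb.open_file('data.h5', mode='a') as h5:
        table = h5.root.table
        table.modify_column(start=0, stop=3, colname='col2',
                            column=[0.1, 0.2, 0.3])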

2418 

2419 def modify_columns(self, start=None, stop=None, step=None, 

2420 columns=None, names=None): 

2421 """Modify a series of columns in the row slice [start:stop:step]. 

2422 

2423 The names argument specifies the names of the columns in the 

2424 table to be modified with the data given in columns. This 

2425 method returns the number of rows modified. Should the 

2426 modification exceed the length of the table, an IndexError 

2427 is raised before changing data. 

2428 

2429 The columns argument may be any object which can be converted 

2430 to a structured array compliant with the structure of the 

2431 columns to be modified (otherwise, a ValueError is raised). 

2432 This includes NumPy structured arrays, lists of tuples or array 

2433 records, and a string or Python buffer. 

2434 

2435 """ 

2436 if step is None: 

2437 step = 1 

2438 if type(names) not in (list, tuple): 

2439 raise TypeError("The 'names' parameter must be a list of strings.") 

2440 

2441 if columns is None: # Nothing to be done 

2442 return SizeType(0) 

2443 if start is None: 

2444 start = 0 

2445 if start < 0: 

2446 raise ValueError("'start' must have a positive value.") 

2447 if step < 1: 

2448 raise ValueError("'step' must have a value greater or " 

2449 "equal than 1.") 

2450 descr = [] 

2451 for colname in names: 

2452 objcol = self._get_column_instance(colname) 

2453 descr.append(objcol._v_parent._v_nested_descr[objcol._v_pos]) 

2454 # descr.append(objcol._v_parent._v_dtype[objcol._v_pos]) 

2455 # Try to convert the columns object into a recarray 

2456 try: 

2457 # Make sure the result is always a *copy* of the original, 

2458 # so the resulting object is safe for in-place conversion. 

2459 iflavor = flavor_of(columns) 

2460 if iflavor != 'python': 

2461 columns = array_as_internal(columns, iflavor) 

2462 recarray = np.rec.array(columns, dtype=descr) 

2463 else: 

2464 recarray = np.rec.fromarrays(columns, dtype=descr) 

2465 except Exception as exc: # XXX 

2466 raise ValueError("columns parameter cannot be converted into a " 

2467 "recarray object compliant with table '%s'. " 

2468 "The error was: <%s>" % (str(self), exc)) 

2469 

2470 if stop is None: 

2471 # compute the stop value. start + len(rows)*step does not work 

2472 stop = start + (len(recarray) - 1) * step + 1 

2473 (start, stop, step) = self._process_range(start, stop, step) 

2474 if stop > self.nrows: 

2475 raise IndexError("This modification will exceed the length of " 

2476 "the table. Giving up.") 

2477 # Compute the number of rows to read. 

2478 nrows = len(range(start, stop, step)) 

2479 if len(recarray) < nrows: 

2480 raise ValueError("The value has not enough elements to fill-in " 

2481 "the specified range") 

2482 # Now, read the original values: 

2483 mod_recarr = self._read(start, stop, step) 

2484 # Modify the appropriate columns in the original recarray 

2485 for i, name in enumerate(recarray.dtype.names): 

2486 mod_col = get_nested_field(mod_recarr, names[i]) 

2487 mod_col[:] = recarray[name].squeeze() 

2488 # save this modified rows in table 

2489 self._update_records(start, stop, step, mod_recarr) 

2490 # Redo the index if needed 

2491 self._reindex(names) 

2492 

2493 return SizeType(nrows) 

2494 

2495 def flush_rows_to_index(self, _lastrow=True): 

2496 """Add remaining rows in buffers to non-dirty indexes. 

2497 

2498 This can be useful when you have chosen non-automatic indexing 

2499 for the table (see the :attr:`Table.autoindex` property in 

2500 :class:`Table`) and you want to update the indexes on it. 

2501 

2502 """ 

2503 

2504 rowsadded = 0 

2505 if self.indexed: 

2506 # Update the number of unsaved indexed rows 

2507 start = self._indexedrows 

2508 nrows = self._unsaved_indexedrows 

2509 for (colname, colindexed) in self.colindexed.items(): 

2510 if colindexed: 

2511 col = self.cols._g_col(colname) 

2512 if nrows > 0 and not col.index.dirty: 

2513 rowsadded = self._add_rows_to_index( 

2514 colname, start, nrows, _lastrow, update=True) 

2515 self._unsaved_indexedrows -= rowsadded 

2516 self._indexedrows += rowsadded 

2517 return rowsadded 
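
A minimal sketch of the manual-indexing workflow served by flush_rows_to_index(); with autoindex disabled, appended rows are merged into the indexes only on demand (hypothetical names and a two-column row layout)::

    import tables as tb

    with tb.open_file('data.h5', mode='a') as h5:
        table = h5.root.table
        table.autoindex = False
        table.append([(i, float(i)) for i in range(1000)])
        table.flush()
        added = table.flush_rows_to_index()  # update the indexes now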

2518 

2519 def _add_rows_to_index(self, colname, start, nrows, lastrow, update): 

2520 """Add more elements to the existing index.""" 

2521 

2522 # This method really belongs to Column, but since it makes extensive 

2523 # use of the table, it gets dangerous when closing the file, since the 

2524 # column may be accessing a table which is being destroyed. 

2525 index = self.cols._g_col(colname).index 

2526 slicesize = index.slicesize 

2527 # The next loop does not rely on xrange so that it can 

2528 # deal with long ints (i.e. more than 32-bit integers) 

2529 # This allows indexing columns with more than 2**31 rows 

2530 # F. Alted 2005-05-09 

2531 startLR = index.sorted.nrows * slicesize 

2532 indexedrows = startLR - start 

2533 stop = start + nrows - slicesize + 1 

2534 while startLR < stop: 

2535 index.append( 

2536 [self._read(startLR, startLR + slicesize, 1, colname)], 

2537 update=update) 

2538 indexedrows += slicesize 

2539 startLR += slicesize 

2540 # index the remaining rows in last row 

2541 if lastrow and startLR < self.nrows: 

2542 index.append_last_row( 

2543 [self._read(startLR, self.nrows, 1, colname)], 

2544 update=update) 

2545 indexedrows += self.nrows - startLR 

2546 return indexedrows 

2547 

2548 def remove_rows(self, start=None, stop=None, step=None): 

2549 """Remove a range of rows in the table. 

2550 

2551 If only start is supplied, that row and all following will be deleted. 

2552 If a range is supplied, i.e. both the start and stop parameters are 

2553 passed, all the rows in the range are removed. 

2554 

2555 .. versionchanged:: 3.0 

2556 The start, stop and step parameters now behave like in a slice. 

2557 

2558 .. seealso:: remove_row() 

2559 

2560 Parameters 

2561 ---------- 

2562 start : int 

2563 Sets the starting row to be removed. It accepts negative values 

2564 meaning that the count starts from the end. A value of 0 means the 

2565 first row. 

2566 stop : int 

2567 Sets the last row to be removed to stop-1, i.e. the end point is 

2568 omitted (in the Python range() tradition). Negative values are also 

2569 accepted. If None, all rows after start will be removed. 

2570 step : int 

2571 The step size between rows to remove. 

2572 

2573 .. versionadded:: 3.0 

2574 

2575 Examples 

2576 -------- 

2577 

2578 Removing rows from 5 to 10 (excluded):: 

2579 

2580 t.remove_rows(5, 10) 

2581 

2582 Removing all rows starting from the 10th:: 

2583 

2584 t.remove_rows(10) 

2585 

2586 Removing the 6th row:: 

2587 

2588 t.remove_rows(6, 7) 

2589 

2590 .. note:: 

2591 

2592 Removing a single row can be done using the specific 

2593 :meth:`remove_row` method. 

2594 

2595 """ 

2596 

2597 (start, stop, step) = self._process_range(start, stop, step) 

2598 nrows = self._remove_rows(start, stop, step) 

2599 # remove_rows is an index-invalidating operation 

2600 self._reindex(self.colpathnames) 

2601 

2602 return SizeType(nrows) 

2603 

2604 def remove_row(self, n): 

2605 """Removes a row from the table. 

2606 

2607 Parameters 

2608 ---------- 

2609 n : int 

2610 The index of the row to remove. 

2611 

2612 

2613 .. versionadded:: 3.0 

2614 

2615 Examples 

2616 -------- 

2617 

2618 Remove row 15:: 

2619 

2620 table.remove_row(15) 

2621 

2622 Which is equivalent to:: 

2623 

2624 table.remove_rows(15, 16) 

2625 

2626 .. warning:: 

2627 

2628 This is not equivalent to:: 

2629 

2630 table.remove_rows(15) 

2631 

2632 """ 

2633 

2634 self.remove_rows(start=n, stop=n + 1) 

2635 

2636 def _g_update_dependent(self): 

2637 super()._g_update_dependent() 

2638 

2639 # Update the new path in columns 

2640 self.cols._g_update_table_location(self) 

2641 

2642 # Update the new path in the Row instance, if cached. Fixes #224. 

2643 if 'row' in self.__dict__: 

2644 self.__dict__['row'] = tableextension.Row(self) 

2645 

2646 def _g_move(self, newparent, newname): 

2647 """Move this node in the hierarchy. 

2648 

2649 This overloads the Node._g_move() method. 

2650 

2651 """ 

2652 

2653 itgpathname = _index_pathname_of(self) 

2654 

2655 # First, move the table to the new location. 

2656 super()._g_move(newparent, newname) 

2657 

2658 # Then move the associated index group (if any). 

2659 try: 

2660 itgroup = self._v_file._get_node(itgpathname) 

2661 except NoSuchNodeError: 

2662 pass 

2663 else: 

2664 newigroup = self._v_parent 

2665 newiname = _index_name_of(self) 

2666 itgroup._g_move(newigroup, newiname) 

2667 

2668 def _g_remove(self, recursive=False, force=False): 

2669 # Remove the associated index group (if any). 

2670 itgpathname = _index_pathname_of(self) 

2671 try: 

2672 itgroup = self._v_file._get_node(itgpathname) 

2673 except NoSuchNodeError: 

2674 pass 

2675 else: 

2676 itgroup._f_remove(recursive=True) 

2677 self.indexed = False # there are no more indexes 

2678 

2679 # Remove the leaf itself from the hierarchy. 

2680 super()._g_remove(recursive, force) 

2681 

2682 def _set_column_indexing(self, colpathname, indexed): 

2683 """Mark the referred column as indexed or non-indexed.""" 

2684 

2685 colindexed = self.colindexed 

2686 isindexed, wasindexed = bool(indexed), colindexed[colpathname] 

2687 if isindexed == wasindexed: 

2688 return # indexing state is unchanged 

2689 

2690 # Changing the set of indexed columns invalidates the condition cache 

2691 self._condition_cache.clear() 

2692 colindexed[colpathname] = isindexed 

2693 self.indexed = any(colindexed.values()) # logical OR over all columns 

2694 

2695 def _mark_columns_as_dirty(self, colnames): 

2696 """Mark column indexes in `colnames` as dirty.""" 

2697 

2698 assert len(colnames) > 0 

2699 if self.indexed: 

2700 colindexed, cols = self.colindexed, self.cols 

2701 # Mark the proper indexes as dirty 

2702 for colname in colnames: 

2703 if colindexed[colname]: 

2704 col = cols._g_col(colname) 

2705 col.index.dirty = True 

2706 

2707 def _reindex(self, colnames): 

2708 """Re-index columns in `colnames` if automatic indexing is true.""" 

2709 

2710 if self.indexed: 

2711 colindexed, cols = self.colindexed, self.cols 

2712 colstoindex = [] 

2713 # Mark the proper indexes as dirty 

2714 for colname in colnames: 

2715 if colindexed[colname]: 

2716 col = cols._g_col(colname) 

2717 col.index.dirty = True 

2718 colstoindex.append(colname) 

2719 # Now, re-index the dirty ones 

2720 if self.autoindex and colstoindex: 

2721 self._do_reindex(dirty=True) 

2722 # The table caches for indexed queries are dirty now 

2723 self._dirtycache = True 

2724 

2725 def _do_reindex(self, dirty): 

2726 """Common code for `reindex()` and `reindex_dirty()`.""" 

2727 

2728 indexedrows = 0 

2729 for (colname, colindexed) in self.colindexed.items(): 

2730 if colindexed: 

2731 indexcol = self.cols._g_col(colname) 

2732 indexedrows = indexcol._do_reindex(dirty) 

2733 # Update counters in case some column has been updated 

2734 if indexedrows > 0: 

2735 self._indexedrows = indexedrows 

2736 self._unsaved_indexedrows = self.nrows - indexedrows 

2737 

2738 return SizeType(indexedrows) 

2739 

2740 def reindex(self): 

2741 """Recompute all the existing indexes in the table. 

2742 

2743 This can be useful when you suspect that, for any reason, the 

2744 index information for columns is no longer valid and you want to 

2745 rebuild the indexes. 

2746 

2747 """ 

2748 

2749 self._do_reindex(dirty=False) 

2750 

2751 def reindex_dirty(self): 

2752 """Recompute the existing indexes in table, *if* they are dirty. 

2753 

2754 This can be useful when you have set :attr:`Table.autoindex` 

2755 (see :class:`Table`) to false for the table and you want to 

2756 update the indexes after an index-invalidating operation 

2757 (:meth:`Table.remove_rows`, for example). 

2758 

2759 """ 

2760 

2761 self._do_reindex(dirty=True) 

2762 

2763 def _g_copy_rows(self, object, start, stop, step, sortby, checkCSI): 

2764 """Copy rows from self to object""" 

2765 if sortby is None: 

2766 self._g_copy_rows_optim(object, start, stop, step) 

2767 return 

2768 lenbuf = self.nrowsinbuf 

2769 absstep = step 

2770 if step < 0: 

2771 absstep = -step 

2772 start, stop = stop + 1, start + 1 

2773 if sortby is not None: 

2774 index = self._check_sortby_csi(sortby, checkCSI) 

2775 for start2 in range(start, stop, absstep * lenbuf): 

2776 stop2 = start2 + absstep * lenbuf 

2777 if stop2 > stop: 

2778 stop2 = stop 

2779 # The next 'if' is not needed, but it doesn't hurt either 

2780 if sortby is None: 

2781 rows = self[start2:stop2:step] 

2782 else: 

2783 coords = index[start2:stop2:step] 

2784 rows = self.read_coordinates(coords) 

2785 # Save the records on disk 

2786 object.append(rows) 

2787 object.flush() 

2788 

2789 def _g_copy_rows_optim(self, object, start, stop, step): 

2790 """Copy rows from self to object (optimized version)""" 

2791 

2792 nrowsinbuf = self.nrowsinbuf 

2793 object._open_append(self._v_iobuf) 

2794 nrowsdest = object.nrows 

2795 for start2 in range(start, stop, step * nrowsinbuf): 

2796 # Save the records on disk 

2797 stop2 = start2 + step * nrowsinbuf 

2798 if stop2 > stop: 

2799 stop2 = stop 

2800 # Optimized version (it saves some conversions) 

2801 nrows = ((stop2 - start2 - 1) // step) + 1 

2802 self.row._fill_col(self._v_iobuf, start2, stop2, step, None) 

2803 # The output buffer is created anew, 

2804 # so the operation is safe for in-place conversion. 

2805 object._append_records(nrows) 

2806 nrowsdest += nrows 

2807 object._close_append() 

2808 

2809 def _g_prop_indexes(self, other): 

2810 """Generate index in `other` table for every indexed column here.""" 

2811 

2812 oldcols, newcols = self.colinstances, other.colinstances 

2813 for colname in newcols: 

2814 if (isinstance(oldcols[colname], Column)): 

2815 oldcolindexed = oldcols[colname].is_indexed 

2816 if oldcolindexed: 

2817 oldcolindex = oldcols[colname].index 

2818 newcol = newcols[colname] 

2819 newcol.create_index( 

2820 kind=oldcolindex.kind, optlevel=oldcolindex.optlevel, 

2821 filters=oldcolindex.filters, tmp_dir=None) 

2822 

2823 def _g_copy_with_stats(self, group, name, start, stop, step, 

2824 title, filters, chunkshape, _log, **kwargs): 

2825 """Private part of Leaf.copy() for each kind of leaf.""" 

2826 

2827 # Get the private args for the Table flavor of copy() 

2828 sortby = kwargs.pop('sortby', None) 

2829 propindexes = kwargs.pop('propindexes', False) 

2830 checkCSI = kwargs.pop('checkCSI', False) 

2831 # Compute the correct indices. 

2832 (start, stop, step) = self._process_range_read( 

2833 start, stop, step, warn_negstep=sortby is None) 

2834 # And the number of final rows 

2835 nrows = len(range(start, stop, step)) 

2836 # Create the new table and copy the selected data. 

2837 newtable = Table(group, name, self.description, title=title, 

2838 filters=filters, expectedrows=nrows, 

2839 chunkshape=chunkshape, 

2840 _log=_log) 

2841 self._g_copy_rows(newtable, start, stop, step, sortby, checkCSI) 

2842 nbytes = newtable.nrows * newtable.rowsize 

2843 # Generate equivalent indexes in the new table, if required. 

2844 if propindexes and self.indexed: 

2845 self._g_prop_indexes(newtable) 

2846 return (newtable, nbytes) 

2847 

2848 # This overloading of copy is needed here in order to document 

2849 # the additional keywords for the Table case. 

2850 def copy(self, newparent=None, newname=None, overwrite=False, 

2851 createparents=False, **kwargs): 

2852 """Copy this table and return the new one. 

2853 

2854 This method has the behavior and keywords described in 

2855 :meth:`Leaf.copy`. Moreover, it recognises the following additional 

2856 keyword arguments. 

2857 

2858 Parameters 

2859 ---------- 

2860 sortby 

2861 If specified, and sortby corresponds to a column with an index, 

2862 then the copy will be sorted by this index. If you want to ensure 

2863 a fully sorted order, the index must be a CSI one. A reverse 

2864 sorted copy can be achieved by specifying a negative value for the 

2865 step keyword. If sortby is omitted or None, the original table 

2866 order is used. 

2867 checkCSI 

2868 If true and a CSI index does not exist for the sortby column, an 

2869 error will be raised. If false (the default), it does nothing. 

2870 You can use this flag in order to explicitly check for the 

2871 existence of a CSI index. 

2872 propindexes 

2873 If true, the existing indexes in the source table are propagated 

2874 (created) to the new one. If false (the default), the indexes are 

2875 not propagated. 

2876 

2877 """ 

2878 

2879 return super().copy( 

2880 newparent, newname, overwrite, createparents, **kwargs) 
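
A minimal sketch of the Table-specific copy() keywords: a sorted copy whose indexes are recreated on the destination ('col1' is a hypothetical column carrying a full index)::

    import tables as tb

    with tb.open_file('data.h5', mode='a') as h5:
        table = h5.root.table
        table.copy(newname='table_sorted', sortby='col1',
                   checkCSI=True, propindexes=True)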

2881 

2882 def flush(self): 

2883 """Flush the table buffers.""" 

2884 

2885 if self._v_file._iswritable(): 

2886 # Flush rows that remains to be appended 

2887 if 'row' in self.__dict__: 

2888 self.row._flush_buffered_rows() 

2889 if self.indexed and self.autoindex: 

2890 # Flush any unindexed row 

2891 rowsadded = self.flush_rows_to_index(_lastrow=True) 

2892 assert rowsadded <= 0 or self._indexedrows == self.nrows, \ 

2893 ("internal error: the number of indexed rows (%d) " 

2894 "and rows in the table (%d) is not equal; " 

2895 "please report this to the authors." 

2896 % (self._indexedrows, self.nrows)) 

2897 if self._dirtyindexes: 

2898 # Finally, re-index any dirty column 

2899 self.reindex_dirty() 

2900 

2901 super().flush() 

2902 

2903 def _g_pre_kill_hook(self): 

2904 """Code to be called before killing the node.""" 

2905 

2906 # Flush the buffers before to clean-up them 

2907 # self.flush() 

2908 # It seems that flushing during the __del__ phase is a sure recipe for 

2909 # bringing all kinds of problems: 

2910 # 1. Illegal Instruction 

2911 # 2. Malloc(): trying to call free() twice 

2912 # 3. Bus Error 

2913 # 4. Segmentation fault 

2914 # So, the best would be doing *nothing* at all in this __del__ phase. 

2915 # As a consequence, the I/O will not be cleaned until a call to 

2916 # Table.flush() would be done. This could lead to a potentially large 

2917 # memory consumption. 

2918 # NOTE: Users should make a call to Table.flush() whenever they have 

2919 # finished working with their table. 

2920 # I've added a Performance warning in order to compel the user to 

2921 # call self.flush() before the table is being preempted. 

2922 # F. Alted 2006-08-03 

2923 if (('row' in self.__dict__ and self.row._get_unsaved_nrows() > 0) or 

2924 (self.indexed and self.autoindex and 

2925 (self._unsaved_indexedrows > 0 or self._dirtyindexes))): 

2926 warnings.warn(("table ``%s`` is being preempted from alive nodes " 

2927 "without its buffers being flushed or with some " 

2928 "index being dirty. This may lead to very " 

2929 "ineficient use of resources and even to fatal " 

2930 "errors in certain situations. Please do a call " 

2931 "to the .flush() or .reindex_dirty() methods on " 

2932 "this table before start using other nodes.") 

2933 % (self._v_pathname), PerformanceWarning) 

2934 # Get rid of the IO buffers (if they have been created at all) 

2935 mydict = self.__dict__ 

2936 if '_v_iobuf' in mydict: 

2937 del mydict['_v_iobuf'] 

2938 if '_v_wdflts' in mydict: 

2939 del mydict['_v_wdflts'] 

2940 

2941 def _f_close(self, flush=True): 

2942 if not self._v_isopen: 

2943 return # the node is already closed 

2944 

2945 # .. note:: 

2946 # 

2947 # As long as ``Table`` objects access their indices on closing, 

2948 # ``File.close()`` will need to make *two separate passes* 

2949 # to first close ``Table`` objects and then ``Index`` hierarchies. 

2950 # 

2951 

2952 # Flush right now so the row object does not get in the middle. 

2953 if flush: 

2954 self.flush() 

2955 

2956 # Some warnings can be issued after calling `self._g_set_location()` 

2957 # in `self.__init__()`. If warnings are turned into exceptions, 

2958 # `self._g_post_init_hook` may not be called and `self.cols` not set. 

2959 # One example of this is 

2960 # ``test_create.createTestCase.test05_maxFieldsExceeded()``. 

2961 cols = self.cols 

2962 if cols is not None: 

2963 cols._g_close() 

2964 

2965 # Clean address cache 

2966 self._clean_chunk_addrs() 

2967 

2968 # Close myself as a leaf. 

2969 super()._f_close(False) 

2970 

2971 def __repr__(self): 

2972 """This provides column metainfo in addition to standard __str__""" 

2973 

2974 if self.indexed: 

2975 format = """\ 

2976%s 

2977 description := %r 

2978 byteorder := %r 

2979 chunkshape := %r 

2980 autoindex := %r 

2981 colindexes := %r""" 

2982 return format % (str(self), self.description, self.byteorder, 

2983 self.chunkshape, self.autoindex, 

2984 _ColIndexes(self.colindexes)) 

2985 else: 

2986 return """\ 

2987%s 

2988 description := %r 

2989 byteorder := %r 

2990 chunkshape := %r""" % \ 

2991 (str(self), self.description, self.byteorder, self.chunkshape) 

2992 

2993 

2994class Cols: 

2995 """Container for columns in a table or nested column. 

2996 

2997 This class is used as an *accessor* to the columns in a table or nested 

2998 column. It supports the *natural naming* convention, so that you can 

2999 access the different columns as attributes which lead to Column instances 

3000 (for non-nested columns) or other Cols instances (for nested columns). 

3001 

3002 For instance, if table.cols is a Cols instance with a column named col1 

3003 under it, the latter can be accessed as table.cols.col1. If col1 is nested 

3004 and contains a col2 column, this can be accessed as table.cols.col1.col2 

3005 and so on. Because of natural naming, the names of non-column members

3006 start with special prefixes, as in the Group class (see :ref:`GroupClassDescr`).

3007 

3008 Like the Column class (see :ref:`ColumnClassDescr`), Cols supports item 

3009 access to read and write ranges of values in the table or nested column. 

3010 

3011 
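As an illustration (reusing the hypothetical col1 and col2 columns from
the paragraph above)::

nested_cols = table.cols.col1 # a Cols instance (nested column)
leaf_column = table.cols.col1.col2 # a Column instance (leaf column)
same_column = table.cols._f_col('col1/col2') # equivalent pathname access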

3012 .. rubric:: Cols attributes 

3013 

3014 .. attribute:: _v_colnames 

3015 

3016 A list of the names of the columns hanging directly 

3017 from the associated table or nested column. The order of 

3018 the names matches the order of their respective columns in 

3019 the containing table. 

3020 

3021 .. attribute:: _v_colpathnames 

3022 

3023 A list of the pathnames of all the columns under the 

3024 associated table or nested column (in preorder). If it does 

3025 not contain nested columns, this is exactly the same as the 

3026 :attr:`Cols._v_colnames` attribute. 

3027 

3028 .. attribute:: _v_desc 

3029 

3030 The associated Description instance (see 

3031 :ref:`DescriptionClassDescr`). 

3032 

3033 """ 

3034 

3035 @property 

3036 def _v_table(self): 

3037 """The parent Table instance (see :ref:`TableClassDescr`).""" 

3038 return self._v__tableFile._get_node(self._v__tablePath) 

3039 

3040 def __init__(self, table, desc): 

3041 myDict = self.__dict__ 

3042 myDict['_v__tableFile'] = table._v_file 

3043 myDict['_v__tablePath'] = table._v_pathname 

3044 myDict['_v_desc'] = desc 

3045 myDict['_v_colnames'] = desc._v_names 

3046 myDict['_v_colpathnames'] = table.description._v_pathnames 

3047 # Put the column in the local dictionary 

3048 for name in desc._v_names: 

3049 if name in desc._v_types: 

3050 myDict[name] = Column(table, name, desc) 

3051 else: 

3052 myDict[name] = Cols(table, desc._v_colobjects[name]) 

3053 

3054 def _g_update_table_location(self, table): 

3055 """Updates the location information about the associated `table`.""" 

3056 

3057 myDict = self.__dict__ 

3058 myDict['_v__tableFile'] = table._v_file 

3059 myDict['_v__tablePath'] = table._v_pathname 

3060 

3061 # Update the locations in individual columns. 

3062 for colname in self._v_colnames: 

3063 myDict[colname]._g_update_table_location(table) 

3064 

3065 def __len__(self): 

3066 """Get the number of top level columns in table.""" 

3067 

3068 return len(self._v_colnames) 

3069 

3070 def _f_col(self, colname): 

3071 """Get an accessor to the column colname. 

3072 

3073 This method returns a Column instance (see :ref:`ColumnClassDescr`) if 

3074 the requested column is not nested, and a Cols instance (see 

3075 :ref:`ColsClassDescr`) if it is. You may use full column pathnames in 

3076 colname. 

3077 

3078 Calling cols._f_col('col1/col2') is equivalent to using cols.col1.col2. 

3079 However, the first syntax is better suited to programmatic use. It is

3080 also better if you want to access columns with names that are not valid 

3081 Python identifiers. 

3082 
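A minimal sketch (assuming a hypothetical column named 'col-1', whose
name is not a valid Python identifier, plus the nested path above)::

col = table.cols._f_col('col-1') # unreachable via natural naming
nested = table.cols._f_col('col1/col2') # same as table.cols.col1.col2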

3083 """ 

3084 

3085 if not isinstance(colname, str): 

3086 raise TypeError("Parameter can only be a string. You passed "

3087 "object: %s" % colname) 

3088 if ((colname.find('/') > -1 and

3089 colname not in self._v_colpathnames) or

3090 (colname.find('/') == -1 and colname not in self._v_colnames)):

3091 raise KeyError(("Cols accessor ``%s.cols%s`` does not have a " 

3092 "column named ``%s``") 

3093 % (self._v__tablePath, self._v_desc._v_pathname, 

3094 colname)) 

3095 

3096 return self._g_col(colname) 

3097 

3098 def _g_col(self, colname): 

3099 """Like `self._f_col()` but it does not check arguments.""" 

3100 

3101 # Get the Column or Description object 

3102 inames = colname.split('/') 

3103 cols = self 

3104 for iname in inames: 

3105 cols = cols.__dict__[iname] 

3106 return cols 

3107 

3108 def __getitem__(self, key): 

3109 """Get a row or a range of rows from a table or nested column. 

3110 

3111 If key argument is an integer, the corresponding nested type row is 

3112 returned as a record of the current flavor. If key is a slice, the 

3113 range of rows determined by it is returned as a structured array of the 

3114 current flavor. 

3115 

3116 Examples 

3117 -------- 

3118 

3119 :: 

3120 

3121 record = table.cols[4] # equivalent to table[4] 

3122 recarray = table.cols.Info[4:1000:2] 

3123 

3124 Those statements are equivalent to:: 

3125 

3126 nrecord = table.read(start=4)[0] 

3127 nrecarray = table.read(start=4, stop=1000, step=2).field('Info') 

3128 

3129 Here you can see how a mix of natural naming, indexing and slicing can 

3130 be used as shorthands for the :meth:`Table.read` method. 

3131 

3132 """ 

3133 table = self._v_table 

3134 nrows = table.nrows 

3135 if is_idx(key): 

3136 key = operator.index(key) 

3137 

3138 # Index out of range protection 

3139 if key >= nrows: 

3140 raise IndexError("Index out of range") 

3141 if key < 0: 

3142 # To support negative values 

3143 key += nrows 

3144 (start, stop, step) = table._process_range(key, key + 1, 1) 

3145 colgroup = self._v_desc._v_pathname 

3146 if colgroup == "": # The root group 

3147 return table.read(start, stop, step)[0] 

3148 else: 

3149 crecord = table.read(start, stop, step)[0] 

3150 return crecord[colgroup] 

3151 elif isinstance(key, slice): 

3152 (start, stop, step) = table._process_range( 

3153 key.start, key.stop, key.step) 

3154 colgroup = self._v_desc._v_pathname 

3155 if colgroup == "": # The root group 

3156 return table.read(start, stop, step) 

3157 else: 

3158 crecarray = table.read(start, stop, step) 

3159 if hasattr(crecarray, "field"): 

3160 return crecarray.field(colgroup) # RecArray case 

3161 else: 

3162 return get_nested_field(crecarray, colgroup) # numpy case 

3163 else: 

3164 raise TypeError(f"invalid index or slice: {key!r}") 

3165 

3166 def __setitem__(self, key, value): 

3167 """Set a row or a range of rows in a table or nested column. 

3168 

3169 If key argument is an integer, the corresponding row is set to 

3170 value. If key is a slice, the range of rows determined by it is set to 

3171 value. 

3172 

3173 Examples 

3174 -------- 

3175 

3176 :: 

3177 

3178 table.cols[4] = record 

3179 table.cols.Info[4:1000:2] = recarray 

3180 

3181 Those statements are equivalent to:: 

3182 

3183 table.modify_rows(4, rows=record) 

3184 table.modify_column(4, 1000, 2, colname='Info', column=recarray) 

3185 

3186 Here you can see how a mix of natural naming, indexing and slicing 

3187 can be used as shorthands for the :meth:`Table.modify_rows` and 

3188 :meth:`Table.modify_column` methods. 

3189 

3190 """ 

3191 

3192 table = self._v_table 

3193 nrows = table.nrows 

3194 if is_idx(key): 

3195 key = operator.index(key) 

3196 

3197 # Index out of range protection 

3198 if key >= nrows: 

3199 raise IndexError("Index out of range") 

3200 if key < 0: 

3201 # To support negative values 

3202 key += nrows 

3203 (start, stop, step) = table._process_range(key, key + 1, 1) 

3204 elif isinstance(key, slice): 

3205 (start, stop, step) = table._process_range( 

3206 key.start, key.stop, key.step) 

3207 else: 

3208 raise TypeError(f"invalid index or slice: {key!r}") 

3209 

3210 # Actually modify the correct columns 

3211 colgroup = self._v_desc._v_pathname 

3212 if colgroup == "": # The root group 

3213 table.modify_rows(start, stop, step, rows=value) 

3214 else: 

3215 table.modify_column( 

3216 start, stop, step, colname=colgroup, column=value) 

3217 

3218 def _g_close(self): 

3219 # First, close the columns (i.e. close any column indexes left open)

3220 for col in self._v_colnames: 

3221 colobj = self._g_col(col) 

3222 if isinstance(colobj, Column): 

3223 colobj.close() 

3224 # Delete the reference to column 

3225 del self.__dict__[col] 

3226 else: 

3227 colobj._g_close() 

3228 

3229 self.__dict__.clear() 

3230 

3231 def __str__(self): 

3232 """The string representation for this object.""" 

3233 

3234 # The pathname 

3235 descpathname = self._v_desc._v_pathname 

3236 if descpathname: 

3237 descpathname = "." + descpathname 

3238 return (f"{self._v__tablePath}.cols{descpathname} " 

3239 f"({self.__class__.__name__}), " 

3240 f"{len(self._v_colnames)} columns") 

3241 

3242 def __repr__(self): 

3243 """A detailed string representation for this object.""" 

3244 

3245 lines = [f'{self!s}'] 

3246 for name in self._v_colnames: 

3247 # Get this class name 

3248 classname = getattr(self, name).__class__.__name__ 

3249 # The type 

3250 if name in self._v_desc._v_dtypes: 

3251 tcol = self._v_desc._v_dtypes[name] 

3252 # The shape for this column 

3253 shape = (self._v_table.nrows,) + \ 

3254 self._v_desc._v_dtypes[name].shape 

3255 else: 

3256 tcol = "Description" 

3257 # Description doesn't have a shape currently 

3258 shape = () 

3259 lines.append(f" {name} ({classname}{shape}, {tcol})") 

3260 return '\n'.join(lines) + '\n' 

3261 

3262 

3263class Column: 

3264 """Accessor for a non-nested column in a table. 

3265 

3266 Each instance of this class is associated with one *non-nested* column of a 

3267 table. These instances are mainly used to read and write data from the 

3268 table columns using item access (like the Cols class - see 

3269 :ref:`ColsClassDescr`), but there are a few other associated methods to 

3270 deal with indexes. 

3271 
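A brief usage sketch (assuming a table with a hypothetical pressure
column)::

values = table.cols.pressure[:] # read the whole column
first = table.cols.pressure[0] # read one element
table.cols.pressure[0] = 0.0 # overwrite the first element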

3272 .. rubric:: Column attributes 

3273 

3274 .. attribute:: descr 

3275 

3276 The Description (see :ref:`DescriptionClassDescr`) instance of the 

3277 parent table or nested column. 

3278 

3279 .. attribute:: name 

3280 

3281 The name of the associated column. 

3282 

3283 .. attribute:: pathname 

3284 

3285 The complete pathname of the associated column (the same as 

3286 Column.name if the column is not inside a nested column). 

3287 

3288 Parameters 

3289 ---------- 

3290 table 

3291 The parent table instance 

3292 name 

3293 The name of the column that is associated with this object 

3294 descr 

3295 The parent description object 

3296 

3297 """ 

3298 

3299 @lazyattr 

3300 def dtype(self): 

3301 """The NumPy dtype that most closely matches this column.""" 

3302 

3303 return self.descr._v_dtypes[self.name].base # Get rid of shape info 

3304 

3305 @lazyattr 

3306 def type(self): 

3307 """The PyTables type of the column (a string).""" 

3308 

3309 return self.descr._v_types[self.name] 

3310 

3311 @property 

3312 def table(self): 

3313 """The parent Table instance (see :ref:`TableClassDescr`).""" 

3314 return self._table_file._get_node(self._table_path) 

3315 

3316 @property 

3317 def index(self): 

3318 """The Index instance (see :ref:`IndexClassDescr`) associated with this 

3319 column (None if the column is not indexed).""" 

3320 indexPath = _index_pathname_of_column_(self._table_path, self.pathname) 

3321 try: 

3322 index = self._table_file._get_node(indexPath) 

3323 except NodeError: 

3324 index = None # The column is not indexed 

3325 return index 

3326 

3327 @lazyattr 

3328 def _itemtype(self): 

3329 return self.descr._v_dtypes[self.name] 

3330 

3331 @property 

3332 def shape(self): 

3333 """The shape of this column.""" 

3334 return (self.table.nrows,) + self.descr._v_dtypes[self.name].shape 

3335 

3336 @property 

3337 def is_indexed(self): 

3338 """True if the column is indexed, false otherwise.""" 

3339 if self.index is None: 

3340 return False 

3341 else: 

3342 return True 

3343 

3344 @property 

3345 def maindim(self): 

3346 """"The dimension along which iterators work. Its value is 0 (i.e. the 

3347 first dimension).""" 

3348 return 0 

3349 

3350 def __init__(self, table, name, descr): 

3351 self._table_file = table._v_file 

3352 self._table_path = table._v_pathname 

3353 self.name = name 

3354 """The name of the associated column.""" 

3355 self.pathname = descr._v_colobjects[name]._v_pathname 

3356 """The complete pathname of the associated column (the same as 

3357 Column.name if the column is not inside a nested column).""" 

3358 self.descr = descr 

3359 """The Description (see :ref:`DescriptionClassDescr`) instance of the 

3360 parent table or nested column.""" 

3361 

3362 def _g_update_table_location(self, table): 

3363 """Updates the location information about the associated `table`.""" 

3364 

3365 self._table_file = table._v_file 

3366 self._table_path = table._v_pathname 

3367 

3368 def __len__(self): 

3369 """Get the number of elements in the column. 

3370 

3371 This matches the length in rows of the parent table. 

3372 

3373 """ 

3374 

3375 return self.table.nrows 

3376 

3377 def __getitem__(self, key): 

3378 """Get a row or a range of rows from a column. 

3379 

3380 If key argument is an integer, the corresponding element in the column 

3381 is returned as an object of the current flavor. If key is a slice, the 

3382 range of elements determined by it is returned as an array of the 

3383 current flavor. 

3384 

3385 Examples 

3386 -------- 

3387 

3388 :: 

3389 

3390 print("Column handlers:") 

3391 for name in table.colnames: 

3392 print(table.cols._f_col(name)) 

3393 print("Select table.cols.name[1]-->", table.cols.name[1]) 

3394 print("Select table.cols.name[1:2]-->", table.cols.name[1:2]) 

3395 print("Select table.cols.name[:]-->", table.cols.name[:]) 

3396 print("Select table.cols._f_col('name')[:]-->", 

3397 table.cols._f_col('name')[:]) 

3398 

3399 The output of this for an example table is::

3400 

3401 Column handlers: 

3402 /table.cols.name (Column(), string, idx=None) 

3403 /table.cols.lati (Column(), int32, idx=None) 

3404 /table.cols.longi (Column(), int32, idx=None) 

3405 /table.cols.vector (Column(2,), int32, idx=None) 

3406 /table.cols.matrix2D (Column(2, 2), float64, idx=None) 

3407 Select table.cols.name[1]--> Particle: 11 

3408 Select table.cols.name[1:2]--> ['Particle: 11'] 

3409 Select table.cols.name[:]--> ['Particle: 10' 

3410 'Particle: 11' 'Particle: 12' 

3411 'Particle: 13' 'Particle: 14'] 

3412 Select table.cols._f_col('name')[:]--> ['Particle: 10' 

3413 'Particle: 11' 'Particle: 12' 

3414 'Particle: 13' 'Particle: 14'] 

3415 

3416 See the :file:`examples/table2.py` file for a more complete example. 

3417 

3418 """ 

3419 

3420 table = self.table 

3421 

3422 # Generalized key support not there yet, but at least allow 

3423 # for a tuple with one single element (the main dimension). 

3424 # (key,) --> key 

3425 if isinstance(key, tuple) and len(key) == 1: 

3426 key = key[0] 

3427 

3428 if is_idx(key): 

3429 key = operator.index(key) 

3430 

3431 # Index out of range protection 

3432 if key >= table.nrows: 

3433 raise IndexError("Index out of range") 

3434 if key < 0: 

3435 # To support negative values 

3436 key += table.nrows 

3437 (start, stop, step) = table._process_range(key, key + 1, 1) 

3438 return table.read(start, stop, step, self.pathname)[0] 

3439 elif isinstance(key, slice): 

3440 (start, stop, step) = table._process_range( 

3441 key.start, key.stop, key.step) 

3442 return table.read(start, stop, step, self.pathname) 

3443 else: 

3444 raise TypeError( 

3445 "'%s' key type is not valid in this context" % key) 

3446 

3447 def __iter__(self): 

3448 """Iterate through all items in the column.""" 

3449 

3450 table = self.table 

3451 itemsize = self.dtype.itemsize 

3452 nrowsinbuf = max(1, table._v_file.params['IO_BUFFER_SIZE'] // itemsize)

3453 buf = np.empty((nrowsinbuf, ), self._itemtype) 

3454 max_row = len(self) 

3455 for start_row in range(0, len(self), nrowsinbuf): 

3456 end_row = min(start_row + nrowsinbuf, max_row) 

3457 buf_slice = buf[0:end_row - start_row] 

3458 table.read(start_row, end_row, 1, field=self.pathname, 

3459 out=buf_slice) 

3460 yield from buf_slice 

3461 

3462 def __setitem__(self, key, value): 

3463 """Set a row or a range of rows in a column. 

3464 

3465 If key argument is an integer, the corresponding element is set to 

3466 value. If key is a slice, the range of elements determined by it is 

3467 set to value. 

3468 

3469 Examples 

3470 -------- 

3471 

3472 :: 

3473 

3474 # Modify row 1 

3475 table.cols.col1[1] = -1 

3476 

3477 # Modify rows 1 and 3 

3478 table.cols.col1[1::2] = [2,3] 

3479 

3480 Which is equivalent to:: 

3481 

3482 # Modify row 1 

3483 table.modify_columns(start=1, columns=[[-1]], names=['col1']) 

3484 

3485 # Modify rows 1 and 3 

3486 columns = numpy.rec.fromarrays([[2,3]], formats='i4') 

3487 table.modify_columns(start=1, step=2, columns=columns, 

3488 names=['col1']) 

3489 

3490 """ 

3491 

3492 table = self.table 

3493 table._v_file._check_writable() 

3494 

3495 # Generalized key support not there yet, but at least allow 

3496 # for a tuple with one single element (the main dimension). 

3497 # (key,) --> key 

3498 if isinstance(key, tuple) and len(key) == 1: 

3499 key = key[0] 

3500 

3501 if is_idx(key): 

3502 key = operator.index(key) 

3503 

3504 # Index out of range protection 

3505 if key >= table.nrows: 

3506 raise IndexError("Index out of range") 

3507 if key < 0: 

3508 # To support negative values 

3509 key += table.nrows 

3510 return table.modify_column(key, key + 1, 1, 

3511 [[value]], self.pathname) 

3512 elif isinstance(key, slice): 

3513 (start, stop, step) = table._process_range( 

3514 key.start, key.stop, key.step) 

3515 return table.modify_column(start, stop, step, 

3516 value, self.pathname) 

3517 else: 

3518 raise ValueError("Non-valid index or slice: %s" % key) 

3519 

3520 def create_index(self, optlevel=6, kind="medium", filters=None, 

3521 tmp_dir=None, _blocksizes=None, _testmode=False, 

3522 _verbose=False): 

3523 """Create an index for this column. 

3524 

3525 .. warning:: 

3526 

3527 In some situations it is useful to get a completely sorted 

3528 index (CSI). For those cases, it is best to use the 

3529 :meth:`Column.create_csindex` method instead. 

3530 

3531 Parameters 

3532 ---------- 

3533 optlevel : int 

3534 The optimization level for building the index. The level ranges

3535 from 0 (no optimization) up to 9 (maximum optimization). Higher 

3536 levels of optimization mean better chances for reducing the entropy 

3537 of the index at the price of using more CPU, memory and I/O 

3538 resources for creating the index. 

3539 kind : str 

3540 The kind of the index to be built. It can take the 'ultralight', 

3541 'light', 'medium' or 'full' values. Lighter kinds ('ultralight' 

3542 and 'light') mean that the index takes less space on disk, but will 

3543 perform queries more slowly. Heavier kinds ('medium' and 'full') mean

3544 better chances for reducing the entropy of the index (increasing 

3545 the query speed) at the price of using more disk space as well as 

3546 more CPU, memory and I/O resources for creating the index. 

3547 

3548 Note that selecting a full kind with an optlevel of 9 (the maximum) 

3549 guarantees the creation of an index with zero entropy, that is, a 

3550 completely sorted index (CSI) - provided that the number of rows in 

3551 the table does not exceed 2**48 (that is, more than 100

3552 trillion rows). See the :meth:`Column.create_csindex` method for a

3553 more direct way to create a CSI index. 

3554 filters : Filters 

3555 Specify the Filters instance used to compress the index. If None, 

3556 default index filters will be used (currently, zlib level 1 with 

3557 shuffling). 

3558 tmp_dir 

3559 When kind is other than 'ultralight', a temporary file is created 

3560 during the index build process. You can use the tmp_dir argument 

3561 to specify the directory for this temporary file. The default is 

3562 to create it in the same directory as the file containing the 

3563 original table. 

3564 
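A hedged example (assuming a table with a hypothetical energy column;
the arguments restate the defaults from the signature above)::

indexedrows = table.cols.energy.create_index(optlevel=6, kind='medium')

The call returns the number of indexed rows.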

3565 """ 

3566 

3567 kinds = ['ultralight', 'light', 'medium', 'full'] 

3568 if kind not in kinds: 

3569 raise ValueError("Kind must have any of these values: %s" % kinds) 

3570 if (not isinstance(optlevel, int) or 

3571 (optlevel < 0 or optlevel > 9)): 

3572 raise ValueError("Optimization level must be an integer in the " 

3573 "range 0-9") 

3574 if filters is None: 

3575 filters = default_index_filters 

3576 if tmp_dir is None: 

3577 tmp_dir = str(Path(self._table_file.filename).parent) 

3578 else: 

3579 if not Path(tmp_dir).is_dir(): 

3580 raise ValueError( 

3581 f"Temporary directory '{tmp_dir}' does not exist" 

3582 ) 

3583 if (_blocksizes is not None and 

3584 (not isinstance(_blocksizes, tuple) or len(_blocksizes) != 4)): 

3585 raise ValueError("_blocksizes must be a tuple with exactly 4 " 

3586 "elements") 

3587 idxrows = _column__create_index(self, optlevel, kind, filters, 

3588 tmp_dir, _blocksizes, _verbose) 

3589 return SizeType(idxrows) 

3590 

3591 def create_csindex(self, filters=None, tmp_dir=None, 

3592 _blocksizes=None, _testmode=False, _verbose=False): 

3593 """Create a completely sorted index (CSI) for this column. 

3594 

3595 This method guarantees the creation of an index with zero entropy, that 

3596 is, a completely sorted index (CSI) -- provided that the number of rows 

3597 in the table does not exceed 2**48 (that is, more than 100

3598 trillion rows). A CSI index is needed for some table methods (like

3599 :meth:`Table.itersorted` or :meth:`Table.read_sorted`) in order to 

3600 ensure completely sorted results. 

3601 

3602 For the meaning of filters and tmp_dir arguments see 

3603 :meth:`Column.create_index`. 

3604 

3605 Notes 

3606 ----- 

3607 This method is equivalent to 

3608 Column.create_index(optlevel=9, kind='full', ...). 

3609 
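A short usage sketch (assuming a hypothetical energy column)::

table.cols.energy.create_csindex()
for row in table.itersorted('energy'):
    pass # rows arrive in completely sorted order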

3610 """ 

3611 

3612 return self.create_index( 

3613 kind='full', optlevel=9, filters=filters, tmp_dir=tmp_dir, 

3614 _blocksizes=_blocksizes, _testmode=_testmode, _verbose=_verbose) 

3615 

3616 def _do_reindex(self, dirty): 

3617 """Common code for reindex() and reindex_dirty() codes.""" 

3618 

3619 index = self.index 

3620 dodirty = True 

3621 if dirty and (index is None or not index.dirty):

3622 dodirty = False 

3623 if index is not None and dodirty: 

3624 self._table_file._check_writable() 

3625 # Get the old index parameters 

3626 kind = index.kind 

3627 optlevel = index.optlevel 

3628 filters = index.filters 

3629 # We *need* to tell the index that it is going to be undirty. 

3630 # This is needed here so as to unnail() the condition cache. 

3631 index.dirty = False 

3632 # Delete the existing Index 

3633 index._f_remove() 

3634 # Create a new Index with the previous parameters 

3635 return SizeType(self.create_index( 

3636 kind=kind, optlevel=optlevel, filters=filters)) 

3637 else: 

3638 return SizeType(0) # The column is not indexed or its index is clean

3639 

3640 def reindex(self): 

3641 """Recompute the index associated with this column. 

3642 

3643 This can be useful when you suspect that, for any reason, 

3644 the index information is no longer valid and you want to rebuild it. 

3645 

3646 This method does nothing if the column is not indexed. 

3647 

3648 """ 

3649 

3650 self._do_reindex(dirty=False) 

3651 

3652 def reindex_dirty(self): 

3653 """Recompute the associated index only if it is dirty. 

3654 

3655 This can be useful when you have set :attr:`Table.autoindex` to false 

3656 for the table and you want to update the column's index after an 

3657 invalidating index operation (like :meth:`Table.remove_rows`). 

3658 

3659 This method does nothing if the column is not indexed. 

3660 
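A sketch of the intended workflow (assuming a hypothetical indexed
energy column)::

table.autoindex = False # defer automatic index updates
table.remove_rows(0, 10) # leaves the column index dirty
table.cols.energy.reindex_dirty() # rebuilds only if dirty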

3661 """ 

3662 

3663 self._do_reindex(dirty=True) 

3664 

3665 def remove_index(self): 

3666 """Remove the index associated with this column. 

3667 

3668 This method does nothing if the column is not indexed. The removed 

3669 index can be created again by calling the :meth:`Column.create_index` 

3670 method. 

3671 
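For instance (assuming a hypothetical indexed energy column)::

table.cols.energy.remove_index()
assert not table.cols.energy.is_indexed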

3672 """ 

3673 

3674 self._table_file._check_writable() 

3675 

3676 # Remove the index if existing. 

3677 if self.is_indexed: 

3678 index = self.index 

3679 index._f_remove() 

3680 self.table._set_column_indexing(self.pathname, False) 

3681 

3682 def close(self): 

3683 """Close this column.""" 

3684 

3685 self.__dict__.clear() 

3686 

3687 def __str__(self): 

3688 """The string representation for this object.""" 

3689 

3690 return (f"{self._table_path}.cols.{self.pathname.replace('/', '.')} " 

3691 f"({self.__class__.__name__}{self.shape}, " 

3692 f"{self.descr._v_types[self.name]}, idx={self.index})") 

3693 

3694 def __repr__(self): 

3695 """A detailed string representation for this object.""" 

3696 

3697 return str(self)