Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/file.py: 23%
842 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Create PyTables files and the object tree.
This module supports importing generic HDF5 files, on top of which
PyTables files are created, read or extended. If a file exists, an
object tree mirroring its hierarchical structure is created in memory.
The File class offers methods to traverse the tree, as well as to create
new nodes.
9"""
11import atexit
12import datetime
13import os
14import weakref
15import warnings
16from collections import defaultdict
17from pathlib import Path
19import numexpr as ne
20import numpy as np
22from . import hdf5extension
23from . import utilsextension
24from . import parameters
25from .exceptions import (ClosedFileError, FileModeError, NodeError,
26 NoSuchNodeError, UnclosedFileWarning, UndoRedoError, ClosedNodeError,
27 PerformanceWarning)
28from .registry import get_class_by_name
29from .path import join_path, split_path
30from . import undoredo
31from .description import (IsDescription, UInt8Col, StringCol,
32 descr_from_dtype, dtype_from_descr)
33from .filters import Filters
34from .node import Node, NotLoggedMixin
35from .group import Group, RootGroup
36from .group import TransactionGroupG, TransactionG, MarkG
37from .leaf import Leaf
38from .array import Array
39from .carray import CArray
40from .earray import EArray
41from .vlarray import VLArray
42from .table import Table
43from . import linkextension
44from .utils import detect_number_of_cores
45from . import lrucacheextension
46from .flavor import flavor_of, array_as_internal
47from .atom import Atom
49from .link import SoftLink, ExternalLink
52# format_version = "1.0" # Initial format
53# format_version = "1.1" # Changes in ucl compression
54# format_version = "1.2" # Support for enlargeable arrays and VLA's
55# # 1.2 was introduced in PyTables 0.8
56# format_version = "1.3" # Support for indexes in Tables
57# # 1.3 was introduced in PyTables 0.9
58# format_version = "1.4" # Support for multidimensional attributes
59# # 1.4 was introduced in PyTables 1.1
60# format_version = "1.5" # Support for persistent defaults in tables
61# # 1.5 was introduced in PyTables 1.2
62# format_version = "1.6" # Support for NumPy objects and new flavors for
63# # objects.
64# # 1.6 was introduced in pytables 1.3
65# format_version = "2.0" # Pickles are not used anymore in system attrs
66# # 2.0 was introduced in PyTables 2.0
67format_version = "2.1" # Numeric and numarray flavors are gone.
69compatible_formats = [] # Old format versions we can read
70# # Empty means that we support all the old formats
73class _FileRegistry:
74 def __init__(self):
75 self._name_mapping = defaultdict(set)
76 self._handlers = set()
78 @property
79 def filenames(self):
80 return list(self._name_mapping)
82 @property
83 def handlers(self):
84 # return set(self._handlers) # return a copy
85 return self._handlers
87 def __len__(self):
88 return len(self._handlers)
90 def __contains__(self, filename):
91 return filename in self.filenames
93 def add(self, handler):
94 self._name_mapping[handler.filename].add(handler)
95 self._handlers.add(handler)
97 def remove(self, handler):
98 filename = handler.filename
99 self._name_mapping[filename].remove(handler)
100 # remove enpty keys
101 if not self._name_mapping[filename]:
102 del self._name_mapping[filename]
103 self._handlers.remove(handler)
105 def get_handlers_by_name(self, filename):
106 # return set(self._name_mapping[filename]) # return a copy
107 return self._name_mapping[filename]
109 def close_all(self):
110 handlers = list(self._handlers) # make a copy
111 for fileh in handlers:
112 msg = f"Closing remaining open file: {fileh.filename}"
113 warnings.warn(UnclosedFileWarning(msg))
114 fileh.close()
# Registry of opened files (keys are filenames and values filehandlers).
_open_files = _FileRegistry()

# Opcodes for do-undo (Undo/Redo) actions.
_op_to_code = {
    "MARK": 0,
    "CREATE": 1,
    "REMOVE": 2,
    "MOVE": 3,
    "ADDATTR": 4,
    "DELATTR": 5,
}

# Inverse of _op_to_code: the opcode value is the index into this list.
_code_to_op = ["MARK", "CREATE", "REMOVE", "MOVE", "ADDATTR", "DELATTR"]


# Paths and names for hidden nodes related with transactions.
_trans_version = '1.0'

# '/_p_transactions' -- hidden group holding all transaction data.
_trans_group_parent = '/'
_trans_group_name = '_p_transactions'
_trans_group_path = join_path(_trans_group_parent, _trans_group_name)

# '/_p_transactions/actionlog' -- the log of do/undo actions.
_action_log_parent = _trans_group_path
_action_log_name = 'actionlog'
_action_log_path = join_path(_action_log_parent, _action_log_name)

# '/_p_transactions/t%d' -- one group per transaction.
_trans_parent = _trans_group_path
_trans_name = 't%d'  # %d -> transaction number
_trans_path = join_path(_trans_parent, _trans_name)

# '/_p_transactions/t%d/m%d' -- one group per mark.
_markParent = _trans_path
_markName = 'm%d'  # %d -> mark number
_markPath = join_path(_markParent, _markName)

# '/_p_transactions/t%d/m%d/a%d' -- per-action storage; presumably the
# shadow copies used by undo/redo (confirm against the undoredo module).
_shadow_parent = _markPath
_shadow_name = 'a%d'  # %d -> action number
_shadow_path = join_path(_shadow_parent, _shadow_name)
157def _checkfilters(filters):
158 if not (filters is None or
159 isinstance(filters, Filters)):
160 raise TypeError("filter parameter has to be None or a Filter "
161 "instance and the passed type is: '%s'" %
162 type(filters))
def copy_file(srcfilename, dstfilename, overwrite=False, **kwargs):
    """An easy way of copying one PyTables file to another.

    Copy the existing, readable PyTables file named srcfilename into a
    file called dstfilename.  When the destination file already exists
    it is only replaced if the overwrite argument is asserted.

    This is a shorthand for the :meth:`File.copy_file` method, which
    operates on an already opened file; the keyword arguments in kwargs
    are forwarded to it to customize the copying process.  See the
    documentation of :meth:`File.copy_file` for a description of those
    arguments.

    """

    # Open the source read-only and make sure it is closed again even
    # if the copy itself fails.
    source = open_file(srcfilename, mode="r")
    try:
        source.copy_file(dstfilename, overwrite=overwrite, **kwargs)
    finally:
        source.close()
# Textual HDF5 library version, e.g. '1.8.4-patch1'.
hdf5_version_str = utilsextension.get_hdf5_version()
# Numeric (major, minor, release) tuple parsed from the string above;
# any '-patchN' suffix is discarded before splitting on the dots.
hdf5_version_tup = tuple(map(int, hdf5_version_str.split('-')[0].split('.')))
# HDF5 older than 1.8.7 cannot cope with the same file being opened more
# than once, so a stricter open policy is enforced for those versions.
_FILE_OPEN_POLICY = 'strict' if hdf5_version_tup < (1, 8, 7) else 'default'
def open_file(filename, mode="r", title="", root_uep="/", filters=None,
              **kwargs):
    """Open a PyTables (or generic HDF5) file and return a File object.

    Parameters
    ----------
    filename : str
        The name of the file (supports environment variable expansion).
        It is suggested that file names have any of the .h5, .hdf or
        .hdf5 extensions, although this is not mandatory.
    mode : str
        The mode to open the file. It can be one of the following:

        * *'r'*: Read-only; no data can be modified.
        * *'w'*: Write; a new file is created (an existing file
          with the same name would be deleted).
        * *'a'*: Append; an existing file is opened for reading and
          writing, and if the file does not exist it is created.
        * *'r+'*: It is similar to 'a', but the file must already
          exist.

    title : str
        If the file is to be created, a TITLE string attribute will be
        set on the root group with the given value. Otherwise, the
        title will be read from disk, and this will not have any effect.
    root_uep : str
        The root User Entry Point. This is a group in the HDF5 hierarchy
        which will be taken as the starting point to create the object
        tree. It can be whatever existing group in the file, named by
        its HDF5 path. If it does not exist, an HDF5ExtError is issued.
        Use this if you do not want to build the *entire* object tree,
        but rather only a *subtree* of it.

        .. versionchanged:: 3.0
           The *rootUEP* parameter has been renamed into *root_uep*.

    filters : Filters
        An instance of the Filters (see :ref:`FiltersClassDescr`) class
        that provides information about the desired I/O filters
        applicable to the leaves that hang directly from the *root group*,
        unless other filter properties are specified for these leaves.
        Besides, if you do not specify filter properties for child groups,
        they will inherit these ones, which will in turn propagate to
        child nodes.

    Notes
    -----
    In addition, it recognizes the (lowercase) names of parameters
    present in :file:`tables/parameters.py` as additional keyword
    arguments.
    See :ref:`parameter_files` for a detailed info on the supported
    parameters.

    .. note::

        If you need to deal with a large number of nodes in an
        efficient way, please see :ref:`LRUOptim` for more info and
        advices about the integrated node cache engine.

    """
    filename = os.fspath(filename)
    # XXX filename normalization ??

    # Guard against incompatible re-opens of an already opened file.
    if _FILE_OPEN_POLICY == 'strict':
        # Under the strict policy (old HDF5 libraries) a file may not be
        # opened twice at all, not even in read-only mode.
        if filename in _open_files:
            raise ValueError(
                "The file '%s' is already opened. "
                "Please close it before reopening. "
                "HDF5 v.%s, FILE_OPEN_POLICY = '%s'" % (
                    filename, utilsextension.get_hdf5_version(),
                    _FILE_OPEN_POLICY))
    else:
        for handler in _open_files.get_handlers_by_name(filename):
            previous_mode = handler.mode
            if mode == 'r' and previous_mode != 'r':
                # 'r' is incompatible with everything except 'r' itself.
                raise ValueError(
                    "The file '%s' is already opened, but "
                    "not in read-only mode (as requested)." % filename)
            elif mode in ('a', 'r+') and previous_mode == 'r':
                # 'a' and 'r+' are compatible with everything except 'r'.
                raise ValueError(
                    "The file '%s' is already opened, but "
                    "in read-only mode. Please close it before "
                    "reopening in append mode." % filename)
            elif mode == 'w':
                # 'w' would destroy the contents others are using.
                raise ValueError(
                    "The file '%s' is already opened. Please "
                    "close it before reopening in write mode." % filename)

    # All checks passed: build and return the File instance.
    return File(filename, mode, title, root_uep, filters, **kwargs)
# A dumb cache class that keeps nothing at all
298class _NoCache:
299 def __len__(self):
300 return 0
302 def __contains__(self, key):
303 return False
305 def __iter__(self):
306 return iter([])
308 def __setitem__(self, key, value):
309 pass
311 __marker = object()
313 def pop(self, key, d=__marker):
314 if d is not self.__marker:
315 return d
316 raise KeyError(key)
319class _DictCache(dict):
320 def __init__(self, nslots):
321 if nslots < 1:
322 raise ValueError("Invalid number of slots: %d" % nslots)
323 self.nslots = nslots
324 super().__init__()
326 def __setitem__(self, key, value):
327 # Check if we are running out of space
328 if len(self) > self.nslots:
329 warnings.warn(
330 "the dictionary of node cache is exceeding the recommended "
331 "maximum number (%d); be ready to see PyTables asking for "
332 "*lots* of memory and possibly slow I/O." % (
333 self.nslots), PerformanceWarning)
334 super().__setitem__(key, value)
class NodeManager:
    """Keep track of the nodes of an open file.

    Nodes are tracked in two structures: ``registry``, a weak-value
    dictionary that never keeps nodes alive by itself, and ``cache``,
    a (possibly bounded) strong-reference cache of recently used nodes.
    """

    def __init__(self, nslots=64, node_factory=None):
        super().__init__()

        # Weak references only: entries vanish when nodes are collected.
        self.registry = weakref.WeakValueDictionary()

        # Meaning of ``nslots``:
        #   > 0  -- LRU cache with that many slots
        #   == 0 -- no caching at all
        #   < 0  -- unbounded dict cache, warning past abs(nslots) items
        if nslots > 0:
            cache = lrucacheextension.NodeCache(nslots)
        elif nslots == 0:
            cache = _NoCache()
        else:
            # nslots < 0
            cache = _DictCache(-nslots)

        self.cache = cache

        # node_factory(node_path) -- callable used by get_node() to
        # build nodes found in neither the cache nor the registry
        # (set after construction, e.g. to the root group's loader).
        self.node_factory = node_factory

    def register_node(self, node, key):
        """Insert `node` in the registry under `key` (or its pathname)."""
        if key is None:
            key = node._v_pathname

        if key in self.registry:
            if not self.registry[key]._v_isopen:
                # A stale (closed) entry may be silently replaced.
                del self.registry[key]
                self.registry[key] = node
            elif self.registry[key] is not node:
                raise RuntimeError('trying to register a node with an '
                                   'existing key: ``%s``' % key)
        else:
            self.registry[key] = node

    def cache_node(self, node, key=None):
        """Store `node` in the cache (registering it as well)."""
        if key is None:
            key = node._v_pathname

        self.register_node(node, key)
        if key in self.cache:
            oldnode = self.cache.pop(key)
            if oldnode is not node and oldnode._v_isopen:
                raise RuntimeError('trying to cache a node with an '
                                   'existing key: ``%s``' % key)

        self.cache[key] = node

    def get_node(self, key):
        """Return the node for `key`, trying cache, registry, factory."""
        # pop + re-cache refreshes the node's position in the LRU cache.
        node = self.cache.pop(key, None)
        if node is not None:
            if node._v_isopen:
                self.cache_node(node, key)
                return node
            else:
                # this should not happen
                warnings.warn("a closed node found in the cache: "
                              "``%s``" % key)

        if key in self.registry:
            node = self.registry[key]
            if node is None:
                # this should not happen since WeakValueDictionary drops
                # all dead weakrefs
                warnings.warn("None is stored in the registry for key: "
                              "``%s``" % key)
            elif node._v_isopen:
                self.cache_node(node, key)
                return node
            else:
                # this should not happen
                warnings.warn("a closed node found in the registry: "
                              "``%s``" % key)
                del self.registry[key]
                node = None

        # Last resort: build the node from scratch.
        if self.node_factory:
            node = self.node_factory(key)
            self.cache_node(node, key)

        return node

    def rename_node(self, oldkey, newkey):
        """Re-key a node in both the cache and the registry."""
        for cache in (self.cache, self.registry):
            if oldkey in cache:
                node = cache.pop(oldkey)
                cache[newkey] = node

    def drop_from_cache(self, nodepath):
        """Remove the node from cache"""

        # Remove the node from the cache.
        self.cache.pop(nodepath, None)

    def drop_node(self, node, check_unregistered=True):
        """Drop the `node`.

        Remove the node from the cache and, if it has no more references,
        close it.

        """

        # Remove all references to the node.
        nodepath = node._v_pathname

        self.drop_from_cache(nodepath)

        if nodepath in self.registry:
            if not node._v_isopen:
                del self.registry[nodepath]
        elif check_unregistered:
            # If the node is not in the registry (this should never happen)
            # we close it forcibly since it is not ensured that the __del__
            # method is called for object that are still alive when the
            # interpreter is shut down
            if node._v_isopen:
                warnings.warn("dropping a node that is not in the registry: "
                              "``%s``" % nodepath)
                node._g_pre_kill_hook()
                node._f_close()

    def flush_nodes(self):
        """Flush every open Leaf in the registry (indexes excluded)."""
        # Only iterate over the nodes in the registry since nodes in the
        # cache should always have an entry in the registry
        closed_keys = []
        for path, node in list(self.registry.items()):
            if not node._v_isopen:
                closed_keys.append(path)
            elif '/_i_' not in path:  # Indexes are not necessary to be flushed
                if isinstance(node, Leaf):
                    node.flush()

        for path in closed_keys:
            # self.cache.pop(path, None)
            if path in self.cache:
                # NOTE(review): message looks garbled -- probably meant
                # "closed node found in the cache"; left unchanged here.
                warnings.warn("closed node the cache: ``%s``" % path)
                self.cache.pop(path, None)
            self.registry.pop(path)

    @staticmethod
    def _close_nodes(nodepaths, get_node):
        """Close every open node named in `nodepaths` via `get_node`."""
        for nodepath in nodepaths:
            try:
                node = get_node(nodepath)
            except KeyError:
                pass
            else:
                if not node._v_isopen or node._v__deleting:
                    continue

                try:
                    # Avoid descendent nodes to also iterate over
                    # their descendents, which are already to be
                    # closed by this loop.
                    if hasattr(node, '_f_get_child'):
                        node._g_close()
                    else:
                        node._f_close()
                    del node
                except ClosedNodeError:
                    # import traceback
                    # type_, value, tb = sys.exc_info()
                    # exception_dump = ''.join(
                    #     traceback.format_exception(type_, value, tb))
                    # warnings.warn(
                    #     "A '%s' exception occurred trying to close a node "
                    #     "that was supposed to be open.\n"
                    #     "%s" % (type_.__name__, exception_dump))
                    pass

    def close_subtree(self, prefix='/'):
        """Close every cached/registered node whose path starts at `prefix`."""
        if not prefix.endswith('/'):
            prefix = prefix + '/'

        cache = self.cache
        registry = self.registry

        # Ensure tables are closed before their indices
        paths = [
            path for path in cache
            if path.startswith(prefix) and '/_i_' not in path
        ]
        self._close_nodes(paths, cache.pop)

        # Close everything else (i.e. indices)
        paths = [path for path in cache if path.startswith(prefix)]
        self._close_nodes(paths, cache.pop)

        # Ensure tables are closed before their indices
        paths = [
            path for path in registry
            if path.startswith(prefix) and '/_i_' not in path
        ]
        self._close_nodes(paths, registry.pop)

        # Close everything else (i.e. indices)
        paths = [path for path in registry if path.startswith(prefix)]
        self._close_nodes(paths, registry.pop)

    def shutdown(self):
        """Close every remaining node (cache first, then registry)."""
        registry = self.registry
        cache = self.cache

        # self.close_subtree('/')

        keys = list(cache)  # copy
        for key in keys:
            node = cache.pop(key)
            if node._v_isopen:
                registry.pop(node._v_pathname, None)
                node._f_close()

        while registry:
            key, node = registry.popitem()
            if node._v_isopen:
                node._f_close()
553class File(hdf5extension.File):
554 """The in-memory representation of a PyTables file.
556 An instance of this class is returned when a PyTables file is
557 opened with the :func:`tables.open_file` function. It offers methods
558 to manipulate (create, rename, delete...) nodes and handle their
559 attributes, as well as methods to traverse the object tree.
560 The *user entry point* to the object tree attached to the HDF5 file
561 is represented in the root_uep attribute.
562 Other attributes are available.
564 File objects support an *Undo/Redo mechanism* which can be enabled
565 with the :meth:`File.enable_undo` method. Once the Undo/Redo
566 mechanism is enabled, explicit *marks* (with an optional unique
567 name) can be set on the state of the database using the
568 :meth:`File.mark`
569 method. There are two implicit marks which are always available:
570 the initial mark (0) and the final mark (-1). Both the identifier
571 of a mark and its name can be used in *undo* and *redo* operations.
573 Hierarchy manipulation operations (node creation, movement and
574 removal) and attribute handling operations (setting and deleting)
575 made after a mark can be undone by using the :meth:`File.undo`
576 method, which returns the database to the state of a past mark.
577 If undo() is not followed by operations that modify the hierarchy
578 or attributes, the :meth:`File.redo` method can be used to return
579 the database to the state of a future mark. Else, future states of
580 the database are forgotten.
582 Note that data handling operations can not be undone nor redone by
583 now. Also, hierarchy manipulation operations on nodes that do not
584 support the Undo/Redo mechanism issue an UndoRedoWarning *before*
585 changing the database.
587 The Undo/Redo mechanism is persistent between sessions and can
588 only be disabled by calling the :meth:`File.disable_undo` method.
590 File objects can also act as context managers when using the with
591 statement introduced in Python 2.5. When exiting a context, the
592 file is automatically closed.
594 Parameters
595 ----------
596 filename : str
597 The name of the file (supports environment variable expansion).
598 It is suggested that file names have any of the .h5, .hdf or
599 .hdf5 extensions, although this is not mandatory.
601 mode : str
602 The mode to open the file. It can be one of the
603 following:
605 * *'r'*: Read-only; no data can be modified.
606 * *'w'*: Write; a new file is created (an existing file
607 with the same name would be deleted).
608 * *'a'*: Append; an existing file is opened for reading
609 and writing, and if the file does not exist it is created.
610 * *'r+'*: It is similar to 'a', but the file must already
611 exist.
613 title : str
614 If the file is to be created, a TITLE string attribute will be
615 set on the root group with the given value. Otherwise, the
616 title will be read from disk, and this will not have any effect.
618 root_uep : str
619 The root User Entry Point. This is a group in the HDF5 hierarchy
620 which will be taken as the starting point to create the object
621 tree. It can be whatever existing group in the file, named by
622 its HDF5 path. If it does not exist, an HDF5ExtError is issued.
623 Use this if you do not want to build the *entire* object tree,
624 but rather only a *subtree* of it.
626 .. versionchanged:: 3.0
627 The *rootUEP* parameter has been renamed into *root_uep*.
629 filters : Filters
630 An instance of the Filters (see :ref:`FiltersClassDescr`) class that
631 provides information about the desired I/O filters applicable to the
632 leaves that hang directly from the *root group*, unless other filter
633 properties are specified for these leaves. Besides, if you do not
634 specify filter properties for child groups, they will inherit these
635 ones, which will in turn propagate to child nodes.
637 Notes
638 -----
639 In addition, it recognizes the (lowercase) names of parameters
640 present in :file:`tables/parameters.py` as additional keyword
641 arguments.
642 See :ref:`parameter_files` for a detailed info on the supported
643 parameters.
646 .. rubric:: File attributes
648 .. attribute:: filename
650 The name of the opened file.
652 .. attribute:: format_version
654 The PyTables version number of this file.
656 .. attribute:: isopen
658 True if the underlying file is open, false otherwise.
660 .. attribute:: mode
662 The mode in which the file was opened.
664 .. attribute:: root
666 The *root* of the object tree hierarchy (a Group instance).
668 .. attribute:: root_uep
670 The UEP (user entry point) group name in the file (see
671 the :func:`open_file` function).
673 .. versionchanged:: 3.0
674 The *rootUEP* attribute has been renamed into *root_uep*.
676 """
    # The top level kinds. Group must go first!
    # NOTE(review): ordering apparently matters ("Group must go first"),
    # but the consumer of this tuple is not visible in this chunk.
    _node_kinds = ('Group', 'Leaf', 'Link', 'Unknown')
681 @property
682 def title(self):
683 """The title of the root group in the file."""
684 return self.root._v_title
686 @title.setter
687 def title(self, title):
688 self.root._v_title = title
690 @title.deleter
691 def title(self):
692 del self.root._v_title
694 @property
695 def filters(self):
696 """Default filter properties for the root group
697 (see :ref:`FiltersClassDescr`)."""
698 return self.root._v_filters
700 @filters.setter
701 def filters(self, filters):
702 self.root._v_filters = filters
704 @filters.deleter
705 def filters(self):
706 del self.root._v_filters
    def __init__(self, filename, mode="r", title="",
                 root_uep="/", filters=None, **kwargs):
        """Open or create the HDF5 file and build the object tree."""

        self.filename = os.fspath(filename)
        """The name of the opened file."""

        self.mode = mode
        """The mode in which the file was opened."""

        if mode not in ('r', 'r+', 'a', 'w'):
            raise ValueError("invalid mode string ``%s``. Allowed modes are: "
                             "'r', 'r+', 'a' and 'w'" % mode)

        # Get all the parameters in parameter file(s): every uppercase,
        # non-underscore name in tables/parameters.py.
        params = {k: v for k, v in parameters.__dict__.items()
                  if k.isupper() and not k.startswith('_')}

        # Update them with possible keyword arguments
        if [k for k in kwargs if k.isupper()]:
            warnings.warn("The use of uppercase keyword parameters is "
                          "deprecated", DeprecationWarning)

        # Keyword overrides are normalized to uppercase before merging.
        kwargs = {k.upper(): v for k, v in kwargs.items()}
        params.update(kwargs)

        # If MAX_*_THREADS is not set yet, set it to the number of cores
        # on this machine.
        if params['MAX_NUMEXPR_THREADS'] is None:
            params['MAX_NUMEXPR_THREADS'] = detect_number_of_cores()

        if params['MAX_BLOSC_THREADS'] is None:
            params['MAX_BLOSC_THREADS'] = detect_number_of_cores()

        self.params = params

        # Now, it is time to initialize the File extension (opens the
        # actual HDF5 file handle).
        self._g_new(filename, mode, **params)

        # Check filters and set PyTables format version for new files.
        new = self._v_new
        if new:
            _checkfilters(filters)
            self.format_version = format_version
            """The PyTables version number of this file."""

        # The node manager must be initialized before the root group
        # initialization but the node_factory attribute is set only later
        # because it is a bound method of the root group itself.
        node_cache_slots = params['NODE_CACHE_SLOTS']
        self._node_manager = NodeManager(nslots=node_cache_slots)

        # For the moment Undo/Redo is not enabled.
        self._undoEnabled = False

        # Set the flag to indicate that the file has been opened.
        # It must be set before opening the root group
        # to allow some basic access to its attributes.
        self.isopen = 1
        """True if the underlying file is open, False otherwise."""

        # Append the name of the file to the global dict of files opened.
        _open_files.add(self)

        # Set the number of times this file has been opened to 1
        self._open_count = 1

        # Get the root group from this file
        self.root = root = self.__get_root_group(root_uep, title, filters)
        """The *root* of the object tree hierarchy (a Group instance)."""

        # Complete the creation of the root node
        # (see the explanation in ``RootGroup.__init__()``.
        root._g_post_init_hook()
        self._node_manager.node_factory = self.root._g_load_child

        # Save the PyTables format version for this file.
        if new:
            if params['PYTABLES_SYS_ATTRS']:
                root._v_attrs._g__setattr(
                    'PYTABLES_FORMAT_VERSION', format_version)

        # If the file is old, and not opened in "read-only" mode,
        # check if it has a transaction log
        if not new and self.mode != "r" and _trans_group_path in self:
            # It does. Enable the undo.
            self.enable_undo()

        # Set the maximum number of threads for Numexpr
        ne.set_vml_num_threads(params['MAX_NUMEXPR_THREADS'])
797 def __get_root_group(self, root_uep, title, filters):
798 """Returns a Group instance which will act as the root group in the
799 hierarchical tree.
801 If file is opened in "r", "r+" or "a" mode, and the file already
802 exists, this method dynamically builds a python object tree
803 emulating the structure present on file.
805 """
807 self._v_objectid = self._get_file_id()
809 if root_uep in [None, ""]:
810 root_uep = "/"
811 # Save the User Entry Point in a variable class
812 self.root_uep = root_uep
814 new = self._v_new
816 # Get format version *before* getting the object tree
817 if not new:
818 # Firstly, get the PyTables format version for this file
819 self.format_version = utilsextension.read_f_attr(
820 self._v_objectid, 'PYTABLES_FORMAT_VERSION')
821 if not self.format_version:
822 # PYTABLES_FORMAT_VERSION attribute is not present
823 self.format_version = "unknown"
824 self._isPTFile = False
825 elif not isinstance(self.format_version, str):
826 # system attributes should always be str
827 self.format_version = self.format_version.decode('utf-8')
829 # Create new attributes for the root Group instance and
830 # create the object tree
831 return RootGroup(self, root_uep, title=title, new=new, filters=filters)
833 def _get_or_create_path(self, path, create):
834 """Get the given `path` or create it if `create` is true.
836 If `create` is true, `path` *must* be a string path and not a
837 node, otherwise a `TypeError`will be raised.
839 """
841 if create:
842 return self._create_path(path)
843 else:
844 return self.get_node(path)
846 def _create_path(self, path):
847 """Create the groups needed for the `path` to exist.
849 The group associated with the given `path` is returned.
851 """
853 if not hasattr(path, 'split'):
854 raise TypeError("when creating parents, parent must be a path")
856 if path == '/':
857 return self.root
859 parent, create_group = self.root, self.create_group
860 for pcomp in path.split('/')[1:]:
861 try:
862 child = parent._f_get_child(pcomp)
863 except NoSuchNodeError:
864 child = create_group(parent, pcomp)
865 parent = child
866 return parent
868 def create_group(self, where, name, title="", filters=None,
869 createparents=False):
870 """Create a new group.
872 Parameters
873 ----------
874 where : str or Group
875 The parent group from which the new group will hang. It can be a
876 path string (for example '/level1/leaf5'), or a Group instance
877 (see :ref:`GroupClassDescr`).
878 name : str
879 The name of the new group.
880 title : str, optional
881 A description for this node (it sets the TITLE HDF5 attribute on
882 disk).
883 filters : Filters
884 An instance of the Filters class (see :ref:`FiltersClassDescr`)
885 that provides information about the desired I/O filters applicable
886 to the leaves that hang directly from this new group (unless other
887 filter properties are specified for these leaves). Besides, if you
888 do not specify filter properties for its child groups, they will
889 inherit these ones.
890 createparents : bool
891 Whether to create the needed groups for the parent
892 path to exist (not done by default).
894 See Also
895 --------
896 Group : for more information on groups
898 """
900 parentnode = self._get_or_create_path(where, createparents)
901 _checkfilters(filters)
902 return Group(parentnode, name,
903 title=title, new=True, filters=filters)
905 def create_table(self, where, name, description=None, title="",
906 filters=None, expectedrows=10_000,
907 chunkshape=None, byteorder=None,
908 createparents=False, obj=None, track_times=True):
909 """Create a new table with the given name in where location.
911 Parameters
912 ----------
913 where : str or Group
914 The parent group from which the new table will hang. It can be a
915 path string (for example '/level1/leaf5'), or a Group instance
916 (see :ref:`GroupClassDescr`).
917 name : str
918 The name of the new table.
919 description : Description
920 This is an object that describes the table, i.e. how
921 many columns it has, their names, types, shapes, etc. It
922 can be any of the following:
924 * *A user-defined class*: This should inherit from the
925 IsDescription class (see :ref:`IsDescriptionClassDescr`)
926 where table fields are specified.
927 * *A dictionary*: For example, when you do not know
928 beforehand which structure your table will have).
929 * *A Description instance*: You can use the description
930 attribute of another table to create a new one with the
931 same structure.
932 * *A NumPy dtype*: A completely general structured NumPy
933 dtype.
934 * *A NumPy (structured) array instance*: The dtype of
935 this structured array will be used as the description.
936 Also, in case the array has actual data, it will be
937 injected into the newly created table.
939 .. versionchanged:: 3.0
940 The *description* parameter can be None (default) if *obj* is
941 provided. In that case the structure of the table is deduced
942 by *obj*.
944 title : str
945 A description for this node (it sets the TITLE HDF5 attribute
946 on disk).
947 filters : Filters
948 An instance of the Filters class (see :ref:`FiltersClassDescr`)
949 that provides information about the desired I/O filters to be
950 applied during the life of this object.
951 expectedrows : int
952 A user estimate of the number of records that will be in the table.
953 If not provided, the default value is EXPECTED_ROWS_TABLE (see
954 :file:`tables/parameters.py`). If you plan to create a bigger
955 table try providing a guess; this will optimize the HDF5 B-Tree
956 creation and management process time and memory used.
957 chunkshape
958 The shape of the data chunk to be read or written in a
959 single HDF5 I/O operation. Filters are applied to those
960 chunks of data. The rank of the chunkshape for tables must
961 be 1. If None, a sensible value is calculated based on the
962 expectedrows parameter (which is recommended).
963 byteorder : str
964 The byteorder of data *on disk*, specified as 'little' or 'big'.
965 If this is not specified, the byteorder is that of the platform,
966 unless you passed an array as the description, in which case
967 its byteorder will be used.
968 createparents : bool
969 Whether to create the needed groups for the parent path to exist
970 (not done by default).
971 obj : python object
972 The recarray to be saved. Accepted types are NumPy record
973 arrays.
975 The *obj* parameter is optional and it can be provided in
976 alternative to the *description* parameter.
977 If both *obj* and *description* are provided they must
978 be consistent with each other.
980 .. versionadded:: 3.0
982 track_times
983 Whether time data associated with the leaf are recorded (object
984 access time, raw data modification time, metadata change time,
985 object birth time); default True. Semantics of these times
986 depend on their implementation in the HDF5 library: refer to
987 documentation of the H5O_info_t data structure. As of HDF5
988 1.8.15, only ctime (metadata change time) is implemented.
990 .. versionadded:: 3.4.3
992 See Also
993 --------
994 Table : for more information on tables
996 """
998 if obj is not None:
999 if not isinstance(obj, np.ndarray):
1000 raise TypeError('invalid obj parameter %r' % obj)
1002 descr, _ = descr_from_dtype(obj.dtype, ptparams=self.params)
1003 if (description is not None and
1004 dtype_from_descr(description,
1005 ptparams=self.params) != obj.dtype):
1006 raise TypeError('the desctiption parameter is not consistent '
1007 'with the data type of the obj parameter')
1008 elif description is None:
1009 description = descr
1011 parentnode = self._get_or_create_path(where, createparents)
1012 if description is None:
1013 raise ValueError("invalid table description: None")
1014 _checkfilters(filters)
1016 ptobj = Table(parentnode, name,
1017 description=description, title=title,
1018 filters=filters, expectedrows=expectedrows,
1019 chunkshape=chunkshape, byteorder=byteorder,
1020 track_times=track_times)
1022 if obj is not None:
1023 ptobj.append(obj)
1025 return ptobj
1027 def create_array(self, where, name, obj=None, title="",
1028 byteorder=None, createparents=False,
1029 atom=None, shape=None, track_times=True):
1030 """Create a new array.
1032 Parameters
1033 ----------
1034 where : str or Group
1035 The parent group from which the new array will hang. It can be a
1036 path string (for example '/level1/leaf5'), or a Group instance
1037 (see :ref:`GroupClassDescr`).
1038 name : str
1039 The name of the new array
1040 obj : python object
1041 The array or scalar to be saved. Accepted types are NumPy
1042 arrays and scalars, as well as native Python sequences and
1043 scalars, provided that values are regular (i.e. they are
1044 not like ``[[1,2],2]``) and homogeneous (i.e. all the
1045 elements are of the same type).
1047 Also, objects that have some of their dimensions equal to 0
1048 are not supported (use an EArray node (see
1049 :ref:`EArrayClassDescr`) if you want to store an array with
1050 one of its dimensions equal to 0).
1052 .. versionchanged:: 3.0
1053 The *Object parameter has been renamed into *obj*.*
1055 title : str
1056 A description for this node (it sets the TITLE HDF5 attribute on
1057 disk).
1058 byteorder : str
1059 The byteorder of the data *on disk*, specified as 'little' or
1060 'big'. If this is not specified, the byteorder is that of the
1061 given object.
1062 createparents : bool, optional
1063 Whether to create the needed groups for the parent path to exist
1064 (not done by default).
1065 atom : Atom
1066 An Atom (see :ref:`AtomClassDescr`) instance representing
1067 the *type* and *shape* of the atomic objects to be saved.
1069 .. versionadded:: 3.0
1071 shape : tuple of ints
1072 The shape of the stored array.
1074 .. versionadded:: 3.0
1076 track_times
1077 Whether time data associated with the leaf are recorded (object
1078 access time, raw data modification time, metadata change time,
1079 object birth time); default True. Semantics of these times
1080 depend on their implementation in the HDF5 library: refer to
1081 documentation of the H5O_info_t data structure. As of HDF5
1082 1.8.15, only ctime (metadata change time) is implemented.
1084 .. versionadded:: 3.4.3
1086 See Also
1087 --------
1088 Array : for more information on arrays
1089 create_table : for more information on the rest of parameters
1091 """
1093 if obj is None:
1094 if atom is None or shape is None:
1095 raise TypeError('if the obj parameter is not specified '
1096 '(or None) then both the atom and shape '
1097 'parametes should be provided.')
1098 else:
1099 # Making strides=(0,...) below is a trick to create the
1100 # array fast and without memory consumption
1101 dflt = np.zeros((), dtype=atom.dtype)
1102 obj = np.ndarray(shape, dtype=atom.dtype, buffer=dflt,
1103 strides=(0,)*len(shape))
1104 else:
1105 flavor = flavor_of(obj)
1106 # use a temporary object because converting obj at this stage
1107 # breaks some test. This is solution performs a double,
1108 # potentially expensive, conversion of the obj parameter.
1109 _obj = array_as_internal(obj, flavor)
1111 if shape is not None and shape != _obj.shape:
1112 raise TypeError('the shape parameter do not match obj.shape')
1114 if atom is not None and atom.dtype != _obj.dtype:
1115 raise TypeError('the atom parameter is not consistent with '
1116 'the data type of the obj parameter')
1118 parentnode = self._get_or_create_path(where, createparents)
1119 return Array(parentnode, name,
1120 obj=obj, title=title, byteorder=byteorder,
1121 track_times=track_times)
1123 def create_carray(self, where, name, atom=None, shape=None, title="",
1124 filters=None, chunkshape=None,
1125 byteorder=None, createparents=False, obj=None,
1126 track_times=True):
1127 """Create a new chunked array.
1129 Parameters
1130 ----------
1131 where : str or Group
1132 The parent group from which the new array will hang. It can
1133 be a path string (for example '/level1/leaf5'), or a Group
1134 instance (see :ref:`GroupClassDescr`).
1135 name : str
1136 The name of the new array
1137 atom : Atom
1138 An Atom (see :ref:`AtomClassDescr`) instance representing
1139 the *type* and *shape* of the atomic objects to be saved.
1141 .. versionchanged:: 3.0
1142 The *atom* parameter can be None (default) if *obj* is
1143 provided.
1145 shape : tuple
1146 The shape of the new array.
1148 .. versionchanged:: 3.0
1149 The *shape* parameter can be None (default) if *obj* is
1150 provided.
1152 title : str, optional
1153 A description for this node (it sets the TITLE HDF5 attribute
1154 on disk).
1155 filters : Filters, optional
1156 An instance of the Filters class (see :ref:`FiltersClassDescr`)
1157 that provides information about the desired I/O filters to
1158 be applied during the life of this object.
1159 chunkshape : tuple or number or None, optional
1160 The shape of the data chunk to be read or written in a
1161 single HDF5 I/O operation. Filters are applied to those
1162 chunks of data. The dimensionality of chunkshape must be
1163 the same as that of shape. If None, a sensible value is
1164 calculated (which is recommended).
1165 byteorder : str, optional
1166 The byteorder of the data *on disk*, specified as 'little'
1167 or 'big'. If this is not specified, the byteorder is that
1168 of the given object.
1169 createparents : bool, optional
1170 Whether to create the needed groups for the parent path to
1171 exist (not done by default).
1172 obj : python object
1173 The array or scalar to be saved. Accepted types are NumPy
1174 arrays and scalars, as well as native Python sequences and
1175 scalars, provided that values are regular (i.e. they are
1176 not like ``[[1,2],2]``) and homogeneous (i.e. all the
1177 elements are of the same type).
1179 Also, objects that have some of their dimensions equal to 0
1180 are not supported. Please use an EArray node (see
1181 :ref:`EArrayClassDescr`) if you want to store an array with
1182 one of its dimensions equal to 0.
1184 The *obj* parameter is optional and it can be provided in
1185 alternative to the *atom* and *shape* parameters.
1186 If both *obj* and *atom* and/or *shape* are provided they must
1187 be consistent with each other.
1189 .. versionadded:: 3.0
1191 track_times
1192 Whether time data associated with the leaf are recorded (object
1193 access time, raw data modification time, metadata change time,
1194 object birth time); default True. Semantics of these times
1195 depend on their implementation in the HDF5 library: refer to
1196 documentation of the H5O_info_t data structure. As of HDF5
1197 1.8.15, only ctime (metadata change time) is implemented.
1199 .. versionadded:: 3.4.3
1201 See Also
1202 --------
1203 CArray : for more information on chunked arrays
1205 """
1207 if obj is not None:
1208 flavor = flavor_of(obj)
1209 obj = array_as_internal(obj, flavor)
1211 if shape is not None and shape != obj.shape:
1212 raise TypeError('the shape parameter do not match obj.shape')
1213 else:
1214 shape = obj.shape
1216 if atom is not None and atom.dtype != obj.dtype:
1217 raise TypeError("the 'atom' parameter is not consistent with "
1218 "the data type of the 'obj' parameter")
1219 elif atom is None:
1220 atom = Atom.from_dtype(obj.dtype)
1221 else:
1222 if atom is None and shape is None:
1223 raise TypeError(
1224 "the 'atom' and 'shape' parameters or the 'obj' parameter "
1225 "must be provided")
1227 parentnode = self._get_or_create_path(where, createparents)
1228 _checkfilters(filters)
1229 ptobj = CArray(parentnode, name,
1230 atom=atom, shape=shape, title=title, filters=filters,
1231 chunkshape=chunkshape, byteorder=byteorder,
1232 track_times=track_times)
1234 if obj is not None:
1235 ptobj[...] = obj
1237 return ptobj
1239 def create_earray(self, where, name, atom=None, shape=None, title="",
1240 filters=None, expectedrows=1000,
1241 chunkshape=None, byteorder=None,
1242 createparents=False, obj=None, track_times=True):
1243 """Create a new enlargeable array.
1245 Parameters
1246 ----------
1247 where : str or Group
1248 The parent group from which the new array will hang. It can be a
1249 path string (for example '/level1/leaf5'), or a Group instance
1250 (see :ref:`GroupClassDescr`).
1251 name : str
1252 The name of the new array
1253 atom : Atom
1254 An Atom (see :ref:`AtomClassDescr`) instance representing the
1255 *type* and *shape* of the atomic objects to be saved.
1257 .. versionchanged:: 3.0
1258 The *atom* parameter can be None (default) if *obj* is
1259 provided.
1261 shape : tuple
1262 The shape of the new array. One (and only one) of the shape
1263 dimensions *must* be 0. The dimension being 0 means that the
1264 resulting EArray object can be extended along it. Multiple
1265 enlargeable dimensions are not supported right now.
1267 .. versionchanged:: 3.0
1268 The *shape* parameter can be None (default) if *obj* is
1269 provided.
1271 title : str, optional
1272 A description for this node (it sets the TITLE HDF5 attribute on
1273 disk).
1274 expectedrows : int, optional
1275 A user estimate about the number of row elements that will be added
1276 to the growable dimension in the EArray node. If not provided, the
1277 default value is EXPECTED_ROWS_EARRAY (see tables/parameters.py).
1278 If you plan to create either a much smaller or a much bigger array
1279 try providing a guess; this will optimize the HDF5 B-Tree creation
1280 and management process time and the amount of memory used.
1281 chunkshape : tuple, numeric, or None, optional
1282 The shape of the data chunk to be read or written in a single HDF5
1283 I/O operation. Filters are applied to those chunks of data. The
1284 dimensionality of chunkshape must be the same as that of shape
1285 (beware: no dimension should be 0 this time!). If None, a sensible
1286 value is calculated based on the expectedrows parameter (which is
1287 recommended).
1288 byteorder : str, optional
1289 The byteorder of the data *on disk*, specified as 'little' or
1290 'big'. If this is not specified, the byteorder is that of the
1291 platform.
1292 createparents : bool, optional
1293 Whether to create the needed groups for the parent path to exist
1294 (not done by default).
1295 obj : python object
1296 The array or scalar to be saved. Accepted types are NumPy
1297 arrays and scalars, as well as native Python sequences and
1298 scalars, provided that values are regular (i.e. they are
1299 not like ``[[1,2],2]``) and homogeneous (i.e. all the
1300 elements are of the same type).
1302 The *obj* parameter is optional and it can be provided in
1303 alternative to the *atom* and *shape* parameters.
1304 If both *obj* and *atom* and/or *shape* are provided they must
1305 be consistent with each other.
1307 .. versionadded:: 3.0
1309 track_times
1310 Whether time data associated with the leaf are recorded (object
1311 access time, raw data modification time, metadata change time,
1312 object birth time); default True. Semantics of these times
1313 depend on their implementation in the HDF5 library: refer to
1314 documentation of the H5O_info_t data structure. As of HDF5
1315 1.8.15, only ctime (metadata change time) is implemented.
1317 .. versionadded:: 3.4.3
1319 See Also
1320 --------
1321 EArray : for more information on enlargeable arrays
1323 """
1325 if obj is not None:
1326 flavor = flavor_of(obj)
1327 obj = array_as_internal(obj, flavor)
1329 earray_shape = (0,) + obj.shape[1:]
1331 if shape is not None and shape != earray_shape:
1332 raise TypeError('the shape parameter is not compatible '
1333 'with obj.shape.')
1334 else:
1335 shape = earray_shape
1337 if atom is not None and atom.dtype != obj.dtype:
1338 raise TypeError('the atom parameter is not consistent with '
1339 'the data type of the obj parameter')
1340 elif atom is None:
1341 atom = Atom.from_dtype(obj.dtype)
1343 parentnode = self._get_or_create_path(where, createparents)
1344 _checkfilters(filters)
1345 ptobj = EArray(parentnode, name,
1346 atom=atom, shape=shape, title=title,
1347 filters=filters, expectedrows=expectedrows,
1348 chunkshape=chunkshape, byteorder=byteorder,
1349 track_times=track_times)
1351 if obj is not None:
1352 ptobj.append(obj)
1354 return ptobj
1356 def create_vlarray(self, where, name, atom=None, title="",
1357 filters=None, expectedrows=None,
1358 chunkshape=None, byteorder=None,
1359 createparents=False, obj=None,
1360 track_times=True):
1361 """Create a new variable-length array.
1363 Parameters
1364 ----------
1365 where : str or Group
1366 The parent group from which the new array will hang. It can
1367 be a path string (for example '/level1/leaf5'), or a Group
1368 instance (see :ref:`GroupClassDescr`).
1369 name : str
1370 The name of the new array
1371 atom : Atom
1372 An Atom (see :ref:`AtomClassDescr`) instance representing
1373 the *type* and *shape* of the atomic objects to be saved.
1375 .. versionchanged:: 3.0
1376 The *atom* parameter can be None (default) if *obj* is
1377 provided.
1379 title : str, optional
1380 A description for this node (it sets the TITLE HDF5 attribute
1381 on disk).
1382 filters : Filters
1383 An instance of the Filters class (see :ref:`FiltersClassDescr`)
1384 that provides information about the desired I/O filters to
1385 be applied during the life of this object.
1386 expectedrows : int, optional
1387 A user estimate about the number of row elements that will
1388 be added to the growable dimension in the `VLArray` node.
1389 If not provided, the default value is ``EXPECTED_ROWS_VLARRAY``
1390 (see ``tables/parameters.py``). If you plan to create either
1391 a much smaller or a much bigger `VLArray` try providing a guess;
1392 this will optimize the HDF5 B-Tree creation and management
1393 process time and the amount of memory used.
1395 .. versionadded:: 3.0
1397 chunkshape : int or tuple of int, optional
1398 The shape of the data chunk to be read or written in a
1399 single HDF5 I/O operation. Filters are applied to those
1400 chunks of data. The dimensionality of chunkshape must be 1.
1401 If None, a sensible value is calculated (which is recommended).
1402 byteorder : str, optional
1403 The byteorder of the data *on disk*, specified as 'little' or
1404 'big'. If this is not specified, the byteorder is that of the
1405 platform.
1406 createparents : bool, optional
1407 Whether to create the needed groups for the parent path to
1408 exist (not done by default).
1409 obj : python object
1410 The array or scalar to be saved. Accepted types are NumPy
1411 arrays and scalars, as well as native Python sequences and
1412 scalars, provided that values are regular (i.e. they are
1413 not like ``[[1,2],2]``) and homogeneous (i.e. all the
1414 elements are of the same type).
1416 The *obj* parameter is optional and it can be provided in
1417 alternative to the *atom* parameter.
1418 If both *obj* and *atom* and are provided they must
1419 be consistent with each other.
1421 .. versionadded:: 3.0
1423 track_times
1424 Whether time data associated with the leaf are recorded (object
1425 access time, raw data modification time, metadata change time,
1426 object birth time); default True. Semantics of these times
1427 depend on their implementation in the HDF5 library: refer to
1428 documentation of the H5O_info_t data structure. As of HDF5
1429 1.8.15, only ctime (metadata change time) is implemented.
1431 .. versionadded:: 3.4.3
1433 See Also
1434 --------
1435 VLArray : for more informationon variable-length arrays
1437 .. versionchanged:: 3.0
1438 The *expectedsizeinMB* parameter has been replaced by
1439 *expectedrows*.
1441 """
1443 if obj is not None:
1444 flavor = flavor_of(obj)
1445 obj = array_as_internal(obj, flavor)
1447 if atom is not None and atom.dtype != obj.dtype:
1448 raise TypeError('the atom parameter is not consistent with '
1449 'the data type of the obj parameter')
1450 if atom is None:
1451 atom = Atom.from_dtype(obj.dtype)
1452 elif atom is None:
1453 raise ValueError('atom parameter cannot be None')
1455 parentnode = self._get_or_create_path(where, createparents)
1456 _checkfilters(filters)
1457 ptobj = VLArray(parentnode, name,
1458 atom=atom, title=title, filters=filters,
1459 expectedrows=expectedrows,
1460 chunkshape=chunkshape, byteorder=byteorder,
1461 track_times=track_times)
1463 if obj is not None:
1464 ptobj.append(obj)
1466 return ptobj
1468 def create_hard_link(self, where, name, target, createparents=False):
1469 """Create a hard link.
1471 Create a hard link to a `target` node with the given `name` in
1472 `where` location. `target` can be a node object or a path
1473 string. If `createparents` is true, the intermediate groups
1474 required for reaching `where` are created (the default is not
1475 doing so).
1477 The returned node is a regular `Group` or `Leaf` instance.
1479 """
1481 targetnode = self.get_node(target)
1482 parentnode = self._get_or_create_path(where, createparents)
1483 linkextension._g_create_hard_link(parentnode, name, targetnode)
1484 # Refresh children names in link's parent node
1485 parentnode._g_add_children_names()
1486 # Return the target node
1487 return self.get_node(parentnode, name)
1489 def create_soft_link(self, where, name, target, createparents=False):
1490 """Create a soft link (aka symbolic link) to a `target` node.
1492 Create a soft link (aka symbolic link) to a `target` nodewith
1493 the given `name` in `where` location. `target` can be a node
1494 object or a path string. If `createparents` is true, the
1495 intermediate groups required for reaching `where` are created.
1497 (the default is not doing so).
1499 The returned node is a SoftLink instance. See the SoftLink
1500 class (in :ref:`SoftLinkClassDescr`) for more information on
1501 soft links.
1503 """
1505 if not isinstance(target, str):
1506 if hasattr(target, '_v_pathname'): # quacks like a Node
1507 target = target._v_pathname
1508 else:
1509 raise ValueError(
1510 "`target` has to be a string or a node object")
1511 parentnode = self._get_or_create_path(where, createparents)
1512 slink = SoftLink(parentnode, name, target)
1513 # Refresh children names in link's parent node
1514 parentnode._g_add_children_names()
1515 return slink
1517 def create_external_link(self, where, name, target, createparents=False):
1518 """Create an external link.
1520 Create an external link to a *target* node with the given *name*
1521 in *where* location. *target* can be a node object in another
1522 file or a path string in the form 'file:/path/to/node'. If
1523 *createparents* is true, the intermediate groups required for
1524 reaching *where* are created (the default is not doing so).
1526 The returned node is an :class:`ExternalLink` instance.
1528 """
1530 if not isinstance(target, str):
1531 if hasattr(target, '_v_pathname'): # quacks like a Node
1532 target = target._v_file.filename + ':' + target._v_pathname
1533 else:
1534 raise ValueError(
1535 "`target` has to be a string or a node object")
1536 elif target.find(':/') == -1:
1537 raise ValueError(
1538 "`target` must expressed as 'file:/path/to/node'")
1539 parentnode = self._get_or_create_path(where, createparents)
1540 elink = ExternalLink(parentnode, name, target)
1541 # Refresh children names in link's parent node
1542 parentnode._g_add_children_names()
1543 return elink
1545 def _get_node(self, nodepath):
1546 # The root node is always at hand.
1547 if nodepath == '/':
1548 return self.root
1550 node = self._node_manager.get_node(nodepath)
1551 assert node is not None, "unable to instantiate node ``%s``" % nodepath
1553 return node
1555 def get_node(self, where, name=None, classname=None):
1556 """Get the node under where with the given name.
1558 Parameters
1559 ----------
1560 where : str or Node
1561 This can be a path string leading to a node or a Node instance (see
1562 :ref:`NodeClassDescr`). If no name is specified, that node is
1563 returned.
1565 .. note::
1567 If where is a Node instance from a different file than the one
1568 on which this function is called, the returned node will also
1569 be from that other file.
1571 name : str, optional
1572 If a name is specified, this must be a string with the name of
1573 a node under where. In this case the where argument can only
1574 lead to a Group (see :ref:`GroupClassDescr`) instance (else a
1575 TypeError is raised). The node called name under the group
1576 where is returned.
1577 classname : str, optional
1578 If the classname argument is specified, it must be the name of
1579 a class derived from Node (e.g. Table). If the node is found but it
1580 is not an instance of that class, a NoSuchNodeError is also raised.
1582 If the node to be returned does not exist, a NoSuchNodeError is
1583 raised. Please note that hidden nodes are also considered.
1585 """
1587 self._check_open()
1589 if isinstance(where, Node):
1590 where._g_check_open()
1592 basepath = where._v_pathname
1593 nodepath = join_path(basepath, name or '') or '/'
1594 node = where._v_file._get_node(nodepath)
1595 elif isinstance(where, (str, np.str_)):
1596 if not where.startswith('/'):
1597 raise NameError("``where`` must start with a slash ('/')")
1599 basepath = where
1600 nodepath = join_path(basepath, name or '') or '/'
1601 node = self._get_node(nodepath)
1602 else:
1603 raise TypeError(
1604 f"``where`` must be a string or a node: {where!r}")
1606 # Finally, check whether the desired node is an instance
1607 # of the expected class.
1608 if classname:
1609 class_ = get_class_by_name(classname)
1610 if not isinstance(node, class_):
1611 npathname = node._v_pathname
1612 nclassname = node.__class__.__name__
1613 # This error message is right since it can never be shown
1614 # for ``classname in [None, 'Node']``.
1615 raise NoSuchNodeError(
1616 "could not find a ``%s`` node at ``%s``; "
1617 "instead, a ``%s`` node has been found there"
1618 % (classname, npathname, nclassname))
1620 return node
1622 def is_visible_node(self, path):
1623 """Is the node under `path` visible?
1625 If the node does not exist, a NoSuchNodeError is raised.
1627 """
1629 # ``util.isvisiblepath()`` is still recommended for internal use.
1630 return self.get_node(path)._f_isvisible()
1632 def rename_node(self, where, newname, name=None, overwrite=False):
1633 """Change the name of the node specified by where and name to newname.
1635 Parameters
1636 ----------
1637 where, name
1638 These arguments work as in
1639 :meth:`File.get_node`, referencing the node to be acted upon.
1640 newname : str
1641 The new name to be assigned to the node (a string).
1642 overwrite : bool
1643 Whether to recursively remove a node with the same
1644 newname if it already exists (not done by default).
1646 """
1648 obj = self.get_node(where, name=name)
1649 obj._f_rename(newname, overwrite)
1651 def move_node(self, where, newparent=None, newname=None, name=None,
1652 overwrite=False, createparents=False):
1653 """Move the node specified by where and name to newparent/newname.
1655 Parameters
1656 ----------
1657 where, name : path
1658 These arguments work as in
1659 :meth:`File.get_node`, referencing the node to be acted upon.
1660 newparent
1661 The destination group the node will be moved into (a
1662 path name or a Group instance). If it is
1663 not specified or None, the current parent
1664 group is chosen as the new parent.
1665 newname
1666 The new name to be assigned to the node in its
1667 destination (a string). If it is not specified or
1668 None, the current name is chosen as the
1669 new name.
1671 Notes
1672 -----
1673 The other arguments work as in :meth:`Node._f_move`.
1675 """
1677 obj = self.get_node(where, name=name)
1678 obj._f_move(newparent, newname, overwrite, createparents)
1680 def copy_node(self, where, newparent=None, newname=None, name=None,
1681 overwrite=False, recursive=False, createparents=False,
1682 **kwargs):
1683 """Copy the node specified by where and name to newparent/newname.
1685 Parameters
1686 ----------
1687 where : str
1688 These arguments work as in
1689 :meth:`File.get_node`, referencing the node to be acted
1690 upon.
1691 newparent : str or Group
1692 The destination group that the node will be copied
1693 into (a path name or a Group
1694 instance). If not specified or None, the
1695 current parent group is chosen as the new parent.
1696 newname : str
1697 The name to be assigned to the new copy in its
1698 destination (a string). If it is not specified or
1699 None, the current name is chosen as the
1700 new name.
1701 name : str
1702 These arguments work as in
1703 :meth:`File.get_node`, referencing the node to be acted
1704 upon.
1705 overwrite : bool, optional
1706 If True, the destination group will be overwritten if it already
1707 exists. Defaults to False.
1708 recursive : bool, optional
1709 If True, all descendant nodes of srcgroup are recursively copied.
1710 Defaults to False.
1711 createparents : bool, optional
1712 If True, any necessary parents of dstgroup will be created.
1713 Defaults to False.
1714 kwargs
1715 Additional keyword arguments can be used to customize the copying
1716 process. See the documentation of :meth:`Group._f_copy`
1717 for a description of those arguments.
1719 Returns
1720 -------
1721 node : Node
1722 The newly created copy of the source node (i.e. the destination
1723 node). See :meth:`.Node._f_copy` for further details on the
1724 semantics of copying nodes.
1726 """
1728 obj = self.get_node(where, name=name)
1729 if obj._v_depth == 0 and newparent and not newname:
1730 npobj = self.get_node(newparent)
1731 if obj._v_file is not npobj._v_file:
1732 # Special case for copying file1:/ --> file2:/path
1733 self.root._f_copy_children(npobj, overwrite=overwrite,
1734 recursive=recursive, **kwargs)
1735 return npobj
1736 else:
1737 raise OSError(
1738 "You cannot copy a root group over the same file")
1739 return obj._f_copy(newparent, newname,
1740 overwrite, recursive, createparents, **kwargs)
1742 def remove_node(self, where, name=None, recursive=False):
1743 """Remove the object node *name* under *where* location.
1745 Parameters
1746 ----------
1747 where, name
1748 These arguments work as in
1749 :meth:`File.get_node`, referencing the node to be acted upon.
1750 recursive : bool
1751 If not supplied or false, the node will be removed
1752 only if it has no children; if it does, a
1753 NodeError will be raised. If supplied
1754 with a true value, the node and all its descendants will be
1755 completely removed.
1757 """
1759 obj = self.get_node(where, name=name)
1760 obj._f_remove(recursive)
1762 def get_node_attr(self, where, attrname, name=None):
1763 """Get a PyTables attribute from the given node.
1765 Parameters
1766 ----------
1767 where, name
1768 These arguments work as in :meth:`File.get_node`, referencing the
1769 node to be acted upon.
1770 attrname
1771 The name of the attribute to retrieve. If the named
1772 attribute does not exist, an AttributeError is raised.
1774 """
1776 obj = self.get_node(where, name=name)
1777 return obj._f_getattr(attrname)
1779 def set_node_attr(self, where, attrname, attrvalue, name=None):
1780 """Set a PyTables attribute for the given node.
1782 Parameters
1783 ----------
1784 where, name
1785 These arguments work as in
1786 :meth:`File.get_node`, referencing the node to be acted upon.
1787 attrname
1788 The name of the attribute to set.
1789 attrvalue
1790 The value of the attribute to set. Any kind of Python
1791 object (like strings, ints, floats, lists, tuples, dicts,
1792 small NumPy objects ...) can be stored as an attribute.
1793 However, if necessary, pickle is automatically used so as
1794 to serialize objects that you might want to save.
1795 See the :class:`AttributeSet` class for details.
1797 Notes
1798 -----
1799 If the node already has a large number of attributes, a
1800 PerformanceWarning is issued.
1802 """
1804 obj = self.get_node(where, name=name)
1805 obj._f_setattr(attrname, attrvalue)
1807 def del_node_attr(self, where, attrname, name=None):
1808 """Delete a PyTables attribute from the given node.
1810 Parameters
1811 ----------
1812 where, name
1813 These arguments work as in :meth:`File.get_node`, referencing the
1814 node to be acted upon.
1815 attrname
1816 The name of the attribute to delete. If the named
1817 attribute does not exist, an AttributeError is raised.
1819 """
1821 obj = self.get_node(where, name=name)
1822 obj._f_delattr(attrname)
1824 def copy_node_attrs(self, where, dstnode, name=None):
1825 """Copy PyTables attributes from one node to another.
1827 Parameters
1828 ----------
1829 where, name
1830 These arguments work as in :meth:`File.get_node`, referencing the
1831 node to be acted upon.
1832 dstnode
1833 The destination node where the attributes will be copied to. It can
1834 be a path string or a Node instance (see :ref:`NodeClassDescr`).
1836 """
1838 srcobject = self.get_node(where, name=name)
1839 dstobject = self.get_node(dstnode)
1840 srcobject._v_attrs._f_copy(dstobject)
1842 def copy_children(self, srcgroup, dstgroup,
1843 overwrite=False, recursive=False,
1844 createparents=False, **kwargs):
1845 """Copy the children of a group into another group.
1847 Parameters
1848 ----------
1849 srcgroup : str
1850 The group to copy from.
1851 dstgroup : str
1852 The destination group.
1853 overwrite : bool, optional
1854 If True, the destination group will be overwritten if it already
1855 exists. Defaults to False.
1856 recursive : bool, optional
1857 If True, all descendant nodes of srcgroup are recursively copied.
1858 Defaults to False.
1859 createparents : bool, optional
1860 If True, any necessary parents of dstgroup will be created.
1861 Defaults to False.
1862 kwargs : dict
1863 Additional keyword arguments can be used to customize the copying
1864 process. See the documentation of :meth:`Group._f_copy_children`
1865 for a description of those arguments.
1867 """
1869 srcgroup = self.get_node(srcgroup) # Does the source node exist?
1870 self._check_group(srcgroup) # Is it a group?
1872 srcgroup._f_copy_children(
1873 dstgroup, overwrite, recursive, createparents, **kwargs)
    def copy_file(self, dstfilename, overwrite=False, **kwargs):
        """Copy the contents of this file to dstfilename.

        Parameters
        ----------
        dstfilename : str
            A path string indicating the name of the destination file. If
            it already exists, the copy will fail with an IOError, unless
            the overwrite argument is true.
        overwrite : bool, optional
            If true, the destination file will be overwritten if it already
            exists. In this case, the destination file must be closed, or
            errors will occur. Defaults to False.
        kwargs
            Additional keyword arguments discussed below.

        Notes
        -----
        Additional keyword arguments may be passed to customize the
        copying process. For instance, title and filters may be changed,
        user attributes may be or may not be copied, data may be
        sub-sampled, stats may be collected, etc. Arguments unknown to
        nodes are simply ignored. Check the documentation for copying
        operations of nodes to see which options they support.

        In addition, it recognizes the names of parameters present in
        :file:`tables/parameters.py` as additional keyword arguments.
        See :ref:`parameter_files` for a detailed info on the supported
        parameters.

        Copying a file usually has the beneficial side effect of
        creating a more compact and cleaner version of the original
        file.

        """

        self._check_open()

        # Check that we are not treading our own shoes
        if Path(self.filename).resolve() == Path(dstfilename).resolve():
            raise OSError("You cannot copy a file over itself")

        # Compute default arguments.
        # These are *not* passed on.
        filters = kwargs.pop('filters', None)
        if filters is None:
            # By checking the HDF5 attribute, we avoid setting filters
            # in the destination file if not explicitly set in the
            # source file. Just by assigning ``self.filters`` we would
            # not be able to tell.
            filters = getattr(self.root._v_attrs, 'FILTERS', None)
        # ``get`` (not ``pop``): ``copyuserattrs`` is deliberately left in
        # ``kwargs`` so it also propagates to the copy operations below.
        copyuserattrs = kwargs.get('copyuserattrs', True)
        title = kwargs.pop('title', self.title)

        if Path(dstfilename).is_file() and not overwrite:
            raise OSError(
                f"file ``{dstfilename}`` already exists; you may want to "
                f"use the ``overwrite`` argument"
            )

        # Create destination file, overwriting it.
        dstfileh = open_file(
            dstfilename, mode="w", title=title, filters=filters, **kwargs)

        try:
            # Maybe copy the user attributes of the root group.
            if copyuserattrs:
                self.root._v_attrs._f_copy(dstfileh.root)

            # Copy the rest of the hierarchy.
            self.root._f_copy_children(dstfileh.root, recursive=True, **kwargs)
        finally:
            # Always close the destination file, even if the copy failed.
            dstfileh.close()
1949 def list_nodes(self, where, classname=None):
1950 """Return a *list* with children nodes hanging from where.
1952 This is a list-returning version of :meth:`File.iter_nodes`.
1954 """
1956 group = self.get_node(where) # Does the parent exist?
1957 self._check_group(group) # Is it a group?
1959 return group._f_list_nodes(classname)
1961 def iter_nodes(self, where, classname=None):
1962 """Iterate over children nodes hanging from where.
1964 Parameters
1965 ----------
1966 where
1967 This argument works as in :meth:`File.get_node`, referencing the
1968 node to be acted upon.
1969 classname
1970 If the name of a class derived from
1971 Node (see :ref:`NodeClassDescr`) is supplied, only instances of
1972 that class (or subclasses of it) will be returned.
1974 Notes
1975 -----
1976 The returned nodes are alphanumerically sorted by their name.
1977 This is an iterator version of :meth:`File.list_nodes`.
1979 """
1981 group = self.get_node(where) # Does the parent exist?
1982 self._check_group(group) # Is it a group?
1984 return group._f_iter_nodes(classname)
1986 def __contains__(self, path):
1987 """Is there a node with that path?
1989 Returns True if the file has a node with the given path (a
1990 string), False otherwise.
1992 """
1994 try:
1995 self.get_node(path)
1996 except NoSuchNodeError:
1997 return False
1998 else:
1999 return True
2001 def __iter__(self):
2002 """Recursively iterate over the nodes in the tree.
2004 This is equivalent to calling :meth:`File.walk_nodes` with no
2005 arguments.
2007 Examples
2008 --------
2010 ::
2012 # Recursively list all the nodes in the object tree.
2013 h5file = tables.open_file('vlarray1.h5')
2014 print("All nodes in the object tree:")
2015 for node in h5file:
2016 print(node)
2018 """
2020 return self.walk_nodes('/')
2022 def walk_nodes(self, where="/", classname=None):
2023 """Recursively iterate over nodes hanging from where.
2025 Parameters
2026 ----------
2027 where : str or Group, optional
2028 If supplied, the iteration starts from (and includes)
2029 this group. It can be a path string or a
2030 Group instance (see :ref:`GroupClassDescr`).
2031 classname
2032 If the name of a class derived from
2033 Node (see :ref:`GroupClassDescr`) is supplied, only instances of
2034 that class (or subclasses of it) will be returned.
2036 Notes
2037 -----
2038 This version iterates over the leaves in the same group in order
2039 to avoid having a list referencing to them and thus, preventing
2040 the LRU cache to remove them after their use.
2042 Examples
2043 --------
2045 ::
2047 # Recursively print all the nodes hanging from '/detector'.
2048 print("Nodes hanging from group '/detector':")
2049 for node in h5file.walk_nodes('/detector', classname='EArray'):
2050 print(node)
2052 """
2054 class_ = get_class_by_name(classname)
2056 if class_ is Group: # only groups
2057 yield from self.walk_groups(where)
2058 elif class_ is Node: # all nodes
2059 yield self.get_node(where)
2060 for group in self.walk_groups(where):
2061 yield from self.iter_nodes(group)
2062 else: # only nodes of the named type
2063 for group in self.walk_groups(where):
2064 yield from self.iter_nodes(group, classname)
2066 def walk_groups(self, where="/"):
2067 """Recursively iterate over groups (not leaves) hanging from where.
2069 The where group itself is listed first (preorder), then each of its
2070 child groups (following an alphanumerical order) is also traversed,
2071 following the same procedure. If where is not supplied, the root
2072 group is used.
2074 The where argument can be a path string
2075 or a Group instance (see :ref:`GroupClassDescr`).
2077 """
2079 group = self.get_node(where) # Does the parent exist?
2080 self._check_group(group) # Is it a group?
2081 return group._f_walk_groups()
2083 def _check_open(self):
2084 """Check the state of the file.
2086 If the file is closed, a `ClosedFileError` is raised.
2088 """
2090 if not self.isopen:
2091 raise ClosedFileError("the file object is closed")
2093 def _iswritable(self):
2094 """Is this file writable?"""
2096 return self.mode in ('w', 'a', 'r+')
2098 def _check_writable(self):
2099 """Check whether the file is writable.
2101 If the file is not writable, a `FileModeError` is raised.
2103 """
2105 if not self._iswritable():
2106 raise FileModeError("the file is not writable")
2108 def _check_group(self, node):
2109 # `node` must already be a node.
2110 if not isinstance(node, Group):
2111 raise TypeError(f"node ``{node._v_pathname}`` is not a group")
2113 def is_undo_enabled(self):
2114 """Is the Undo/Redo mechanism enabled?
2116 Returns True if the Undo/Redo mechanism has been enabled for
2117 this file, False otherwise. Please note that this mechanism is
2118 persistent, so a newly opened PyTables file may already have
2119 Undo/Redo support enabled.
2121 """
2123 self._check_open()
2124 return self._undoEnabled
2126 def _check_undo_enabled(self):
2127 if not self._undoEnabled:
2128 raise UndoRedoError("Undo/Redo feature is currently disabled!")
    def _create_transaction_group(self):
        # Create the hidden group that holds all the Undo/Redo
        # bookkeeping (the "transaction" tree) under the root.
        tgroup = TransactionGroupG(
            self.root, _trans_group_name,
            "Transaction information container", new=True)
        # The format of the transaction container.
        tgroup._v_attrs._g__setattr('FORMATVERSION', _trans_version)
        return tgroup
    def _create_transaction(self, troot, tid):
        # Create the group for transaction number `tid` under `troot`.
        return TransactionG(
            troot, _trans_name % tid,
            "Transaction number %d" % tid, new=True)
    def _create_mark(self, trans, mid):
        # Create the group for mark number `mid` under transaction `trans`.
        return MarkG(
            trans, _markName % mid,
            "Mark number %d" % mid, new=True)
    def enable_undo(self, filters=Filters(complevel=1)):
        """Enable the Undo/Redo mechanism.

        This operation prepares the database for undoing and redoing
        modifications in the node hierarchy. This
        allows :meth:`File.mark`, :meth:`File.undo`, :meth:`File.redo` and
        other methods to be called.

        The filters argument, when specified,
        must be an instance of class Filters (see :ref:`FiltersClassDescr`) and
        is meant for setting the compression values for the action log. The
        default is having compression enabled, as the gains in terms of
        space can be considerable. You may want to disable compression if
        you want maximum speed for Undo/Redo operations.

        Calling this method when the Undo/Redo mechanism is already
        enabled raises an UndoRedoError.

        """

        # NOTE(review): the default ``Filters(complevel=1)`` is evaluated
        # once at class-definition time; this is safe only while ``Filters``
        # instances are treated as immutable -- confirm.
        maxundo = self.params['MAX_UNDO_PATH_LENGTH']

        # Table subclass whose modifications are never logged: the action
        # log itself must not generate undo actions.
        class ActionLog(NotLoggedMixin, Table):
            pass

        # Row layout of the action log: an opcode plus two string arguments.
        class ActionLogDesc(IsDescription):
            opcode = UInt8Col(pos=0)
            arg1 = StringCol(maxundo, pos=1, dflt=b"")
            arg2 = StringCol(maxundo, pos=2, dflt=b"")

        self._check_open()

        # Enabling several times is not allowed to avoid the user having
        # the illusion that a new implicit mark has been created
        # when calling enable_undo for the second time.

        if self.is_undo_enabled():
            raise UndoRedoError("Undo/Redo feature is already enabled!")

        # Reset the in-memory Undo/Redo bookkeeping.
        self._markers = {}
        self._seqmarkers = []
        self._nmarks = 0
        self._curtransaction = 0
        self._curmark = -1  # No marks yet

        # Get the Group for keeping user actions
        try:
            tgroup = self.get_node(_trans_group_path)
        except NodeError:
            # The file is going to be changed.
            self._check_writable()

            # A transaction log group does not exist. Create it
            tgroup = self._create_transaction_group()

            # Create a transaction.
            self._trans = self._create_transaction(
                tgroup, self._curtransaction)

            # Create an action log
            self._actionlog = ActionLog(
                tgroup, _action_log_name, ActionLogDesc, "Action log",
                filters=filters)

            # Create an implicit mark
            self._actionlog.append([(_op_to_code["MARK"], str(0), '')])
            self._nmarks += 1
            self._seqmarkers.append(0)  # current action is 0

            # Create a group for mark 0
            self._create_mark(self._trans, 0)
            # Initialize the marker pointer
            self._curmark = int(self._nmarks - 1)
            # Initialize the action pointer
            self._curaction = self._actionlog.nrows - 1
        else:
            # The group seems to exist already
            # Get the default transaction
            self._trans = tgroup._f_get_child(
                _trans_name % self._curtransaction)
            # Open the action log and go to the end of it
            self._actionlog = tgroup.actionlog
            # Rebuild the in-memory mark tables from the stored MARK rows.
            for row in self._actionlog:
                if row["opcode"] == _op_to_code["MARK"]:
                    name = row["arg2"].decode('utf-8')
                    self._markers[name] = self._nmarks
                    self._seqmarkers.append(row.nrow)
                    self._nmarks += 1
            # Get the current mark and current action
            self._curmark = int(self._actionlog.attrs.CURMARK)
            self._curaction = self._actionlog.attrs.CURACTION

        # The Undo/Redo mechanism has been enabled.
        self._undoEnabled = True
2243 def disable_undo(self):
2244 """Disable the Undo/Redo mechanism.
2246 Disabling the Undo/Redo mechanism leaves the database in the
2247 current state and forgets past and future database states. This
2248 makes :meth:`File.mark`, :meth:`File.undo`, :meth:`File.redo` and other
2249 methods fail with an UndoRedoError.
2251 Calling this method when the Undo/Redo mechanism is already
2252 disabled raises an UndoRedoError.
2254 """
2256 self._check_open()
2258 if not self.is_undo_enabled():
2259 raise UndoRedoError("Undo/Redo feature is already disabled!")
2261 # The file is going to be changed.
2262 self._check_writable()
2264 del self._markers
2265 del self._seqmarkers
2266 del self._curmark
2267 del self._curaction
2268 del self._curtransaction
2269 del self._nmarks
2270 del self._actionlog
2271 # Recursively delete the transaction group
2272 tnode = self.get_node(_trans_group_path)
2273 tnode._g_remove(recursive=1)
2275 # The Undo/Redo mechanism has been disabled.
2276 self._undoEnabled = False
2278 def mark(self, name=None):
2279 """Mark the state of the database.
2281 Creates a mark for the current state of the database. A unique (and
2282 immutable) identifier for the mark is returned. An optional name (a
2283 string) can be assigned to the mark. Both the identifier of a mark and
2284 its name can be used in :meth:`File.undo` and :meth:`File.redo`
2285 operations. When the name has already been used for another mark,
2286 an UndoRedoError is raised.
2288 This method can only be called when the Undo/Redo mechanism has been
2289 enabled. Otherwise, an UndoRedoError is raised.
2291 """
2293 self._check_open()
2294 self._check_undo_enabled()
2296 if name is None:
2297 name = ''
2298 else:
2299 if not isinstance(name, str):
2300 raise TypeError("Only strings are allowed as mark names. "
2301 "You passed object: '%s'" % name)
2302 if name in self._markers:
2303 raise UndoRedoError("Name '%s' is already used as a marker "
2304 "name. Try another one." % name)
2306 # The file is going to be changed.
2307 self._check_writable()
2309 self._markers[name] = self._curmark + 1
2311 # Create an explicit mark
2312 # Insert the mark in the action log
2313 self._log("MARK", str(self._curmark + 1), name)
2314 self._curmark += 1
2315 self._nmarks = self._curmark + 1
2316 self._seqmarkers.append(self._curaction)
2317 # Create a group for the current mark
2318 self._create_mark(self._trans, self._curmark)
2319 return self._curmark
2321 def _log(self, action, *args):
2322 """Log an action.
2324 The `action` must be an all-uppercase string identifying it.
2325 Arguments must also be strings.
2327 This method should be called once the action has been completed.
2329 This method can only be called when the Undo/Redo mechanism has
2330 been enabled. Otherwise, an `UndoRedoError` is raised.
2332 """
2334 assert self.is_undo_enabled()
2336 maxundo = self.params['MAX_UNDO_PATH_LENGTH']
2337 # Check whether we are at the end of the action log or not
2338 if self._curaction != self._actionlog.nrows - 1:
2339 # We are not, so delete the trailing actions
2340 self._actionlog.remove_rows(self._curaction + 1,
2341 self._actionlog.nrows)
2342 # Reset the current marker group
2343 mnode = self.get_node(_markPath % (self._curtransaction,
2344 self._curmark))
2345 mnode._g_reset()
2346 # Delete the marker groups with backup objects
2347 for mark in range(self._curmark + 1, self._nmarks):
2348 mnode = self.get_node(_markPath % (self._curtransaction, mark))
2349 mnode._g_remove(recursive=1)
2350 # Update the new number of marks
2351 self._nmarks = self._curmark + 1
2352 self._seqmarkers = self._seqmarkers[:self._nmarks]
2354 if action not in _op_to_code: # INTERNAL
2355 raise UndoRedoError("Action ``%s`` not in ``_op_to_code`` "
2356 "dictionary: %r" % (action, _op_to_code))
2358 arg1 = ""
2359 arg2 = ""
2360 if len(args) <= 1:
2361 arg1 = args[0]
2362 elif len(args) <= 2:
2363 arg1 = args[0]
2364 arg2 = args[1]
2365 else: # INTERNAL
2366 raise UndoRedoError("Too many parameters for action log: "
2367 "%r").with_traceback(args)
2368 if (len(arg1) > maxundo
2369 or len(arg2) > maxundo): # INTERNAL
2370 raise UndoRedoError("Parameter arg1 or arg2 is too long: "
2371 "(%r, %r)" % (arg1, arg2))
2372 # print("Logging-->", (action, arg1, arg2))
2373 self._actionlog.append([(_op_to_code[action],
2374 arg1.encode('utf-8'),
2375 arg2.encode('utf-8'))])
2376 self._curaction += 1
2378 def _get_mark_id(self, mark):
2379 """Get an integer markid from a mark sequence number or name."""
2381 if isinstance(mark, int):
2382 markid = mark
2383 elif isinstance(mark, str):
2384 if mark not in self._markers:
2385 lmarkers = sorted(self._markers)
2386 raise UndoRedoError("The mark that you have specified has not "
2387 "been found in the internal marker list: "
2388 "%r" % lmarkers)
2389 markid = self._markers[mark]
2390 else:
2391 raise TypeError("Parameter mark can only be an integer or a "
2392 "string, and you passed a type <%s>" % type(mark))
2393 # print("markid, self._nmarks:", markid, self._nmarks)
2394 return markid
2396 def _get_final_action(self, markid):
2397 """Get the action to go.
2399 It does not touch the self private attributes
2401 """
2403 if markid > self._nmarks - 1:
2404 # The required mark is beyond the end of the action log
2405 # The final action is the last row
2406 return self._actionlog.nrows
2407 elif markid <= 0:
2408 # The required mark is the first one
2409 # return the first row
2410 return 0
2412 return self._seqmarkers[markid]
    def _doundo(self, finalaction, direction):
        """Undo/Redo actions up to final action in the specified direction.

        `direction` is -1 for undo (the relevant log slice is walked in
        reverse) and +1 for redo (walked forwards).  Updates
        ``self._curaction`` and ``self._curmark`` as rows are processed.

        """

        if direction < 0:
            # Undo: actions from just after `finalaction` up to the current
            # one, reverted in reverse order.
            actionlog = \
                self._actionlog[finalaction + 1:self._curaction + 1][::-1]
        else:
            # Redo: pending actions from the current one up to (but not
            # including) `finalaction`, replayed in forward order.
            actionlog = self._actionlog[self._curaction:finalaction]

        # Uncomment this for debugging
#         print("curaction, finalaction, direction", \
#               self._curaction, finalaction, direction)
        for i in range(len(actionlog)):
            if actionlog['opcode'][i] != _op_to_code["MARK"]:
                # undo/redo the action
                if direction > 0:
                    # Uncomment this for debugging
                    # print("redo-->", \
                    #       _code_to_op[actionlog['opcode'][i]],\
                    #       actionlog['arg1'][i],\
                    #       actionlog['arg2'][i])
                    undoredo.redo(self,
                                  # _code_to_op[actionlog['opcode'][i]],
                                  # The next is a workaround for python < 2.5
                                  _code_to_op[int(actionlog['opcode'][i])],
                                  actionlog['arg1'][i].decode('utf8'),
                                  actionlog['arg2'][i].decode('utf8'))
                else:
                    # Uncomment this for debugging
                    # print("undo-->", \
                    #       _code_to_op[actionlog['opcode'][i]],\
                    #       actionlog['arg1'][i].decode('utf8'),\
                    #       actionlog['arg2'][i].decode('utf8'))
                    undoredo.undo(self,
                                  # _code_to_op[actionlog['opcode'][i]],
                                  # The next is a workaround for python < 2.5
                                  _code_to_op[int(actionlog['opcode'][i])],
                                  actionlog['arg1'][i].decode('utf8'),
                                  actionlog['arg2'][i].decode('utf8'))
            else:
                # A MARK row: only move the current-mark pointer.
                if direction > 0:
                    self._curmark = int(actionlog['arg1'][i])
                else:
                    self._curmark = int(actionlog['arg1'][i]) - 1
                    # Protection against negative marks
                    if self._curmark < 0:
                        self._curmark = 0
            self._curaction += direction
    def undo(self, mark=None):
        """Go to a past state of the database.

        Returns the database to the state associated with the specified mark.
        Both the identifier of a mark and its name can be used. If the mark is
        omitted, the last created mark is used. If there are no past
        marks, or the specified mark is not older than the current one, an
        UndoRedoError is raised.

        This method can only be called when the Undo/Redo mechanism
        has been enabled. Otherwise, an UndoRedoError
        is raised.

        """

        self._check_open()
        self._check_undo_enabled()

#         print("(pre)UNDO: (curaction, curmark) = (%s,%s)" % \
#               (self._curaction, self._curmark))
        if mark is None:
            markid = self._curmark
            # Correction if we are settled on top of a mark
            opcode = self._actionlog.cols.opcode
            if opcode[self._curaction] == _op_to_code["MARK"]:
                markid -= 1
        else:
            # Get the mark ID number
            markid = self._get_mark_id(mark)
        # Get the final action ID to go
        finalaction = self._get_final_action(markid)
        if finalaction > self._curaction:
            raise UndoRedoError("Mark ``%s`` is newer than the current mark. "
                                "Use `redo()` or `goto()` instead." % (mark,))

        # The file is going to be changed.
        self._check_writable()

        # Try to reach this mark by unwinding actions in the log.
        # ``finalaction - 1`` because _doundo's backward slice starts at
        # ``finalaction + 1``, so this reverts down to `finalaction` itself.
        self._doundo(finalaction - 1, -1)
        if self._curaction < self._actionlog.nrows - 1:
            self._curaction += 1
        # Refresh the current mark from the row we landed on.
        self._curmark = int(self._actionlog.cols.arg1[self._curaction])
#         print("(post)UNDO: (curaction, curmark) = (%s,%s)" % \
#               (self._curaction, self._curmark))
2509 def redo(self, mark=None):
2510 """Go to a future state of the database.
2512 Returns the database to the state associated with the specified
2513 mark. Both the identifier of a mark and its name can be used.
2514 If the `mark` is omitted, the next created mark is used. If
2515 there are no future marks, or the specified mark is not newer
2516 than the current one, an UndoRedoError is raised.
2518 This method can only be called when the Undo/Redo mechanism has
2519 been enabled. Otherwise, an UndoRedoError is raised.
2521 """
2523 self._check_open()
2524 self._check_undo_enabled()
2526# print("(pre)REDO: (curaction, curmark) = (%s, %s)" % \
2527# (self._curaction, self._curmark))
2528 if self._curaction >= self._actionlog.nrows - 1:
2529 # We are at the end of log, so no action
2530 return
2532 if mark is None:
2533 mark = self._curmark + 1
2534 elif mark == -1:
2535 mark = int(self._nmarks) # Go beyond the mark bounds up to the end
2536 # Get the mark ID number
2537 markid = self._get_mark_id(mark)
2538 finalaction = self._get_final_action(markid)
2539 if finalaction < self._curaction + 1:
2540 raise UndoRedoError("Mark ``%s`` is older than the current mark. "
2541 "Use `redo()` or `goto()` instead." % (mark,))
2543 # The file is going to be changed.
2544 self._check_writable()
2546 # Get the final action ID to go
2547 self._curaction += 1
2549 # Try to reach this mark by redoing the actions in the log
2550 self._doundo(finalaction, 1)
2551 # Increment the current mark only if we are not at the end of marks
2552 if self._curmark < self._nmarks - 1:
2553 self._curmark += 1
2554 if self._curaction > self._actionlog.nrows - 1:
2555 self._curaction = self._actionlog.nrows - 1
2556# print("(post)REDO: (curaction, curmark) = (%s,%s)" % \
2557# (self._curaction, self._curmark))
2559 def goto(self, mark):
2560 """Go to a specific mark of the database.
2562 Returns the database to the state associated with the specified mark.
2563 Both the identifier of a mark and its name can be used.
2565 This method can only be called when the Undo/Redo mechanism has been
2566 enabled. Otherwise, an UndoRedoError is raised.
2568 """
2570 self._check_open()
2571 self._check_undo_enabled()
2573 if mark == -1: # Special case
2574 mark = self._nmarks # Go beyond the mark bounds up to the end
2575 # Get the mark ID number
2576 markid = self._get_mark_id(mark)
2577 finalaction = self._get_final_action(markid)
2578 if finalaction < self._curaction:
2579 self.undo(mark)
2580 else:
2581 self.redo(mark)
2583 def get_current_mark(self):
2584 """Get the identifier of the current mark.
2586 Returns the identifier of the current mark. This can be used
2587 to know the state of a database after an application crash, or to
2588 get the identifier of the initial implicit mark after a call
2589 to :meth:`File.enable_undo`.
2591 This method can only be called when the Undo/Redo mechanism
2592 has been enabled. Otherwise, an UndoRedoError
2593 is raised.
2595 """
2597 self._check_open()
2598 self._check_undo_enabled()
2599 return self._curmark
    def _shadow_name(self):
        """Compute and return a shadow name.

        Computes the current shadow name according to the current
        transaction, mark and action. It returns a tuple with the
        shadow parent node and the name of the shadow in it.

        """

        # Note: ``_shadow_parent`` and ``_shadow_name`` below are
        # module-level template strings (methods are not in the local
        # scope, so the method name does not shadow the global here).
        parent = self.get_node(
            _shadow_parent % (self._curtransaction, self._curmark))
        name = _shadow_name % (self._curaction,)

        return (parent, name)
2616 def flush(self):
2617 """Flush all the alive leaves in the object tree."""
2619 self._check_open()
2621 # Flush the cache to disk
2622 self._node_manager.flush_nodes()
2623 self._flush_file(0) # 0 means local scope, 1 global (virtual) scope
    def close(self):
        """Flush all the alive leaves in object tree and close the file."""

        # If the file is already closed, return immediately
        if not self.isopen:
            return

        # If this file has been opened more than once, decrease the
        # counter and return
        if self._open_count > 1:
            self._open_count -= 1
            return

        # Remember the name: ``__dict__.clear()`` below wipes every
        # attribute, but the open-file registry still needs it afterwards.
        filename = self.filename

        if self._undoEnabled and self._iswritable():
            # Save the current mark and current action
            self._actionlog.attrs._g__setattr("CURMARK", self._curmark)
            self._actionlog.attrs._g__setattr("CURACTION", self._curaction)

        # Close all loaded nodes.
        self.root._f_close()

        self._node_manager.shutdown()

        # Post-conditions
        assert len(self._node_manager.cache) == 0, \
            ("cached nodes remain after closing: %s"
             % list(self._node_manager.cache))

        # No other nodes should have been revived.
        assert len(self._node_manager.registry) == 0, \
            ("alive nodes remain after closing: %s"
             % list(self._node_manager.registry))

        # Close the file
        self._close_file()

        # After the objects are disconnected, destroy the
        # object dictionary using the brute force ;-)
        # This should help to the garbage collector
        self.__dict__.clear()

        # Set the flag to indicate that the file is closed
        self.isopen = 0

        # Restore the filename attribute that is used by _FileRegistry
        self.filename = filename

        # Delete the entry from the registry of opened files
        _open_files.remove(self)
2677 def __enter__(self):
2678 """Enter a context and return the same file."""
2680 return self
2682 def __exit__(self, *exc_info):
2683 """Exit a context and close the file."""
2685 self.close()
2686 return False # do not hide exceptions
2688 def __str__(self):
2689 """Return a short string representation of the object tree.
2691 Examples
2692 --------
2694 ::
2696 >>> import tables
2697 >>> f = tables.open_file('tables/tests/Tables_lzo2.h5')
2698 >>> print(f)
2699 tables/tests/Tables_lzo2.h5 (File) 'Table Benchmark'
2700 Last modif.: '...'
2701 Object Tree:
2702 / (RootGroup) 'Table Benchmark'
2703 /tuple0 (Table(100,)lzo(1)) 'This is the table title'
2704 /group0 (Group) ''
2705 /group0/tuple1 (Table(100,)lzo(1)) 'This is the table title'
2706 /group0/group1 (Group) ''
2707 /group0/group1/tuple2 (Table(100,)lzo(1)) 'This is the table title'
2708 /group0/group1/group2 (Group) ''
2709 >>> f.close()
2711 """
2712 if not self.isopen:
2713 return "<closed File>"
2715 # Print all the nodes (Group and Leaf objects) on object tree
2716 try:
2717 date = datetime.datetime.fromtimestamp(
2718 Path(self.filename).stat().st_mtime, datetime.timezone.utc
2719 ).isoformat(timespec='seconds')
2720 except OSError:
2721 # in-memory file
2722 date = "<in-memory file>"
2723 lines = [f'{self.filename} (File) {self.title!r}',
2724 f'Last modif.: {date!r}',
2725 'Object Tree: ']
2727 for group in self.walk_groups("/"):
2728 lines.append(f'{group}')
2729 for kind in self._node_kinds[1:]:
2730 for node in self.list_nodes(group, kind):
2731 lines.append(f'{node}')
2732 return '\n'.join(lines) + '\n'
2734 def __repr__(self):
2735 """Return a detailed string representation of the object tree."""
2737 if not self.isopen:
2738 return "<closed File>"
2740 # Print all the nodes (Group and Leaf objects) on object tree
2741 lines = [
2742 f'File(filename={self.filename!s}, title={self.title!r}, '
2743 f'mode={self.mode!r}, root_uep={self.root_uep!r}, '
2744 f'filters={self.filters!r})']
2745 for group in self.walk_groups("/"):
2746 lines.append(f'{group}')
2747 for kind in self._node_kinds[1:]:
2748 for node in self.list_nodes(group, kind):
2749 lines.append(f'{node!r}')
2750 return '\n'.join(lines) + '\n'
2752 def _update_node_locations(self, oldpath, newpath):
2753 """Update location information of nodes under `oldpath`.
2755 This only affects *already loaded* nodes.
2757 """
2759 oldprefix = oldpath + '/' # root node can not be renamed, anyway
2760 oldprefix_len = len(oldprefix)
2762 # Update alive and dead descendents.
2763 for cache in [self._node_manager.cache, self._node_manager.registry]:
2764 for nodepath in list(cache):
2765 if nodepath.startswith(oldprefix) and nodepath != oldprefix:
2766 nodesuffix = nodepath[oldprefix_len:]
2767 newnodepath = join_path(newpath, nodesuffix)
2768 newnodeppath = split_path(newnodepath)[0]
2769 descendent_node = self._get_node(nodepath)
2770 descendent_node._g_update_location(newnodeppath)
# If a user hits ^C during a run, it is wise to gracefully close the
# opened files.  ``_open_files`` is the module-level registry that tracks
# every open File instance; ``close_all`` closes whatever remains open.
atexit.register(_open_files.close_all)