Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/group.py: 22%
267 statements
coverage.py v7.2.7, created at 2023-06-07 06:30 +0000
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
#          and contributor agreement.

"""
    Implements support for high-level access to HDF5 groups.
"""
from contextlib import contextmanager
import posixpath as pp

import numpy


from .compat import filename_decode, filename_encode

from .. import h5, h5g, h5i, h5o, h5r, h5t, h5l, h5p
from . import base
from .base import HLObject, MutableMappingHDF5, phil, with_phil
from . import dataset
from . import datatype
from .vds import vds_support


class Group(HLObject, MutableMappingHDF5):

    """ Represents an HDF5 group.
    """

    def __init__(self, bind):
        """ Create a new Group object by binding to a low-level GroupID.
        """
        with phil:
            if not isinstance(bind, h5g.GroupID):
                raise ValueError("%s is not a GroupID" % bind)
            super().__init__(bind)

    _gcpl_crt_order = h5p.create(h5p.GROUP_CREATE)
    _gcpl_crt_order.set_link_creation_order(
        h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
    _gcpl_crt_order.set_attr_creation_order(
        h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)

    def create_group(self, name, track_order=None):
        """ Create and return a new subgroup.

        Name may be absolute or relative. Fails if the target name already
        exists.

        track_order
            Track dataset/group/attribute creation order under this group
            if True. If None use global default h5.get_config().track_order.
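
        A minimal usage sketch (assumes ``f`` is a writable File or Group;
        intermediate groups in the path are created automatically):

        >>> grp = f.create_group('subgroup')
        >>> deep = f.create_group('/a/b/c')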
57 """
58 if track_order is None:
59 track_order = h5.get_config().track_order
61 with phil:
62 name, lcpl = self._e(name, lcpl=True)
63 gcpl = Group._gcpl_crt_order if track_order else None
64 gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
65 return Group(gid)

    def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
        """ Create a new HDF5 dataset

        name
            Name of the dataset (absolute or relative). Provide None to make
            an anonymous dataset.
        shape
            Dataset shape. Use "()" for scalar datasets. Required if "data"
            isn't provided.
        dtype
            Numpy dtype or string. If omitted, dtype('f') will be used.
            Required if "data" isn't provided; otherwise, overrides data
            array's dtype.
        data
            Provide data to initialize the dataset. If used, you can omit
            shape and dtype arguments.

        Keyword-only arguments:

        chunks
            (Tuple or int) Chunk shape, or True to enable auto-chunking.
            Integers can be used for 1D shape.

        maxshape
            (Tuple or int) Make the dataset resizable up to this shape. Use
            None for axes you want to be unlimited. Integers can be used
            for 1D shape.
        compression
            (String or int) Compression strategy. Legal values are 'gzip',
            'szip', 'lzf'. If an integer in range(10), this indicates gzip
            compression level. Otherwise, an integer indicates the number of a
            dynamically loaded compression filter.
        compression_opts
            Compression settings. This is an integer for gzip, 2-tuple for
            szip, etc. If specifying a dynamically loaded compression filter
            number, this must be a tuple of values.
        scaleoffset
            (Integer) Enable scale/offset filter for (usually) lossy
            compression of integer or floating-point data. For integer
            data, the value of scaleoffset is the number of bits to
            retain (pass 0 to let HDF5 determine the minimum number of
            bits necessary for lossless compression). For floating point
            data, scaleoffset is the number of digits after the decimal
            place to retain; stored values thus have absolute error
            less than 0.5*10**(-scaleoffset).
        shuffle
            (T/F) Enable shuffle filter.
        fletcher32
            (T/F) Enable fletcher32 error detection. Not permitted in
            conjunction with the scale/offset filter.
        fillvalue
            (Scalar) Use this value for uninitialized parts of the dataset.
        track_times
            (T/F) Enable dataset creation timestamps.
        track_order
            (T/F) Track attribute creation order if True. If omitted use
            global default h5.get_config().track_order.
        external
            (Iterable of tuples) Sets the external storage property, thus
            designating that the dataset will be stored in one or more
            non-HDF5 files external to the HDF5 file. Adds each tuple
            of (name, offset, size) to the dataset's list of external files.
            Each name must be a str, bytes, or os.PathLike; each offset and
            size, an integer. If only a name is given instead of an iterable
            of tuples, it is equivalent to [(name, 0, h5py.h5f.UNLIMITED)].
        efile_prefix
            (String) External dataset file prefix for dataset access property
            list. Does not persist in the file.
        virtual_prefix
            (String) Virtual dataset file prefix for dataset access property
            list. Does not persist in the file.
        allow_unknown_filter
            (T/F) Do not check that the requested filter is available for use.
            This should only be used with ``write_direct_chunk``, where the
            caller compresses the data before handing it to h5py.
        rdcc_nbytes
            Total size of the dataset's chunk cache in bytes. The default size
            is 1024**2 (1 MiB).
        rdcc_w0
            The chunk preemption policy for this dataset. This must be
            between 0 and 1 inclusive and indicates the weighting according to
            which chunks that have been fully read or written are penalized
            when determining which chunks to flush from cache. A value of 0
            means fully read or written chunks are treated no differently than
            other chunks (the preemption is strictly LRU) while a value of 1
            means fully read or written chunks are always preempted before
            other chunks. If your application only reads or writes data once,
            this can be safely set to 1. Otherwise, this should be set lower
            depending on how often you re-read or re-write the same data. The
            default value is 0.75.
        rdcc_nslots
            The number of chunk slots in the dataset's chunk cache. Increasing
            this value reduces the number of cache collisions, but slightly
            increases the memory used. Due to the hashing strategy, this value
            should ideally be a prime number. As a rule of thumb, this value
            should be at least 10 times the number of chunks that can fit in
            rdcc_nbytes bytes. For maximum performance, this value should be
            set approximately 100 times that number of chunks. The default
            value is 521.
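
        A short sketch of common usage (assumes ``f`` is a writable File or
        Group; the names and option values are illustrative):

        >>> dset = f.create_dataset('data', shape=(100, 100), dtype='f4')
        >>> packed = f.create_dataset('packed', data=numpy.arange(10),
        ...                           chunks=True, compression='gzip',
        ...                           compression_opts=4, shuffle=True)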
165 """
166 if 'track_order' not in kwds:
167 kwds['track_order'] = h5.get_config().track_order
169 if 'efile_prefix' in kwds:
170 kwds['efile_prefix'] = self._e(kwds['efile_prefix'])
172 if 'virtual_prefix' in kwds:
173 kwds['virtual_prefix'] = self._e(kwds['virtual_prefix'])
175 with phil:
176 group = self
177 if name:
178 name = self._e(name)
179 if b'/' in name.lstrip(b'/'):
180 parent_path, name = name.rsplit(b'/', 1)
181 group = self.require_group(parent_path)
183 dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
184 dset = dataset.Dataset(dsid)
185 return dset

    if vds_support:
        def create_virtual_dataset(self, name, layout, fillvalue=None):
            """Create a new virtual dataset in this group.

            See virtual datasets in the docs for more information.

            name
                (str) Name of the new dataset

            layout
                (VirtualLayout) Defines the sources for the virtual dataset

            fillvalue
                The value to use where there is no data.
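
            A minimal sketch (the file and dataset names here are
            illustrative):

            >>> layout = h5py.VirtualLayout(shape=(4, 100), dtype='i4')
            >>> layout[0] = h5py.VirtualSource('source.h5', 'data', shape=(100,))
            >>> f.create_virtual_dataset('virt', layout, fillvalue=-1)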
202 """
203 with phil:
204 group = self
206 if name:
207 name = self._e(name)
208 if b'/' in name.lstrip(b'/'):
209 parent_path, name = name.rsplit(b'/', 1)
210 group = self.require_group(parent_path)
212 dsid = layout.make_dataset(
213 group, name=name, fillvalue=fillvalue,
214 )
215 dset = dataset.Dataset(dsid)
217 return dset

        @contextmanager
        def build_virtual_dataset(
                self, name, shape, dtype, maxshape=None, fillvalue=None
        ):
            """Assemble a virtual dataset in this group.

            This is used as a context manager::

                with f.build_virtual_dataset('virt', (10, 1000), np.uint32) as layout:
                    layout[0] = h5py.VirtualSource('foo.h5', 'data', (1000,))

            name
                (str) Name of the new dataset
            shape
                (tuple) Shape of the dataset
            dtype
                A numpy dtype for data read from the virtual dataset
            maxshape
                (tuple, optional) Maximum dimensions if the dataset can grow.
                Use None for unlimited dimensions.
            fillvalue
                The value used where no data is available.
            """
            from .vds import VirtualLayout
            layout = VirtualLayout(shape, dtype, maxshape, self.file.filename)
            yield layout

            self.create_virtual_dataset(name, layout, fillvalue)

    def require_dataset(self, name, shape, dtype, exact=False, **kwds):
        """ Open a dataset, creating it if it doesn't exist.

        If keyword "exact" is False (default), an existing dataset must have
        the same shape and a conversion-compatible dtype to be returned. If
        True, the shape and dtype must match exactly.

        If keyword "maxshape" is given, the maxshape and dtype must match
        instead.

        If any of the keywords "rdcc_nslots", "rdcc_nbytes", or "rdcc_w0" are
        given, they will be used to configure the dataset's chunk cache.

        Other dataset keywords (see create_dataset) may be provided, but are
        only used if a new dataset is to be created.

        Raises TypeError if an incompatible object already exists, or if the
        shape, maxshape or dtype don't match according to the above rules.
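
        For example (a sketch; assumes ``f`` is a writable File or Group):

        >>> dset = f.require_dataset('data', shape=(100,), dtype='f4')  # created
        >>> dset = f.require_dataset('data', shape=(100,), dtype='f4')  # reopened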
266 """
267 if 'efile_prefix' in kwds:
268 kwds['efile_prefix'] = self._e(kwds['efile_prefix'])
270 if 'virtual_prefix' in kwds:
271 kwds['virtual_prefix'] = self._e(kwds['virtual_prefix'])
273 with phil:
274 if name not in self:
275 return self.create_dataset(name, *(shape, dtype), **kwds)
277 if isinstance(shape, int):
278 shape = (shape,)
280 try:
281 dsid = dataset.open_dset(self, self._e(name), **kwds)
282 dset = dataset.Dataset(dsid)
283 except KeyError:
284 dset = self[name]
285 raise TypeError("Incompatible object (%s) already exists" % dset.__class__.__name__)
287 if shape != dset.shape:
288 if "maxshape" not in kwds:
289 raise TypeError("Shapes do not match (existing %s vs new %s)" % (dset.shape, shape))
290 elif kwds["maxshape"] != dset.maxshape:
291 raise TypeError("Max shapes do not match (existing %s vs new %s)" % (dset.maxshape, kwds["maxshape"]))
293 if exact:
294 if dtype != dset.dtype:
295 raise TypeError("Datatypes do not exactly match (existing %s vs new %s)" % (dset.dtype, dtype))
296 elif not numpy.can_cast(dtype, dset.dtype):
297 raise TypeError("Datatypes cannot be safely cast (existing %s vs new %s)" % (dset.dtype, dtype))
299 return dset

    def create_dataset_like(self, name, other, **kwupdate):
        """ Create a dataset similar to `other`.

        name
            Name of the dataset (absolute or relative). Provide None to make
            an anonymous dataset.
        other
            The dataset which the new dataset should mimic. All properties,
            such as shape, dtype, chunking, ... will be taken from it, but no
            data or attributes are copied.

        Any dataset keywords (see create_dataset) may be provided, including
        shape and dtype, in which case the provided values take precedence
        over those from `other`.
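
        For example (a sketch; assumes ``f['source']`` is an existing
        dataset):

        >>> clone = f.create_dataset_like('clone', f['source'])
        >>> wider = f.create_dataset_like('wider', f['source'], dtype='f8')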
315 """
316 for k in ('shape', 'dtype', 'chunks', 'compression',
317 'compression_opts', 'scaleoffset', 'shuffle', 'fletcher32',
318 'fillvalue'):
319 kwupdate.setdefault(k, getattr(other, k))
320 # TODO: more elegant way to pass these (dcpl to create_dataset?)
321 dcpl = other.id.get_create_plist()
322 kwupdate.setdefault('track_times', dcpl.get_obj_track_times())
323 kwupdate.setdefault('track_order', dcpl.get_attr_creation_order() > 0)
325 # Special case: the maxshape property always exists, but if we pass it
326 # to create_dataset, the new dataset will automatically get chunked
327 # layout. So we copy it only if it is different from shape.
328 if other.maxshape != other.shape:
329 kwupdate.setdefault('maxshape', other.maxshape)
331 return self.create_dataset(name, **kwupdate)

    def require_group(self, name):
        # TODO: support kwargs like require_dataset
        """Return a group, creating it if it doesn't exist.

        TypeError is raised if something with that name already exists that
        isn't a group.
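
        For example (assumes ``f`` is a writable File or Group):

        >>> grp = f.require_group('subgroup')  # created on the first call
        >>> grp = f.require_group('subgroup')  # opened on later calls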
339 """
340 with phil:
341 if name not in self:
342 return self.create_group(name)
343 grp = self[name]
344 if not isinstance(grp, Group):
345 raise TypeError("Incompatible object (%s) already exists" % grp.__class__.__name__)
346 return grp

    @with_phil
    def __getitem__(self, name):
        """ Open an object in the file """

        if isinstance(name, h5r.Reference):
            oid = h5r.dereference(name, self.id)
            if oid is None:
                raise ValueError("Invalid HDF5 object reference")
        elif isinstance(name, (bytes, str)):
            oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
        else:
            raise TypeError("Accessing a group is done with bytes or str, "
                            "not {}".format(type(name)))

        otype = h5i.get_type(oid)
        if otype == h5i.GROUP:
            return Group(oid)
        elif otype == h5i.DATASET:
            return dataset.Dataset(oid, readonly=(self.file.mode == 'r'))
        elif otype == h5i.DATATYPE:
            return datatype.Datatype(oid)
        else:
            raise TypeError("Unknown object type")

    def get(self, name, default=None, getclass=False, getlink=False):
        """ Retrieve an item or other information.

        "name" given only:
            Return the item, or "default" if it doesn't exist

        "getclass" is True:
            Return the class of object (Group, Dataset, etc.), or "default"
            if nothing with that name exists

        "getlink" is True:
            Return HardLink, SoftLink or ExternalLink instances. Return
            "default" if nothing with that name exists.

        "getlink" and "getclass" are True:
            Return HardLink, SoftLink and ExternalLink classes. Return
            "default" if nothing with that name exists.

        Example:

        >>> cls = group.get('foo', getclass=True)
        >>> if cls == SoftLink:
        ...     print('"foo" is a soft link!')
        """
        # pylint: disable=arguments-differ

        with phil:
            if not (getclass or getlink):
                try:
                    return self[name]
                except KeyError:
                    return default

            if name not in self:
                return default

            elif getclass and not getlink:
                typecode = h5o.get_info(self.id, self._e(name), lapl=self._lapl).type

                try:
                    return {h5o.TYPE_GROUP: Group,
                            h5o.TYPE_DATASET: dataset.Dataset,
                            h5o.TYPE_NAMED_DATATYPE: datatype.Datatype}[typecode]
                except KeyError:
                    raise TypeError("Unknown object type")

            elif getlink:
                typecode = self.id.links.get_info(self._e(name), lapl=self._lapl).type

                if typecode == h5l.TYPE_SOFT:
                    if getclass:
                        return SoftLink
                    linkbytes = self.id.links.get_val(self._e(name), lapl=self._lapl)
                    return SoftLink(self._d(linkbytes))

                elif typecode == h5l.TYPE_EXTERNAL:
                    if getclass:
                        return ExternalLink
                    filebytes, linkbytes = self.id.links.get_val(self._e(name), lapl=self._lapl)
                    return ExternalLink(
                        filename_decode(filebytes), self._d(linkbytes)
                    )

                elif typecode == h5l.TYPE_HARD:
                    return HardLink if getclass else HardLink()

                else:
                    raise TypeError("Unknown link type")

    def __setitem__(self, name, obj):
        """ Add an object to the group. The name must not already be in use.

        The action taken depends on the type of object assigned:

        Named HDF5 object (Dataset, Group, Datatype)
            A hard link is created at "name" which points to the
            given object.

        SoftLink or ExternalLink
            Create the corresponding link.

        Numpy ndarray
            The array is converted to a dataset object, with default
            settings (contiguous storage, etc.).

        Numpy dtype
            Commit a copy of the datatype as a named datatype in the file.

        Anything else
            Attempt to convert it to an ndarray and store it. Scalar
            values are stored as scalar datasets. Raise ValueError if we
            can't understand the resulting array dtype.
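
        Example (a sketch; assumes ``f`` is a writable File or Group and
        ``dset`` is an existing Dataset):

        >>> f['hard'] = dset                         # hard link to dset
        >>> f['soft'] = SoftLink('/path/to/target')  # soft link
        >>> f['array'] = numpy.arange(10)            # new dataset from data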
463 """
464 with phil:
465 name, lcpl = self._e(name, lcpl=True)
467 if isinstance(obj, HLObject):
468 h5o.link(obj.id, self.id, name, lcpl=lcpl, lapl=self._lapl)
470 elif isinstance(obj, SoftLink):
471 self.id.links.create_soft(name, self._e(obj.path), lcpl=lcpl, lapl=self._lapl)
473 elif isinstance(obj, ExternalLink):
474 fn = filename_encode(obj.filename)
475 self.id.links.create_external(name, fn, self._e(obj.path),
476 lcpl=lcpl, lapl=self._lapl)
478 elif isinstance(obj, numpy.dtype):
479 htype = h5t.py_create(obj, logical=True)
480 htype.commit(self.id, name, lcpl=lcpl)
482 else:
483 ds = self.create_dataset(None, data=obj)
484 h5o.link(ds.id, self.id, name, lcpl=lcpl)

    @with_phil
    def __delitem__(self, name):
        """ Delete (unlink) an item from this group. """
        self.id.unlink(self._e(name))

    @with_phil
    def __len__(self):
        """ Number of members attached to this group """
        return self.id.get_num_objs()

    @with_phil
    def __iter__(self):
        """ Iterate over member names """
        for x in self.id.__iter__():
            yield self._d(x)

    @with_phil
    def __reversed__(self):
        """ Iterate over member names in reverse order. """
        for x in self.id.__reversed__():
            yield self._d(x)

    @with_phil
    def __contains__(self, name):
        """ Test if a member name exists """
        if hasattr(h5g, "_path_valid"):
            if not self.id:
                return False
            return h5g._path_valid(self.id, self._e(name), self._lapl)
        return self._e(name) in self.id

    def copy(self, source, dest, name=None,
             shallow=False, expand_soft=False, expand_external=False,
             expand_refs=False, without_attrs=False):
        """Copy an object or group.

        The source can be a path, Group, Dataset, or Datatype object. The
        destination can be either a path or a Group object. The source and
        destination need not be in the same file.

        If the source is a Group object, all objects contained in that group
        will be copied recursively.

        When the destination is a Group object, by default the target will
        be created in that group with its current name (basename of obj.name).
        You can override that by setting "name" to a string.

        There are various options which all default to "False":

        - shallow: copy only immediate members of a group.

        - expand_soft: expand soft links into new objects.

        - expand_external: expand external links into new objects.

        - expand_refs: copy objects that are pointed to by references.

        - without_attrs: copy object without copying attributes.

        Example:

        >>> f = File('myfile.hdf5', 'w')
        >>> f.create_group("MyGroup")
        >>> list(f.keys())
        ['MyGroup']
        >>> f.copy('MyGroup', 'MyCopy')
        >>> list(f.keys())
        ['MyGroup', 'MyCopy']

        """
        with phil:
            if isinstance(source, HLObject):
                source_path = '.'
            else:
                # Interpret source as a path relative to this group
                source_path = source
                source = self

            if isinstance(dest, Group):
                if name is not None:
                    dest_path = name
                elif source_path == '.':
                    dest_path = pp.basename(h5i.get_name(source.id))
                else:
                    # copy source into dest group: dest_name/source_name
                    dest_path = pp.basename(h5i.get_name(source[source_path].id))

            elif isinstance(dest, HLObject):
                raise TypeError("Destination must be a path or Group object")

            else:
                # Interpret destination as a path relative to this group
                dest_path = dest
                dest = self

            flags = 0
            if shallow:
                flags |= h5o.COPY_SHALLOW_HIERARCHY_FLAG
            if expand_soft:
                flags |= h5o.COPY_EXPAND_SOFT_LINK_FLAG
            if expand_external:
                flags |= h5o.COPY_EXPAND_EXT_LINK_FLAG
            if expand_refs:
                flags |= h5o.COPY_EXPAND_REFERENCE_FLAG
            if without_attrs:
                flags |= h5o.COPY_WITHOUT_ATTR_FLAG
            if flags:
                copypl = h5p.create(h5p.OBJECT_COPY)
                copypl.set_copy_object(flags)
            else:
                copypl = None

            h5o.copy(source.id, self._e(source_path), dest.id, self._e(dest_path),
                     copypl, base.dlcpl)

    def move(self, source, dest):
        """ Move a link to a new location in the file.

        If "source" is a hard link, this effectively renames the object. If
        "source" is a soft or external link, the link itself is moved, with its
        value unmodified.
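
        For example (assumes ``f`` contains an object named 'a' and an
        existing group 'grp'):

        >>> f.move('a', 'b')      # rename in place
        >>> f.move('b', 'grp/b')  # relocate under another group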
606 """
607 with phil:
608 if source == dest:
609 return
610 self.id.links.move(self._e(source), self.id, self._e(dest),
611 lapl=self._lapl, lcpl=self._lcpl)

    def visit(self, func):
        """ Recursively visit all names in this group and subgroups (HDF5 1.8).

        You supply a callable (function, method or callable object); it
        will be called exactly once for each link in this group and every
        group below it. Your callable must conform to the signature:

            func(<member name>) => <None or return value>

        Returning None continues iteration, returning anything else stops
        and immediately returns that value from the visit method. No
        particular order of iteration within groups is guaranteed.

        Example:

        >>> # List the entire contents of the file
        >>> f = File("foo.hdf5")
        >>> list_of_names = []
        >>> f.visit(list_of_names.append)
        """
        with phil:
            def proxy(name):
                """ Call the function with the text name, not bytes """
                return func(self._d(name))
            return h5o.visit(self.id, proxy)

    def visititems(self, func):
        """ Recursively visit names and objects in this group (HDF5 1.8).

        You supply a callable (function, method or callable object); it
        will be called exactly once for each link in this group and every
        group below it. Your callable must conform to the signature:

            func(<member name>, <object>) => <None or return value>

        Returning None continues iteration, returning anything else stops
        and immediately returns that value from the visit method. No
        particular order of iteration within groups is guaranteed.

        Example:

        # Get a list of all datasets in the file
        >>> mylist = []
        >>> def func(name, obj):
        ...     if isinstance(obj, Dataset):
        ...         mylist.append(name)
        ...
        >>> f = File('foo.hdf5')
        >>> f.visititems(func)
        """
        with phil:
            def proxy(name):
                """ Use the text name of the object, not bytes """
                name = self._d(name)
                return func(name, self[name])
            return h5o.visit(self.id, proxy)

    @with_phil
    def __repr__(self):
        if not self:
            r = u"<Closed HDF5 group>"
        else:
            namestr = (
                '"%s"' % self.name
            ) if self.name is not None else u"(anonymous)"
            r = '<HDF5 group %s (%d members)>' % (namestr, len(self))

        return r


class HardLink:

    """
        Represents a hard link in an HDF5 file. Provided only so that
        Group.get works in a sensible way. Has no other function.
    """

    pass


class SoftLink:

    """
        Represents a symbolic ("soft") link in an HDF5 file. The path
        may be absolute or relative. No checking is performed to ensure
        that the target actually exists.
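
        For example (a sketch; assumes ``f`` is a writable File):

        >>> f['alias'] = SoftLink('/real/target')
        >>> f.get('alias', getlink=True)
        <SoftLink to "/real/target">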
699 """
701 @property
702 def path(self):
703 """ Soft link value. Not guaranteed to be a valid path. """
704 return self._path
706 def __init__(self, path):
707 self._path = str(path)
709 def __repr__(self):
710 return '<SoftLink to "%s">' % self.path


class ExternalLink:

    """
        Represents an HDF5 external link. Paths may be absolute or relative.
        No checking is performed to ensure either the target or file exists.
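
        For example (a sketch; the file and object names are illustrative):

        >>> f['ext'] = ExternalLink('other.h5', '/path/inside/other')
        >>> f['ext']  # dereferences into the external file when accessed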
718 """
720 @property
721 def path(self):
722 """ Soft link path, i.e. the part inside the HDF5 file. """
723 return self._path
725 @property
726 def filename(self):
727 """ Path to the external HDF5 file in the filesystem. """
728 return self._filename
730 def __init__(self, filename, path):
731 self._filename = filename_decode(filename_encode(filename))
732 self._path = path
734 def __repr__(self):
735 return '<ExternalLink to "%s" in file "%s"' % (self.path,
736 self.filename)