Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/vds.py: 23%
116 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 06:32 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 06:32 +0000
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.
"""
    High-level interface for creating HDF5 virtual datasets
"""
14from copy import deepcopy as copy
15from collections import namedtuple
17import numpy as np
19from .compat import filename_encode
20from .datatype import Datatype
21from .selections import SimpleSelection, select
22from .. import h5d, h5p, h5s, h5t, h5
23from .. import version
class VDSmap(namedtuple('VDSmap', ('vspace', 'file_name',
                                   'dset_name', 'src_space'))):
    """Defines a region in a virtual dataset mapping to part of a source dataset.

    A plain named tuple with four fields, in this order: ``vspace``,
    ``file_name``, ``dset_name`` and ``src_space``.
    """
# Virtual dataset support is enabled when the first three components of the
# HDF5 library version are at least the minimum version reported by the
# h5py build configuration.
hdf5_version = version.hdf5_version_tuple[0:3]
vds_support = hdf5_version >= h5.get_config().vds_min_hdf5_version
def _convert_space_for_key(space, key):
    """
    Converts the space with the given key. Mainly used to allow unlimited
    dimensions in virtual space selection.

    space
        Dataspace identifier whose current selection is rewritten in place.
    key
        The indexing key the selection was built from; a single item or a
        tuple of items (integers, slices, Ellipsis).

    Every axis whose slice in ``key`` has ``h5s.UNLIMITED`` as its stop gets
    ``h5s.UNLIMITED`` as its hyperslab count. Nothing is returned.
    """
    if not isinstance(key, tuple):
        key = (key,)

    # A regular hyperslab is the only kind of selection in which
    # h5s.UNLIMITED is allowed, so every other selection is left untouched.
    if space.get_select_type() != h5s.SEL_HYPERSLABS:
        return
    if not space.is_regular_hyperslab():
        return

    rank = space.get_simple_extent_ndims()
    nargs = len(key)
    start, stride, count, block = space.get_regular_hyperslab()
    counts = list(count)

    # Walk the key: integer indices are ignored, a slice is checked for an
    # h5s.UNLIMITED stop, and an Ellipsis shifts the axis index of every
    # later item by (rank - nargs).
    axis_offset = 0
    for position, item in enumerate(key):
        if isinstance(item, slice):
            if item.stop == h5s.UNLIMITED:
                counts[position + axis_offset] = h5s.UNLIMITED
        elif item is Ellipsis:
            axis_offset = rank - nargs

    space.select_hyperslab(start, tuple(counts), stride, block)
class VirtualSource:
    """Source definition for virtual data sets.

    Instantiate this class to represent an entire source dataset, and then
    slice it to indicate which regions should be used in the virtual dataset.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        """Describe a source dataset, either from a Dataset or from a path.

        Raises TypeError if a Dataset is combined with any other argument,
        or if a path is given without ``name`` or without ``shape``.
        """
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            # Everything is read from the dataset itself, so any explicitly
            # passed keyword is rejected.
            given = {'name': name, 'shape': shape,
                     'dtype': dtype, 'maxshape': maxshape}
            failed = {k: v for k, v in given.items() if v is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed.  You "
                                "passed {failed}".format(failed=failed))
            ds = path_or_dataset
            path = ds.file.filename
            name = ds.name
            shape = ds.shape
            dtype = ds.dtype
            maxshape = ds.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            # A bare integer is shorthand for a one-dimensional shape.
            if isinstance(shape, int):
                shape = (shape,)
            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            self.maxshape = shape
        else:
            # None marks an unlimited axis; store it as h5s.UNLIMITED.
            self.maxshape = tuple(h5s.UNLIMITED if ix is None else ix
                                  for ix in maxshape)
        self.sel = SimpleSelection(shape)

    @property
    def shape(self):
        """Array shape of the current selection (``self.sel.array_shape``)."""
        return self.sel.array_shape

    def __getitem__(self, key):
        """Return a deep copy of this source with ``key`` as its selection."""
        tmp = copy(self)
        tmp.sel = select(self.shape, key, dataset=None)
        # Apply any h5s.UNLIMITED slice stops to the new selection.
        _convert_space_for_key(tmp.sel.id, key)
        return tmp
class VirtualLayout:
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        # A bare integer is shorthand for a one-dimensional shape.
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        # Encoded source filenames, recorded only while the destination
        # filename is unknown (see __setitem__ and _get_dcpl).
        self._src_filenames = set()
        self.dcpl = h5p.create(h5p.DATASET_CREATE)

    def __setitem__(self, key, source):
        """Map the region ``key`` of the virtual dataset onto ``source``."""
        sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            # Remember the source so _get_dcpl can later detect a mapping
            # that points into the destination file itself.
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        """Return the encoded source filename to store in a mapping.

        Returns b'.' when ``dst_filename`` is truthy and encodes to the same
        value as ``src_filename``; otherwise the encoded ``src_filename``.
        """
        src_filename = filename_encode(src_filename)
        if dst_filename and (src_filename == filename_encode(dst_filename)):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        # Already encoded above; no second filename_encode call is needed.
        return src_filename

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.

        Raises Exception if a filename was given at construction time and
        it does not match ``dst_filename``.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # filename was known in advance; check dst_filename matches
            if dst_filename != filename_encode(self._filename):
                raise Exception(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # destination file not known in advance
        if dst_filename not in self._src_filenames:
            return self.dcpl  # Mappings are all from other files

        # At least 1 source file is the same as the destination file,
        # but we didn't know this when making the mapping. Copy the mappings
        # to a new property list, replacing the dest filename with '.'
        new_dcpl = h5p.create(h5p.DATASET_CREATE)
        for i in range(self.dcpl.get_virtual_count()):
            src_filename = self.dcpl.get_virtual_filename(i)
            new_dcpl.set_virtual(
                self.dcpl.get_virtual_vspace(i),
                self._source_file_name(src_filename, dst_filename),
                self.dcpl.get_virtual_dsetname(i).encode('utf-8'),
                self.dcpl.get_virtual_srcspace(i),
            )
        return new_dcpl

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            # None marks an unlimited axis; pass it on as h5s.UNLIMITED.
            maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            dtype = np.dtype(self.dtype)
            tid = h5t.py_create(dtype, logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)