Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/vds.py: 20%
116 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.

"""
    High-level interface for creating HDF5 virtual datasets
"""
14from copy import deepcopy as copy
15from collections import namedtuple
17import numpy as np
19from .compat import filename_encode
20from .datatype import Datatype
21from .selections import SimpleSelection, select
22from .. import h5d, h5p, h5s, h5t
class VDSmap(namedtuple('VDSmap', ('vspace', 'file_name',
                                   'dset_name', 'src_space'))):
    """Defines a region in a virtual dataset mapping to part of a source dataset.

    A plain four-field record; the fields are, in order:

    vspace
        NOTE(review): presumably the selection within the virtual dataset --
        confirm against callers.
    file_name
        NOTE(review): presumably the name of the file holding the source
        dataset -- confirm against callers.
    dset_name
        NOTE(review): presumably the name of the source dataset within that
        file -- confirm against callers.
    src_space
        NOTE(review): presumably the selection within the source dataset --
        confirm against callers.
    """
# Module-level flag, unconditionally True in this module.
# NOTE(review): presumably kept so callers can feature-test virtual dataset
# support -- confirm before removing.
vds_support = True
def _convert_space_for_key(space, key):
    """
    Converts the space with the given key. Mainly used to allow unlimited
    dimensions in virtual space selection.

    ``space`` is modified in place: when its current selection is a regular
    hyperslab, the selection is re-applied with the count set to
    ``h5s.UNLIMITED`` on every axis whose slice in ``key`` has
    ``h5s.UNLIMITED`` as its stop. Any other kind of selection is left
    untouched. Nothing is returned.

    space
        Dataspace object providing ``get_select_type``,
        ``is_regular_hyperslab``, ``get_simple_extent_ndims``,
        ``get_regular_hyperslab`` and ``select_hyperslab``.
    key
        The indexing key used to build the selection; a single index or a
        tuple of indices, slices and/or Ellipsis.
    """
    key = key if isinstance(key, tuple) else (key,)
    type_code = space.get_select_type()

    # check for unlimited selections in case where selection is regular
    # hyperslab, which is the only allowed case for h5s.UNLIMITED to be
    # in the selection
    if type_code == h5s.SEL_HYPERSLABS and space.is_regular_hyperslab():
        rank = space.get_simple_extent_ndims()
        nargs = len(key)

        idx_offset = 0
        start, stride, count, block = space.get_regular_hyperslab()
        # Work on one mutable copy instead of rebuilding the tuple for every
        # unlimited axis.
        counts = list(count)
        # iterate through keys. we ignore numeral indices. if we get a
        # slice, we check for an h5s.UNLIMITED value as the stop
        # if we get an ellipsis, we offset index by (rank - nargs)
        for i, sl in enumerate(key):
            if isinstance(sl, slice):
                if sl.stop == h5s.UNLIMITED:
                    counts[i + idx_offset] = h5s.UNLIMITED
            elif sl is Ellipsis:
                # Entries after the ellipsis are shifted by this offset to
                # find their axis.
                idx_offset = rank - nargs

        space.select_hyperslab(start, tuple(counts), stride, block)
class VirtualSource:
    """Source definition for virtual data sets.

    Instantiate this class to represent an entire source dataset, and then
    slice it to indicate which regions should be used in the virtual dataset.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.

    Raises TypeError if a dataset is combined with any other argument, or if
    a path is given without ``name`` or ``shape``.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            # Everything is read from the dataset, so any explicitly passed
            # extra argument is an error; collect them for the message.
            failed = {k: v
                      for k, v in
                      {'name': name, 'shape': shape,
                       'dtype': dtype, 'maxshape': maxshape}.items()
                      if v is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed.  You "
                                "passed {failed}".format(failed=failed))
            ds = path_or_dataset
            path = ds.file.filename
            name = ds.name
            shape = ds.shape
            dtype = ds.dtype
            maxshape = ds.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            elif isinstance(shape, int):
                # A bare int is shorthand for a one-dimensional shape.
                shape = (shape,)

            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            # No maxshape given: the maximum shape is the current shape.
            self.maxshape = shape
        else:
            # None marks an unlimited axis; store it as h5s.UNLIMITED.
            self.maxshape = tuple(h5s.UNLIMITED if ix is None else ix
                                  for ix in maxshape)
        # Start with a selection built from the full shape; __getitem__
        # replaces it on a copy.
        self.sel = SimpleSelection(shape)
        self._all_selected = True

    @property
    def shape(self):
        """Shape reported by the current selection (``self.sel.array_shape``)."""
        return self.sel.array_shape

    def __getitem__(self, key):
        """Return a deep copy of this source restricted to ``key``.

        The original object is left unchanged. Raises RuntimeError if this
        object is itself the result of a previous slicing operation.
        """
        if not self._all_selected:
            raise RuntimeError("VirtualSource objects can only be sliced once.")
        tmp = copy(self)
        tmp.sel = select(self.shape, key, dataset=None)
        # Turn h5s.UNLIMITED slice stops into unlimited hyperslab counts.
        _convert_space_for_key(tmp.sel.id, key)
        tmp._all_selected = False
        return tmp
class VirtualLayout:
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        # A bare int is shorthand for a one-dimensional shape / maxshape.
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        # Encoded source filenames seen so far; only filled in while the
        # destination filename is unknown (see __setitem__).
        self._src_filenames = set()
        # Dataset creation property list that accumulates the mappings.
        self.dcpl = h5p.create(h5p.DATASET_CREATE)

    def __setitem__(self, key, source):
        """Map the region ``key`` of this layout onto ``source``.

        ``source`` must provide ``path``, ``name`` and ``sel`` attributes,
        as VirtualSource does.
        """
        sel = select(self.shape, key, dataset=None)
        # Turn h5s.UNLIMITED slice stops into unlimited hyperslab counts.
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            # Remember the source so _get_dcpl can detect a same-file mapping
            # once the destination filename is known.
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        """Return the encoded filename to store for a mapping.

        Returns b'.' when ``dst_filename`` is truthy and encodes to the same
        value as ``src_filename``; otherwise the encoded ``src_filename``.
        """
        src_filename = filename_encode(src_filename)
        if dst_filename and (src_filename == filename_encode(dst_filename)):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        return filename_encode(src_filename)

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.

        Raises ValueError if a filename was given at construction and it does
        not match ``dst_filename``.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # filename was known in advance; check dst_filename matches
            if dst_filename != filename_encode(self._filename):
                # ValueError is still an Exception, so existing handlers
                # that catch Exception keep working.
                raise ValueError(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # destination file not known in advance
        if dst_filename in self._src_filenames:
            # At least 1 source file is the same as the destination file,
            # but we didn't know this when making the mapping. Copy the mappings
            # to a new property list, replacing the dest filename with '.'
            new_dcpl = h5p.create(h5p.DATASET_CREATE)
            for i in range(self.dcpl.get_virtual_count()):
                src_filename = self.dcpl.get_virtual_filename(i)
                new_dcpl.set_virtual(
                    self.dcpl.get_virtual_vspace(i),
                    self._source_file_name(src_filename, dst_filename),
                    self.dcpl.get_virtual_dsetname(i).encode('utf-8'),
                    self.dcpl.get_virtual_srcspace(i),
                )
            return new_dcpl
        else:
            return self.dcpl  # Mappings are all from other files

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset

        parent
            Object providing ``file.filename`` and ``id``; the dataset is
            created under ``parent.id``.
        name
            Name passed through to ``h5d.create``.
        fillvalue
            If not None, set as the fill value on the property list.
        """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            # None marks an unlimited axis; pass it on as h5s.UNLIMITED.
            maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            dtype = np.dtype(self.dtype)
            tid = h5t.py_create(dtype, logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)