Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/vds.py: 20%

116 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# This file is part of h5py, a Python interface to the HDF5 library. 

2# 

3# http://www.h5py.org 

4# 

5# Copyright 2008-2013 Andrew Collette and contributors 

6# 

7# License: Standard 3-clause BSD; see "license.txt" for full license terms 

8# and contributor agreement. 

9 

10""" 

11 High-level interface for creating HDF5 virtual datasets 

12""" 

13 

14from copy import deepcopy as copy 

15from collections import namedtuple 

16 

17import numpy as np 

18 

19from .compat import filename_encode 

20from .datatype import Datatype 

21from .selections import SimpleSelection, select 

22from .. import h5d, h5p, h5s, h5t 

23 

24 

class VDSmap(namedtuple('VDSmap', ('vspace', 'file_name',
                                   'dset_name', 'src_space'))):
    """One mapping of a virtual dataset: the virtual-dataspace region
    ``vspace`` is filled from the ``src_space`` selection of dataset
    ``dset_name`` inside the file ``file_name``.
    """

29 

30 

31vds_support = True 

32 

33 

def _convert_space_for_key(space, key):
    """
    Converts the space with the given key. Mainly used to allow unlimited
    dimensions in virtual space selection.
    """
    if not isinstance(key, tuple):
        key = (key,)

    # h5s.UNLIMITED is only permitted inside a regular hyperslab selection;
    # anything else is left untouched.
    if space.get_select_type() != h5s.SEL_HYPERSLABS:
        return
    if not space.is_regular_hyperslab():
        return

    ndims = space.get_simple_extent_ndims()
    start, stride, count, block = space.get_regular_hyperslab()
    counts = list(count)

    # Walk the key: plain integer indices are ignored; a slice whose stop is
    # h5s.UNLIMITED marks that axis unlimited; an Ellipsis shifts all later
    # key positions onto the trailing dimensions.
    offset = 0
    for pos, item in enumerate(key):
        if item is Ellipsis:
            offset = ndims - len(key)
        elif isinstance(item, slice) and item.stop == h5s.UNLIMITED:
            counts[pos + offset] = h5s.UNLIMITED

    space.select_hyperslab(start, tuple(counts), stride, block)

65 

66 

class VirtualSource:
    """Source definition for virtual data sets.

    Instantiate this class to represent an entire source dataset, and then
    slice it to indicate which regions should be used in the virtual dataset.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            # All metadata comes from the dataset itself, so any explicit
            # keyword argument is a caller error.
            failed = {kw: val for kw, val in (
                ('name', name), ('shape', shape),
                ('dtype', dtype), ('maxshape', maxshape),
            ) if val is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed. You "
                                "passed {failed}".format(failed=failed))
            dataset = path_or_dataset
            path = dataset.file.filename
            name = dataset.name
            shape = dataset.shape
            dtype = dataset.dtype
            maxshape = dataset.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            elif isinstance(shape, int):
                # Allow a bare integer for a 1-D source.
                shape = (shape,)

            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            self.maxshape = shape
        else:
            # None on an axis means "unlimited" in the HDF5 sense.
            self.maxshape = tuple(
                h5s.UNLIMITED if dim is None else dim for dim in maxshape
            )
        self.sel = SimpleSelection(shape)
        self._all_selected = True

    @property
    def shape(self):
        """Shape of the current selection on the source dataset."""
        return self.sel.array_shape

    def __getitem__(self, key):
        """Return a copy of this source restricted to the region ``key``.

        Each VirtualSource may be sliced at most once.
        """
        if not self._all_selected:
            raise RuntimeError("VirtualSource objects can only be sliced once.")
        sliced = copy(self)
        sliced.sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sliced.sel.id, key)
        sliced._all_selected = False
        return sliced

144 

class VirtualLayout:
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        # Accept a bare integer as shorthand for a 1-D shape.
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        self._src_filenames = set()
        self.dcpl = h5p.create(h5p.DATASET_CREATE)

    def __setitem__(self, key, source):
        """Map the layout region ``key`` to the given VirtualSource."""
        sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            # Destination file not yet known: record the source filenames so
            # _get_dcpl() can rewrite same-file mappings to '.' later.
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        """Return the encoded source filename for a mapping.

        Same-file mappings become b'.' so the file can be renamed safely.
        """
        src_encoded = filename_encode(src_filename)
        if dst_filename and src_encoded == filename_encode(dst_filename):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        return filename_encode(src_encoded)

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # Destination was fixed at construction; it must match.
            if dst_filename != filename_encode(self._filename):
                raise Exception(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # Destination file was not known in advance.
        if dst_filename not in self._src_filenames:
            # Every mapping points at a different file; nothing to rewrite.
            return self.dcpl

        # At least one source file equals the destination file, but we did
        # not know that when the mapping was made.  Copy every mapping onto
        # a fresh property list, substituting '.' for the destination name.
        new_dcpl = h5p.create(h5p.DATASET_CREATE)
        for idx in range(self.dcpl.get_virtual_count()):
            mapping_file = self.dcpl.get_virtual_filename(idx)
            new_dcpl.set_virtual(
                self.dcpl.get_virtual_vspace(idx),
                self._source_file_name(mapping_file, dst_filename),
                self.dcpl.get_virtual_dsetname(idx).encode('utf-8'),
                self.dcpl.get_virtual_srcspace(idx),
            )
        return new_dcpl

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            # None on an axis means "unlimited" in the HDF5 sense.
            maxshape = tuple(
                h5s.UNLIMITED if dim is None else dim for dim in maxshape
            )

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            tid = h5t.py_create(np.dtype(self.dtype), logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)