# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
#          and contributor agreement.

"""
    High-level interface for creating HDF5 virtual datasets
"""

from copy import deepcopy as copy
from collections import namedtuple

import numpy as np

from .compat import filename_encode
from .datatype import Datatype
from .selections import SimpleSelection, select
from .. import h5d, h5p, h5s, h5t, h5
from .. import version


class VDSmap(namedtuple('VDSmap', ('vspace', 'file_name',
                                   'dset_name', 'src_space'))):
    '''Defines a region in a virtual dataset mapping to part of a source dataset
    '''


vds_support = False
hdf5_version = version.hdf5_version_tuple[0:3]

if hdf5_version >= h5.get_config().vds_min_hdf5_version:
    vds_support = True


def _convert_space_for_key(space, key):
    """
    Converts the space with the given key. Mainly used to allow unlimited
    dimensions in virtual space selection.
    """
    key = key if isinstance(key, tuple) else (key,)
    type_code = space.get_select_type()

    # check for unlimited selections in case where selection is regular
    # hyperslab, which is the only allowed case for h5s.UNLIMITED to be
    # in the selection
    if type_code == h5s.SEL_HYPERSLABS and space.is_regular_hyperslab():
        rank = space.get_simple_extent_ndims()
        nargs = len(key)

        idx_offset = 0
        start, stride, count, block = space.get_regular_hyperslab()
        # iterate through keys. we ignore numeral indices. if we get a
        # slice, we check for an h5s.UNLIMITED value as the stop
        # if we get an ellipsis, we offset index by (rank - nargs)
        for i, sl in enumerate(key):
            if isinstance(sl, slice):
                if sl.stop == h5s.UNLIMITED:
                    counts = list(count)
                    idx = i + idx_offset
                    counts[idx] = h5s.UNLIMITED
                    count = tuple(counts)
            elif sl is Ellipsis:
                idx_offset = rank - nargs

        space.select_hyperslab(start, count, stride, block)
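# Note (illustrative, not part of the original module): in the public API an
# unlimited extent is requested by giving h5s.UNLIMITED as the stop of a
# slice, e.g. roughly ``vsource[:, :h5s.UNLIMITED]``; the helper above then
# rewrites the regular hyperslab so that the corresponding count becomes
# h5s.UNLIMITED.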

class VirtualSource:
    """Source definition for virtual data sets.

    Instantiate this class to represent an entire source dataset, and then
    slice it to indicate which regions should be used in the virtual dataset.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            failed = {k: v
                      for k, v in
                      {'name': name, 'shape': shape,
                       'dtype': dtype, 'maxshape': maxshape}.items()
                      if v is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed. You "
                                "passed {failed}".format(failed=failed))
            ds = path_or_dataset
            path = ds.file.filename
            name = ds.name
            shape = ds.shape
            dtype = ds.dtype
            maxshape = ds.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            elif isinstance(shape, int):
                shape = (shape,)

            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            self.maxshape = shape
        else:
            self.maxshape = tuple([h5s.UNLIMITED if ix is None else ix
                                   for ix in maxshape])
        self.sel = SimpleSelection(shape)

    @property
    def shape(self):
        return self.sel.array_shape

    def __getitem__(self, key):
        tmp = copy(self)
        tmp.sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(tmp.sel.id, key)
        return tmp
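# Usage sketch (illustrative only, not part of the original module): a source
# is usually declared for an existing file and dataset, then sliced to select
# the region that should appear in the virtual dataset. The file name
# 'raw.h5' and dataset name 'data' below are hypothetical.
#
#     vsource = VirtualSource('raw.h5', 'data', shape=(100,))
#     part = vsource[0:50]          # use only the first half of the source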

class VirtualLayout:
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        self._src_filenames = set()
        self.dcpl = h5p.create(h5p.DATASET_CREATE)

    def __setitem__(self, key, source):
        sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        src_filename = filename_encode(src_filename)
        if dst_filename and (src_filename == filename_encode(dst_filename)):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        return filename_encode(src_filename)

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # filename was known in advance; check dst_filename matches
            if dst_filename != filename_encode(self._filename):
                raise Exception(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # destination file not known in advance
        if dst_filename in self._src_filenames:
            # At least 1 source file is the same as the destination file,
            # but we didn't know this when making the mapping. Copy the
            # mappings to a new property list, replacing the dest filename
            # with '.'
            new_dcpl = h5p.create(h5p.DATASET_CREATE)
            for i in range(self.dcpl.get_virtual_count()):
                src_filename = self.dcpl.get_virtual_filename(i)
                new_dcpl.set_virtual(
                    self.dcpl.get_virtual_vspace(i),
                    self._source_file_name(src_filename, dst_filename),
                    self.dcpl.get_virtual_dsetname(i).encode('utf-8'),
                    self.dcpl.get_virtual_srcspace(i),
                )
            return new_dcpl
        else:
            return self.dcpl  # Mappings are all from other files

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            dtype = np.dtype(self.dtype)
            tid = h5t.py_create(dtype, logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)
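# Usage sketch (illustrative only, not part of the original module): a layout
# is filled by assigning VirtualSource selections to slices of it, then
# materialised with Group.create_virtual_dataset(). The file and dataset
# names below are hypothetical.
#
#     import h5py
#
#     layout = h5py.VirtualLayout(shape=(4, 100), dtype='i4')
#     for n in range(4):
#         vsource = h5py.VirtualSource('data{}.h5'.format(n), 'data',
#                                      shape=(100,))
#         layout[n] = vsource
#
#     with h5py.File('VDS.h5', 'w', libver='latest') as f:
#         f.create_virtual_dataset('vdata', layout, fillvalue=-1)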