Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/selections.py: 22%

200 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# This file is part of h5py, a Python interface to the HDF5 library. 

2# 

3# http://www.h5py.org 

4# 

5# Copyright 2008-2013 Andrew Collette and contributors 

6# 

7# License: Standard 3-clause BSD; see "license.txt" for full license terms 

8# and contributor agreement. 

9 

10""" 

11 High-level access to HDF5 dataspace selections 

12""" 

13 

14import numpy as np 

15 

16from .base import product 

17from .. import h5s, h5r, _selector 

18 

def select(shape, args, dataset=None):
    """ Turn arbitrary ``__getitem__`` arguments into a selection object.

    shape
        Shape of the "source" dataspace.

    args
        One indexing argument, or a tuple of them.  A non-tuple value is
        treated as a 1-tuple.  Supported argument classes are listed below.

    dataset
        Optional h5py.Dataset instance representing the source dataset.
        It is required to resolve a region reference, and when present
        its ``_selector`` is reused for ordinary indexing.

    Argument classes:

    Single Selection instance
        Returned unchanged, after checking its shape against ``shape``.

    numpy.ndarray
        Must be a boolean mask with exactly ``shape``.  Returns a
        PointSelection instance.

    RegionReference
        Returns a Selection instance wrapping the referenced region.

    Indices, slices, ellipses, MultiBlockSlices only
        Returns a SimpleSelection instance

    Indices, slices, ellipses, lists or boolean index arrays
        Returns a FancySelection instance.

    Raises TypeError when a special indexing object does not match
    ``shape``, or when a region reference is given without a dataset.
    """
    index = args if isinstance(args, tuple) else (args,)

    # "Special" indexing objects are only recognised when they are the sole
    # argument; every branch below either returns or raises.
    if len(index) == 1:
        only = index[0]

        if isinstance(only, Selection):
            if only.shape != shape:
                raise TypeError("Mismatched selection shape")
            return only

        if isinstance(only, np.ndarray) and only.dtype.kind == 'b':
            if only.shape != shape:
                raise TypeError("Boolean indexing array has incompatible shape")
            return PointSelection.from_mask(only)

        if isinstance(only, h5r.RegionReference):
            if dataset is None:
                raise TypeError("Cannot apply a region reference without a dataset")
            region_space = h5r.get_region(only, dataset.id)
            if shape != region_space.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=region_space)

    # Everything else is delegated to a selector: the dataset's own one when
    # available, otherwise one built on a fresh simple dataspace.
    if dataset is None:
        selector = _selector.Selector(h5s.create_simple(shape))
    else:
        selector = dataset._selector

    return selector.make_selection(index)

83 

84 

class Selection:

    """
        Base class for HDF5 dataspace selections.  Subclasses support the
        "selection protocol", which means they have at least the following
        members:

        __init__(shape)   => Create a new selection on "shape"-tuple
        __getitem__(args) => Perform a selection with the range specified.
                             What args are allowed depends on the
                             particular subclass in use.

        id (read-only) =>      h5py.h5s.SpaceID instance
        shape (read-only) =>   The shape of the dataspace.
        mshape  (read-only) => The shape of the selection region.
                               Not guaranteed to fit within "shape", although
                               the total number of points is less than
                               product(shape).
        nselect (read-only) => Number of selected points.  Always equal to
                               product(mshape).

        broadcast(target_shape) => Return an iterable which yields dataspaces
                                   for read, based on target_shape.

        The base class represents "unshaped" selections (1-D).
    """

    def __init__(self, shape, spaceid=None):
        """ Create a selection.  Shape may be None if spaceid is given.

        With a ``spaceid`` the selection wraps that dataspace and takes its
        shape from it; otherwise a new simple dataspace of ``shape`` (with
        unlimited maximum dimensions) is created and fully selected.
        """
        if spaceid is None:
            dims = tuple(shape)
            self._shape = dims
            max_dims = (h5s.UNLIMITED,) * len(dims)
            self._id = h5s.create_simple(dims, max_dims)
            self._id.select_all()
        else:
            self._id = spaceid
            self._shape = spaceid.shape

    @property
    def id(self):
        """ SpaceID instance """
        return self._id

    @property
    def shape(self):
        """ Shape of whole dataspace """
        return self._shape

    @property
    def nselect(self):
        """ Number of elements currently selected """
        return self._id.get_select_npoints()

    @property
    def mshape(self):
        """ Shape of selection (always 1-D for this class) """
        npoints = self.nselect
        return (npoints,)

    @property
    def array_shape(self):
        """Shape of array to read/write (always 1-D for this class)"""
        return self.mshape

    def _require_same_size(self, source_shape):
        """ Raise TypeError unless source_shape holds exactly nselect points """
        if product(source_shape) != self.nselect:
            raise TypeError("Broadcasting is not supported for point-wise selections")

    # expand_shape and broadcast only really make sense for SimpleSelection
    def expand_shape(self, source_shape):
        """ Return source_shape unchanged once its size is known to match """
        self._require_same_size(source_shape)
        return source_shape

    def broadcast(self, source_shape):
        """ Get an iterable for broadcasting

        Yields the selection's own dataspace once; the size check runs when
        the generator is first advanced.
        """
        self._require_same_size(source_shape)
        yield self._id

    def __getitem__(self, args):
        raise NotImplementedError("This class does not support indexing")

162 

class PointSelection(Selection):

    """
        Represents a point-wise selection.  You can supply sequences of
        points to the three methods append(), prepend() and set(), or
        instantiate it with a single boolean array using from_mask().
    """

    def __init__(self, shape, spaceid=None, points=None):
        """ Create the selection, optionally selecting ``points`` right away.

        ``shape`` and ``spaceid`` are handled by the base class; when
        ``points`` is given it replaces whatever is selected initially.
        """
        super().__init__(shape, spaceid)
        if points is not None:
            self._perform_selection(points, h5s.SELECT_SET)

    def _perform_selection(self, points, op):
        """ Internal method which actually performs the selection

        points
            Sequence of coordinates, converted to a C-ordered uint64 array.
            A 1-D input is treated as a single point.

        op
            One of the h5s.SELECT_* operators.  It is forced to SELECT_SET
            when the dataspace does not currently hold a point selection.
        """
        points = np.asarray(points, order='C', dtype='u8')
        if points.ndim == 1:
            # np.asarray may hand back the caller's own array, so reshape into
            # a new view rather than assigning to .shape, which would change
            # the caller's array as a side effect.
            points = points.reshape((1, points.shape[0]))

        if self._id.get_select_type() != h5s.SEL_POINTS:
            op = h5s.SELECT_SET

        if len(points) == 0:
            self._id.select_none()
        else:
            self._id.select_elements(points, op)

    @classmethod
    def from_mask(cls, mask, spaceid=None):
        """Create a point-wise selection from a NumPy boolean array

        Raises TypeError if ``mask`` is not a boolean ndarray.
        """
        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
            raise TypeError("PointSelection.from_mask only works with bool arrays")

        # nonzero() gives one index array per axis; transposing yields one
        # row of coordinates per selected element.
        points = np.transpose(mask.nonzero())
        return cls(mask.shape, spaceid, points=points)

    def append(self, points):
        """ Add the sequence of points to the end of the current selection """
        self._perform_selection(points, h5s.SELECT_APPEND)

    def prepend(self, points):
        """ Add the sequence of points to the beginning of the current selection """
        self._perform_selection(points, h5s.SELECT_PREPEND)

    def set(self, points):
        """ Replace the current selection with the given sequence of points"""
        self._perform_selection(points, h5s.SELECT_SET)

209 

210 

class SimpleSelection(Selection):

    """ A single "rectangular" (regular) selection composed of only slices
        and integer arguments.  Can participate in broadcasting.

        The selection is held in ``self._sel`` as a 4-tuple
        ``(start, count, step, scalar)`` with one entry per dataspace axis.
        Axes whose ``scalar`` flag is true are dropped from array_shape
        (presumably the axes indexed with a single integer).
    """

    @property
    def mshape(self):
        """ Shape of current selection """
        return self._sel[1]

    @property
    def array_shape(self):
        """ Shape of the array to read/write: mshape without scalar axes """
        scalar = self._sel[3]
        return tuple(x for x, s in zip(self.mshape, scalar) if not s)

    def __init__(self, shape, spaceid=None, hyperslab=None):
        """ Create the selection.

        ``hyperslab`` is a ``(start, count, step, scalar)`` tuple; when it
        is omitted the whole dataspace is selected.
        """
        super().__init__(shape, spaceid)
        if hyperslab is not None:
            self._sel = hyperslab
        else:
            # No hyperslab specified - select all
            rank = len(self.shape)
            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)

    def expand_shape(self, source_shape):
        """Match the dimensions of an array to be broadcast to the selection

        The returned shape describes an array of the same size as the input
        shape, but its dimensions line up one-to-one with the axes of the
        selection: scalar axes and missing leading axes become length 1.

        E.g. with a dataset shape (10, 5, 4, 2), writing like this::

            ds[..., 0] = np.ones((5, 4))

        The source shape (5, 4) will expand to (1, 5, 4, 1).
        Then the broadcast method below repeats that chunk 10
        times to write to an effective shape of (10, 5, 4, 1).

        Raises TypeError if source_shape cannot be broadcast to the
        selection.
        """
        _, count, _, scalar = self._sel

        rank = len(count)
        remaining_src_dims = list(source_shape)

        # Walk the axes from last to first, consuming source dimensions from
        # the right as NumPy broadcasting does.
        eshape = []
        for idx in range(1, rank + 1):
            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
                eshape.append(1)
            else:
                t = remaining_src_dims.pop()
                if t == 1 or count[-idx] == t:
                    eshape.append(t)
                else:
                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        if any(n > 1 for n in remaining_src_dims):
            # All dimensions from target_shape should either have been popped
            # to match the selection shape, or be 1.
            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        # We have built eshape backwards, so now reverse it
        return tuple(eshape[::-1])

    def broadcast(self, source_shape):
        """ Return an iterator over target dataspaces for broadcasting.

        Follows the standard NumPy broadcasting rules against the current
        selection shape (self.mshape).

        Raises TypeError (when the generator is advanced) if source_shape
        cannot be broadcast to the selection.
        """
        if self.shape == ():
            if product(source_shape) != 1:
                # Wrap in a 1-tuple: a bare tuple on the right of % would be
                # unpacked as the argument list and break the message.
                raise TypeError("Can't broadcast %s to scalar" % (source_shape,))
            self._id.select_all()
            yield self._id
            return

        start, count, step, _ = self._sel

        rank = len(count)
        tshape = self.expand_shape(source_shape)

        # Number of times the source block repeats along each axis
        chunks = tuple(x//y for x, y in zip(count, tshape))
        nchunks = product(chunks)

        if nchunks == 1:
            yield self._id
        else:
            # Select one source-sized block at the origin, then slide it over
            # the selection by changing the dataspace offset.  The same
            # dataspace object is yielded each time with a new offset.
            sid = self._id.copy()
            sid.select_hyperslab((0,)*rank, tshape, step)
            for idx in range(nchunks):
                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
                sid.offset_simple(offset)
                yield sid

305 

306 

class FancySelection(Selection):

    """
        Implements advanced NumPy-style selection operations in addition to
        the standard slice-and-int behavior.

        Indexing arguments may be ints, slices, lists of indices, or
        per-axis (1D) boolean arrays.

        Broadcasting is not supported for these selections.
    """

    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
        """ Create the selection.

        ``mshape`` defaults to the dataspace shape, and ``array_shape``
        defaults to ``mshape``.
        """
        super().__init__(shape, spaceid)
        self._mshape = self.shape if mshape is None else mshape
        self._array_shape = self._mshape if array_shape is None else array_shape

    @property
    def mshape(self):
        """ Shape of the selection, as given at construction """
        return self._mshape

    @property
    def array_shape(self):
        """ Shape of the array to read/write, as given at construction """
        return self._array_shape

    def _require_exact_shape(self, source_shape):
        """ Raise TypeError unless source_shape equals array_shape """
        same = source_shape == self.array_shape
        if not same:
            raise TypeError("Broadcasting is not supported for complex selections")

    def expand_shape(self, source_shape):
        """ Return source_shape unchanged; it must equal array_shape """
        self._require_exact_shape(source_shape)
        return source_shape

    def broadcast(self, source_shape):
        """ Yield the selection's dataspace once; no real broadcasting

        The shape check runs when the generator is first advanced.
        """
        self._require_exact_shape(source_shape)
        yield self._id

345 

346 

def guess_shape(sid):
    """ Given a dataspace, try to deduce the shape of the selection.

    Returns one of:
        * A tuple with the selection shape, same length as the dataspace
        * A 1D selection shape for point-based and multiple-hyperslab selections
        * None, for unselected scalars and for NULL dataspaces

    Raises TypeError for an unrecognized dataspace class or selection type.
    """

    sel_class = sid.get_simple_extent_type()    # Dataspace class
    sel_type = sid.get_select_type()            # Flavor of selection in use

    if sel_class == h5s.NULL:
        # NULL dataspaces don't support selections
        return None

    elif sel_class == h5s.SCALAR:
        # NumPy has no way of expressing empty 0-rank selections, so we use None
        if sel_type == h5s.SEL_NONE:
            return None
        if sel_type == h5s.SEL_ALL:
            return tuple()
        # Any other selection type on a scalar space falls through to the
        # generic handling below.

    elif sel_class != h5s.SIMPLE:
        raise TypeError("Unrecognized dataspace class %s" % sel_class)

    # We have a "simple" (rank >= 1) dataspace

    N = sid.get_select_npoints()
    rank = len(sid.shape)

    if sel_type == h5s.SEL_NONE:
        return (0,)*rank

    elif sel_type == h5s.SEL_ALL:
        return sid.shape

    elif sel_type == h5s.SEL_POINTS:
        # Like NumPy, point-based selections yield 1D arrays regardless of
        # the dataspace rank
        return (N,)

    elif sel_type != h5s.SEL_HYPERSLABS:
        raise TypeError("Unrecognized selection method %s" % sel_type)

    # We have a hyperslab-based selection

    if N == 0:
        return (0,)*rank

    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())

    # Shape of full selection box.  Adding the integer 1 (rather than a
    # float array of ones) keeps the extents integral, so the counts passed
    # to select_hyperslab below are integers just like the start coordinates.
    boxshape = topcorner - bottomcorner + 1

    def get_n_axis(sid, axis):
        """ Determine the number of elements selected along a particular axis.

        To do this, we "mask off" the axis by making a hyperslab selection
        which leaves only the first point along the axis.  For a 2D dataset
        with selection box shape (X, Y), for axis 1, this would leave a
        selection of shape (X, 1).  We count the number of points N_leftover
        remaining in the selection and compute the axis selection length by
        N_axis = N/N_leftover.
        """

        if boxshape[axis] == 1:
            return 1

        # Box covering everything except the first position along `axis`
        start = bottomcorner.copy()
        start[axis] += 1
        count = boxshape.copy()
        count[axis] -= 1

        # Throw away all points along this axis
        masked_sid = sid.copy()
        masked_sid.select_hyperslab(tuple(start), tuple(count), op=h5s.SELECT_NOTB)

        N_leftover = masked_sid.get_select_npoints()

        return N//N_leftover

    shape = tuple(get_n_axis(sid, x) for x in range(rank))

    if product(shape) != N:
        # This means multiple hyperslab selections are in effect,
        # so we fall back to a 1D shape
        return (N,)

    return shape