Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/base.py: 40%

247 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License:  Standard 3-clause BSD; see "license.txt" for full license terms
#           and contributor agreement.

"""
    Implements operations common to all high-level objects (File, etc.).
"""

from collections.abc import (
    Mapping, MutableMapping, KeysView, ValuesView, ItemsView
)
import os
import posixpath

import numpy as np

# The high-level interface is serialized; every public API function & method
# is wrapped in a lock.  We re-use the low-level lock because (1) it's fast,
# and (2) it eliminates the possibility of deadlocks due to out-of-order
# lock acquisition.
from .._objects import phil, with_phil
from .. import h5d, h5i, h5r, h5p, h5f, h5t, h5s
from .compat import fspath, filename_encode

def is_hdf5(fname):
    """ Determine if a file is valid HDF5 (False if it doesn't exist). """
    with phil:
        fname = os.path.abspath(fspath(fname))

        if os.path.isfile(fname):
            return h5f.is_hdf5(filename_encode(fname))
        return False
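
# Illustrative usage sketch, not part of the original module: is_hdf5() is
# re-exported as h5py.is_hdf5. The path below is hypothetical; the call simply
# returns False for paths that don't exist or aren't HDF5 files.
def _example_is_hdf5():
    import h5py
    return h5py.is_hdf5("data/results.h5")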

def find_item_type(data):
    """Find the item type of a simple object or collection of objects.

    E.g. [[['a']]] -> str

    The focus is on collections where all items have the same type; we'll return
    None if that's not the case.

    The aim is to treat numpy arrays of Python objects like normal Python
    collections, while treating arrays with specific dtypes differently.
    We're also only interested in array-like collections - lists and tuples,
    possibly nested - not things like sets or dicts.
    """
    if isinstance(data, np.ndarray):
        if (
            data.dtype.kind == 'O'
            and not h5t.check_string_dtype(data.dtype)
            and not h5t.check_vlen_dtype(data.dtype)
        ):
            item_types = {type(e) for e in data.flat}
        else:
            return None
    elif isinstance(data, (list, tuple)):
        item_types = {find_item_type(e) for e in data}
    else:
        return type(data)

    if len(item_types) != 1:
        return None
    return item_types.pop()
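
# Illustrative sketch, not part of the original module: find_item_type() only
# reports a type when every leaf item agrees, and ignores arrays that already
# carry a concrete dtype.
def _example_find_item_type():
    assert find_item_type(b'x') is bytes
    assert find_item_type([[['a']]]) is str              # nested but homogeneous
    assert find_item_type(['a', b'b']) is None           # mixed str/bytes
    assert find_item_type(np.array([1.0, 2.0])) is None  # concrete dtype -> skipped
    assert find_item_type(np.array(['a', 'b'], dtype=object)) is str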

def guess_dtype(data):
    """ Attempt to guess an appropriate dtype for the object, returning None
    if nothing is appropriate (or if it should be left up to the array
    constructor to figure out)
    """
    with phil:
        if isinstance(data, h5r.RegionReference):
            return h5t.regionref_dtype
        if isinstance(data, h5r.Reference):
            return h5t.ref_dtype

        item_type = find_item_type(data)

        if item_type is bytes:
            return h5t.string_dtype(encoding='ascii')
        if item_type is str:
            return h5t.string_dtype()

        return None

def is_float16_dtype(dt):
    if dt is None:
        return False

    dt = np.dtype(dt)  # normalize strings -> np.dtype objects
    return dt.kind == 'f' and dt.itemsize == 2

def array_for_new_object(data, specified_dtype=None):
    """Prepare an array from data used to create a new dataset or attribute"""

    # We mostly let HDF5 convert data as necessary when it's written.
    # But if we are going to a float16 datatype, pre-convert in python
    # to work around a bug in the conversion.
    # https://github.com/h5py/h5py/issues/819
    if is_float16_dtype(specified_dtype):
        as_dtype = specified_dtype
    elif not isinstance(data, np.ndarray) and (specified_dtype is not None):
        # If we need to convert e.g. a list to an array, don't leave numpy
        # to guess a dtype we already know.
        as_dtype = specified_dtype
    else:
        as_dtype = guess_dtype(data)

    data = np.asarray(data, order="C", dtype=as_dtype)

    # In most cases, this does nothing. But if data was already an array,
    # and as_dtype is a tagged h5py dtype (e.g. for an object array of strings),
    # asarray() doesn't replace its dtype object. This gives it the tagged dtype:
    if as_dtype is not None:
        data = data.view(dtype=as_dtype)

    return data
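
# Illustrative sketch, not part of the original module: array_for_new_object()
# is the step that turns user-supplied data into the array that gets written.
# A float16 target dtype triggers the pre-conversion described above; string
# data picks up a tagged h5py string dtype via guess_dtype().
def _example_array_for_new_object():
    halves = array_for_new_object([1, 2, 3], specified_dtype=np.dtype('float16'))
    assert halves.dtype == np.dtype('float16')
    strings = array_for_new_object(['a', 'bb'])
    assert h5t.check_string_dtype(strings.dtype) is not None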

def default_lapl():
    """ Default link access property list """
    lapl = h5p.create(h5p.LINK_ACCESS)
    fapl = h5p.create(h5p.FILE_ACCESS)
    fapl.set_fclose_degree(h5f.CLOSE_STRONG)
    lapl.set_elink_fapl(fapl)
    return lapl


def default_lcpl():
    """ Default link creation property list """
    lcpl = h5p.create(h5p.LINK_CREATE)
    lcpl.set_create_intermediate_group(True)
    return lcpl


dlapl = default_lapl()
dlcpl = default_lcpl()

def is_empty_dataspace(obj):
    """ Check if an object's dataspace is empty """
    if obj.get_space().get_simple_extent_type() == h5s.NULL:
        return True
    return False

class CommonStateObject:

    """
        Mixin class that allows sharing information between objects which
        reside in the same HDF5 file.  Requires that the host class have
        a ".id" attribute which returns a low-level ObjectID subclass.

        Also implements Unicode operations.
    """

    @property
    def _lapl(self):
        """ Fetch the link access property list appropriate for this object
        """
        return dlapl

    @property
    def _lcpl(self):
        """ Fetch the link creation property list appropriate for this object
        """
        return dlcpl

    def _e(self, name, lcpl=None):
        """ Encode a name according to the current file settings.

        Returns name, or 2-tuple (name, lcpl) if lcpl is True

        - Binary strings are always passed as-is, h5t.CSET_ASCII
        - Unicode strings are encoded utf8, h5t.CSET_UTF8

        If name is None, returns either None or (None, None) appropriately.
        """
        def get_lcpl(coding):
            """ Create an appropriate link creation property list """
            lcpl = self._lcpl.copy()
            lcpl.set_char_encoding(coding)
            return lcpl

        if name is None:
            return (None, None) if lcpl else None

        if isinstance(name, bytes):
            coding = h5t.CSET_ASCII
        elif isinstance(name, str):
            try:
                name = name.encode('ascii')
                coding = h5t.CSET_ASCII
            except UnicodeEncodeError:
                name = name.encode('utf8')
                coding = h5t.CSET_UTF8
        else:
            raise TypeError(f"A name should be string or bytes, not {type(name)}")

        if lcpl:
            return name, get_lcpl(coding)
        return name

    def _d(self, name):
        """ Decode a name according to the current file settings.

        - Try to decode utf8
        - Failing that, return the byte string

        If name is None, returns None.
        """
        if name is None:
            return None

        try:
            return name.decode('utf8')
        except UnicodeDecodeError:
            pass
        return name
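
# Illustrative sketch, not part of the original module: the name-encoding rule
# applied by CommonStateObject._e(), restated standalone. ASCII-safe names keep
# h5t.CSET_ASCII; anything else is stored as UTF-8 with h5t.CSET_UTF8. The
# default name here is hypothetical.
def _example_name_encoding(name="temperatur_°C"):
    try:
        return name.encode('ascii'), h5t.CSET_ASCII
    except UnicodeEncodeError:
        return name.encode('utf8'), h5t.CSET_UTF8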

class _RegionProxy:

    """
        Proxy object which handles region references.

        To create a new region reference (datasets only), use slicing syntax:

        >>> newref = obj.regionref[0:10:2]

        To determine the target dataset shape from an existing reference:

        >>> shape = obj.regionref.shape(existingref)

        where <obj> may be any object in the file. To determine the shape of
        the selection in use on the target dataset:

        >>> selection_shape = obj.regionref.selection(existingref)
    """

    def __init__(self, obj):
        self.obj = obj
        self.id = obj.id

    def __getitem__(self, args):
        if not isinstance(self.id, h5d.DatasetID):
            raise TypeError("Region references can only be made to datasets")
        from . import selections
        with phil:
            selection = selections.select(self.id.shape, args, dataset=self.obj)
            return h5r.create(self.id, b'.', h5r.DATASET_REGION, selection.id)

    def shape(self, ref):
        """ Get the shape of the target dataspace referred to by *ref*. """
        with phil:
            sid = h5r.get_region(ref, self.id)
            return sid.shape

    def selection(self, ref):
        """ Get the shape of the target dataspace selection referred to by *ref*
        """
        from . import selections
        with phil:
            sid = h5r.get_region(ref, self.id)
            return selections.guess_shape(sid)

class HLObject(CommonStateObject):

    """
        Base class for high-level interface objects.
    """

    @property
    def file(self):
        """ Return a File instance associated with this object """
        from . import files
        with phil:
            return files.File(self.id)

    @property
    @with_phil
    def name(self):
        """ Return the full name of this object.  None if anonymous. """
        return self._d(h5i.get_name(self.id))

    @property
    @with_phil
    def parent(self):
        """Return the parent group of this object.

        This is always equivalent to obj.file[posixpath.dirname(obj.name)].
        ValueError if this object is anonymous.
        """
        if self.name is None:
            raise ValueError("Parent of an anonymous object is undefined")
        return self.file[posixpath.dirname(self.name)]

    @property
    @with_phil
    def id(self):
        """ Low-level identifier appropriate for this object """
        return self._id

    @property
    @with_phil
    def ref(self):
        """ An (opaque) HDF5 reference to this object """
        return h5r.create(self.id, b'.', h5r.OBJECT)

    @property
    @with_phil
    def regionref(self):
        """Create a region reference (Datasets only).

        The syntax is regionref[<slices>]. For example, dset.regionref[...]
        creates a region reference in which the whole dataset is selected.

        Can also be used to determine the shape of the referenced dataset
        (via .shape property), or the shape of the selection (via the
        .selection property).
        """
        return _RegionProxy(self)

    @property
    def attrs(self):
        """ Attributes attached to this object """
        from . import attrs
        with phil:
            return attrs.AttributeManager(self)

    @with_phil
    def __init__(self, oid):
        """ Set up this object, given its low-level identifier """
        self._id = oid

    @with_phil
    def __hash__(self):
        return hash(self.id)

    @with_phil
    def __eq__(self, other):
        if hasattr(other, 'id'):
            return self.id == other.id
        return NotImplemented

    def __bool__(self):
        with phil:
            return bool(self.id)
    __nonzero__ = __bool__

    def __getnewargs__(self):
        """Disable pickle.

        Handles for HDF5 objects can't be reliably deserialised, because the
        recipient may not have access to the same files. So we do this to
        fail early.

        If you really want to pickle h5py objects and can live with some
        limitations, look at the h5pickle project on PyPI.
        """
        raise TypeError("h5py objects cannot be pickled")

    def __getstate__(self):
        # Pickle protocols 0 and 1 use this instead of __getnewargs__
        raise TypeError("h5py objects cannot be pickled")
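
# Illustrative sketch, not part of the original module: every Group and Dataset
# is an HLObject, so the properties above are available on any open object.
# The file and group names below are hypothetical.
def _example_hlobject_properties():
    import h5py
    with h5py.File("example.h5", "w") as f:
        grp = f.create_group("measurements/run1")  # intermediate groups created automatically
        assert grp.name == "/measurements/run1"
        assert grp.parent.name == "/measurements"
        grp.attrs["operator"] = "alice"            # .attrs -> AttributeManager
        ref = grp.ref                              # opaque object reference
        assert f[ref].name == grp.name             # references resolve via indexing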

# --- Dictionary-style interface ----------------------------------------------

# To implement the dictionary-style interface from groups and attributes,
# we inherit from the appropriate abstract base classes in collections.
#
# All locking is taken care of by the subclasses.
# We have to override ValuesView and ItemsView here because Group and
# AttributeManager can only test for key names.

class KeysViewHDF5(KeysView):
    def __str__(self):
        return "<KeysViewHDF5 {}>".format(list(self))

    def __reversed__(self):
        yield from reversed(self._mapping)

    __repr__ = __str__

class ValuesViewHDF5(ValuesView):

    """
        Wraps e.g. a Group or AttributeManager to provide a value view.

        Note that __contains__ will have poor performance as it has
        to scan all the links or attributes.
    """

    def __contains__(self, value):
        with phil:
            for key in self._mapping:
                if value == self._mapping.get(key):
                    return True
            return False

    def __iter__(self):
        with phil:
            for key in self._mapping:
                yield self._mapping.get(key)

    def __reversed__(self):
        with phil:
            for key in reversed(self._mapping):
                yield self._mapping.get(key)

class ItemsViewHDF5(ItemsView):

    """
        Wraps e.g. a Group or AttributeManager to provide an items view.
    """

    def __contains__(self, item):
        with phil:
            key, val = item
            if key in self._mapping:
                return val == self._mapping.get(key)
            return False

    def __iter__(self):
        with phil:
            for key in self._mapping:
                yield (key, self._mapping.get(key))

    def __reversed__(self):
        with phil:
            for key in reversed(self._mapping):
                yield (key, self._mapping.get(key))

class MappingHDF5(Mapping):

    """
        Wraps a Group, AttributeManager or DimensionManager object to provide
        an immutable mapping interface.

        We don't inherit directly from MutableMapping because certain
        subclasses, for example DimensionManager, are read-only.
    """
    def keys(self):
        """ Get a view object on member names """
        return KeysViewHDF5(self)

    def values(self):
        """ Get a view object on member objects """
        return ValuesViewHDF5(self)

    def items(self):
        """ Get a view object on member items """
        return ItemsViewHDF5(self)

    def _ipython_key_completions_(self):
        """ Custom tab completions for __getitem__ in IPython >=5.0. """
        return sorted(self.keys())

class MutableMappingHDF5(MappingHDF5, MutableMapping):

    """
        Wraps a Group or AttributeManager object to provide a mutable
        mapping interface, in contrast to the read-only mapping of
        MappingHDF5.
    """

    pass
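
# Illustrative sketch, not part of the original module: Group and
# AttributeManager inherit their dict-like behaviour from the mapping classes
# above. The file name is hypothetical.
def _example_mapping_interface():
    import h5py
    with h5py.File("example_mapping.h5", "w") as f:
        f.create_dataset("a", data=[1, 2, 3])
        f.create_dataset("b", data=[4, 5, 6])
        assert list(f.keys()) == ["a", "b"]                  # KeysViewHDF5
        assert [d.name for d in f.values()] == ["/a", "/b"]  # ValuesViewHDF5
        for name, dset in f.items():                         # ItemsViewHDF5
            assert f[name].name == dset.name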

class Empty:

    """
        Proxy object to represent empty/null dataspaces (a.k.a. H5S_NULL).

        This can have an associated dtype, but has no shape or data. This is not
        the same as an array with shape (0,).
    """
    shape = None
    size = None

    def __init__(self, dtype):
        self.dtype = np.dtype(dtype)

    def __eq__(self, other):
        if isinstance(other, Empty) and self.dtype == other.dtype:
            return True
        return False

    def __repr__(self):
        return "Empty(dtype={0!r})".format(self.dtype)
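
# Illustrative sketch, not part of the original module: Empty is exposed as
# h5py.Empty and stands for an H5S_NULL dataspace, e.g. an attribute that has a
# type but no value. The file name is hypothetical.
def _example_empty():
    import h5py
    with h5py.File("example_empty.h5", "w") as f:
        f.attrs["placeholder"] = h5py.Empty(np.dtype("float32"))
        assert f.attrs["placeholder"] == h5py.Empty("float32")  # compares by dtype
        assert f.attrs["placeholder"].shape is None             # no shape, no data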

def product(nums):
    """Calculate a numeric product

    For small amounts of data (e.g. shape tuples), this simple code is much
    faster than calling numpy.prod().
    """
    prod = 1
    for n in nums:
        prod *= n
    return prod
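
# Illustrative sketch, not part of the original module: product() is typically
# applied to shape tuples.
def _example_product():
    assert product((3, 4, 5)) == 60
    assert product(()) == 1   # empty shape -> scalar, i.e. one element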

# Simple variant of cached_property:
# Unlike functools, this has no locking, so we don't have to worry about
# deadlocks with phil (see issue gh-2064). Unlike cached-property on PyPI, it
# doesn't try to import asyncio (which can be ~100 extra modules).
# Many projects seem to have similar variants of this, often without attribution,
# but to be cautious, this code comes from cached-property (Copyright (c) 2015,
# Daniel Greenfeld, BSD license), where it is attributed to bottle (Copyright
# (c) 2009-2022, Marcel Hellkamp, MIT license).

class cached_property(object):
    def __init__(self, func):
        self.__doc__ = getattr(func, "__doc__")
        self.func = func

    def __get__(self, obj, cls):
        if obj is None:
            return self

        value = obj.__dict__[self.func.__name__] = self.func(obj)
        return value
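
# Illustrative sketch, not part of the original module: the descriptor above
# computes the value once and stores it in the instance __dict__, so the
# instance attribute shadows the (non-data) descriptor on later lookups.
class _ExampleShape:
    def __init__(self, dims):
        self._dims = tuple(dims)

    @cached_property
    def size(self):
        print("computing size")   # runs only on the first access
        return product(self._dims)

# >>> s = _ExampleShape((3, 4))
# >>> s.size   # prints "computing size", returns 12
# >>> s.size   # cached: no print, returns 12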