Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/base.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

243 statements  

1# This file is part of h5py, a Python interface to the HDF5 library. 

2# 

3# http://www.h5py.org 

4# 

5# Copyright 2008-2013 Andrew Collette and contributors 

6# 

7# License: Standard 3-clause BSD; see "license.txt" for full license terms 

8# and contributor agreement. 

9 

10""" 

11 Implements operations common to all high-level objects (File, etc.). 

12""" 

13 

14from collections.abc import ( 

15 Mapping, MutableMapping, KeysView, ValuesView, ItemsView 

16) 

17import os 

18import posixpath 

19 

20import numpy as np 

21 

22# The high-level interface is serialized; every public API function & method 

23# is wrapped in a lock. We reuse the low-level lock because (1) it's fast, 

24# and (2) it eliminates the possibility of deadlocks due to out-of-order 

25# lock acquisition. 

26from .._objects import phil, with_phil 

27from .. import h5d, h5i, h5r, h5p, h5f, h5t, h5s 

28from .compat import fspath, filename_encode 

29 

30 

def is_hdf5(fname):
    """Report whether *fname* names a valid HDF5 file.

    The path is made absolute first.  A path that is not an existing
    regular file yields False without consulting the HDF5 library.
    """
    with phil:
        path = os.path.abspath(fspath(fname))
        # Short-circuit: only hand real files to the low-level check.
        return os.path.isfile(path) and h5f.is_hdf5(filename_encode(path))

39 

40 

def find_item_type(data):
    """Return the common item type of an object or (nested) collection.

    For instance ``[[['a']]]`` gives ``str``.

    Only homogeneous collections are of interest: when the items do not all
    share one type (or the collection is empty), None is returned.

    Numpy arrays holding plain Python objects are inspected like ordinary
    Python collections; arrays with any other dtype (including h5py's
    tagged string / vlen object dtypes) give None.  Only array-like
    containers - lists and tuples, possibly nested - are descended into;
    anything else (sets, dicts, scalars...) is reported as its own type.
    """
    if isinstance(data, np.ndarray):
        plain_object_array = (
            data.dtype.kind == 'O'
            and not h5t.check_string_dtype(data.dtype)
            and not h5t.check_vlen_dtype(data.dtype)
        )
        if not plain_object_array:
            return None
        candidates = {type(elem) for elem in data.flat}
    elif isinstance(data, (list, tuple)):
        # Recurse so nested sequences collapse to their leaf type.
        candidates = {find_item_type(elem) for elem in data}
    else:
        return type(data)

    if len(candidates) == 1:
        return candidates.pop()
    return None

71 

72 

def guess_dtype(data):
    """ Attempt to guess an appropriate dtype for the object, returning None
    if nothing is appropriate (or if it should be left up to the array
    constructor to figure out)
    """
    with phil:
        # RegionReference is tested before Reference, as in the original
        # ordering of these checks.
        if isinstance(data, h5r.RegionReference):
            return h5t.regionref_dtype
        if isinstance(data, h5r.Reference):
            return h5t.ref_dtype

        element_type = find_item_type(data)
        if element_type is str:
            return h5t.string_dtype()
        if element_type is bytes:
            return h5t.string_dtype(encoding='ascii')
        return None

92 

93 

def is_float16_dtype(dt):
    """Return True when *dt* describes a 2-byte floating point dtype.

    *dt* may be anything ``np.dtype()`` accepts (dtype object, type, or
    string).  None is reported as False rather than being normalized.
    """
    if dt is None:
        return False

    normalized = np.dtype(dt)  # strings / types -> np.dtype objects
    return (normalized.kind, normalized.itemsize) == ('f', 2)

100 

101 

def array_for_new_object(data, specified_dtype=None):
    """Prepare an array from data used to create a new dataset or attribute"""

    # HDF5 is mostly left to convert data when it is written.  Two cases
    # use the requested dtype up front instead:
    #  * a float16 target, pre-converted in Python to work around a
    #    conversion bug (https://github.com/h5py/h5py/issues/819);
    #  * non-array input (e.g. a list) with a known dtype, so numpy does
    #    not have to guess something we already know.
    if is_float16_dtype(specified_dtype) or (
        specified_dtype is not None and not isinstance(data, np.ndarray)
    ):
        target_dtype = specified_dtype
    else:
        target_dtype = guess_dtype(data)

    arr = np.asarray(data, order="C", dtype=target_dtype)
    if target_dtype is None:
        return arr

    # Usually a no-op.  But when data was already an array and target_dtype
    # is a tagged h5py dtype (e.g. for an object array of strings),
    # asarray() keeps the old dtype object; the view applies the tagged one.
    return arr.view(dtype=target_dtype)

127 

128 

def default_lapl():
    """Return the default link access property list.

    No dedicated property list is created; the default is simply None.
    """
    return None

132 

133 

def default_lcpl():
    """Build the default link creation property list.

    The list is created with creation of intermediate groups switched on.
    """
    plist = h5p.create(h5p.LINK_CREATE)
    plist.set_create_intermediate_group(True)
    return plist

139 

# Module-level defaults, created once at import time and handed out by
# CommonStateObject._lapl / CommonStateObject._lcpl.
dlapl, dlcpl = default_lapl(), default_lcpl()

142 

143 

def is_empty_dataspace(obj):
    """Tell whether the dataspace of *obj* has the h5s.NULL extent type.

    *obj* must provide ``get_space()``; the returned dataspace is asked for
    its simple extent type.
    """
    space = obj.get_space()
    return bool(space.get_simple_extent_type() == h5s.NULL)

149 

150 

class CommonStateObject:

    """
        Mixin class that allows sharing information between objects which
        reside in the same HDF5 file.  Requires that the host class have
        a ".id" attribute which returns a low-level ObjectID subclass.

        Also implements the name encoding / decoding helpers.
    """

    @property
    def _lapl(self):
        """ Link access property list to use for this object
        (the module-level default). """
        return dlapl

    @property
    def _lcpl(self):
        """ Link creation property list to use for this object
        (the module-level default). """
        return dlcpl

    def _e(self, name, lcpl=None):
        """ Encode a name for the low-level API.

        Returns the encoded name, or the 2-tuple (name, lcpl) when the
        *lcpl* argument is truthy; in that case the second element is a
        copy of this object's link creation property list with the
        matching character encoding set.

        - bytes are passed through untouched and tagged h5t.CSET_ASCII
        - str is encoded as ASCII when possible (h5t.CSET_ASCII),
          otherwise as UTF-8 (h5t.CSET_UTF8)

        If name is None, returns None or (None, None) respectively.
        Any other type raises TypeError.
        """
        if name is None:
            return (None, None) if lcpl else None

        if isinstance(name, bytes):
            charset = h5t.CSET_ASCII
        elif isinstance(name, str):
            try:
                name = name.encode('ascii')
            except UnicodeEncodeError:
                name = name.encode('utf8')
                charset = h5t.CSET_UTF8
            else:
                charset = h5t.CSET_ASCII
        else:
            raise TypeError(f"A name should be string or bytes, not {type(name)}")

        if not lcpl:
            return name

        # Work on a copy so the shared property list is never modified.
        plist = self._lcpl.copy()
        plist.set_char_encoding(charset)
        return name, plist

    def _d(self, name):
        """ Decode a name coming back from the low-level API.

        The name is decoded as UTF-8; if that fails the original byte
        string is returned unchanged.  None is passed through as None.
        """
        if name is None:
            return None

        try:
            return name.decode('utf8')
        except UnicodeDecodeError:
            return name

224 

225 

class _RegionProxy:

    """
        Proxy object which handles region references.

        A new region reference (datasets only) is made with slicing syntax:

        >>> newref = obj.regionref[0:10:2]

        The shape of the dataset an existing reference points into:

        >>> shape = obj.regionref.shape(existingref)

        where <obj> may be any object in the file.  The shape of the
        selection stored in the reference:

        >>> selection_shape = obj.regionref.selection(existingref)
    """

    def __init__(self, obj):
        # Keep both the high-level object and its low-level identifier.
        self.obj = obj
        self.id = obj.id

    def __getitem__(self, args):
        """ Create a region reference for the selection given by *args*.

        Raises TypeError when the wrapped object is not a dataset.
        """
        if not isinstance(self.id, h5d.DatasetID):
            raise TypeError("Region references can only be made to datasets")
        from . import selections
        with phil:
            picked = selections.select(self.id.shape, args, dataset=self.obj)
            return h5r.create(self.id, b'.', h5r.DATASET_REGION, picked.id)

    def shape(self, ref):
        """ Get the shape of the target dataspace referred to by *ref*. """
        with phil:
            return h5r.get_region(ref, self.id).shape

    def selection(self, ref):
        """ Get the shape of the target dataspace selection referred to by
        *ref*.
        """
        from . import selections
        with phil:
            region = h5r.get_region(ref, self.id)
            return selections.guess_shape(region)

270 

271 

class HLObject(CommonStateObject):

    """
        Base class for high-level interface objects.

        Wraps a low-level identifier, stored in ``_id`` and exposed through
        the ``id`` property.  Equality, hashing and truthiness are all
        delegated to that identifier.
    """

    @property
    def file(self):
        """ Return a File instance associated with this object """
        from . import files
        with phil:
            return files.File(self.id)

    @property
    @with_phil
    def name(self):
        """ Return the full name of this object.  None if anonymous. """
        return self._d(h5i.get_name(self.id))

    @property
    @with_phil
    def parent(self):
        """Return the parent group of this object.

        This is always equivalent to obj.file[posixpath.dirname(obj.name)].
        ValueError if this object is anonymous.
        """
        # Read the name once: each access to the property is a fresh
        # lookup, and the check and the use should see the same value.
        name = self.name
        if name is None:
            raise ValueError("Parent of an anonymous object is undefined")
        return self.file[posixpath.dirname(name)]

    @property
    @with_phil
    def id(self):
        """ Low-level identifier appropriate for this object """
        return self._id

    @property
    @with_phil
    def ref(self):
        """ An (opaque) HDF5 reference to this object """
        return h5r.create(self.id, b'.', h5r.OBJECT)

    @property
    @with_phil
    def regionref(self):
        """Create a region reference (Datasets only).

        The syntax is regionref[<slices>]. For example, dset.regionref[...]
        creates a region reference in which the whole dataset is selected.

        The returned proxy can also be used to determine the shape of the
        referenced dataset (via its .shape(ref) method), or the shape of
        the selection (via its .selection(ref) method).
        """
        return _RegionProxy(self)

    @property
    def attrs(self):
        """ Attributes attached to this object """
        from . import attrs
        with phil:
            return attrs.AttributeManager(self)

    @with_phil
    def __init__(self, oid):
        """ Setup this object, given its low-level identifier """
        self._id = oid

    @with_phil
    def __hash__(self):
        # Hash of the low-level identifier, matching __eq__ below.
        return hash(self.id)

    @with_phil
    def __eq__(self, other):
        # Anything exposing an ``id`` attribute is compared by identifier;
        # everything else is left to the other operand.
        if hasattr(other, 'id'):
            return self.id == other.id
        return NotImplemented

    def __bool__(self):
        # Truthiness of the object is the truthiness of its identifier.
        with phil:
            return bool(self.id)
    __nonzero__ = __bool__

    def __getnewargs__(self):
        """Disable pickle.

        Handles for HDF5 objects can't be reliably deserialised, because the
        recipient may not have access to the same files. So we do this to
        fail early.

        If you really want to pickle h5py objects and can live with some
        limitations, look at the h5pickle project on PyPI.
        """
        raise TypeError("h5py objects cannot be pickled")

    def __getstate__(self):
        # Pickle protocols 0 and 1 use this instead of __getnewargs__
        raise TypeError("h5py objects cannot be pickled")

371 

372# --- Dictionary-style interface ---------------------------------------------- 

373 

374# To implement the dictionary-style interface from groups and attributes, 

375# we inherit from the appropriate abstract base classes in collections. 

376# 

377# All locking is taken care of by the subclasses. 

378# We have to override ValuesView and ItemsView here because Group and 

379# AttributeManager can only test for key names. 

380 

381 

class KeysViewHDF5(KeysView):

    """
        Keys view over the wrapped mapping, with a readable repr and
        support for reversed().
    """

    def __str__(self):
        names = list(self)
        return "<KeysViewHDF5 {}>".format(names)

    __repr__ = __str__

    def __reversed__(self):
        # Delegates to the mapping, which must itself support reversed().
        for name in reversed(self._mapping):
            yield name

390 

class ValuesViewHDF5(ValuesView):

    """
        Wraps e.g. a Group or AttributeManager to provide a value view.

        Membership tests are slow: every key of the mapping is looked up
        and its value compared in turn.
    """

    def __contains__(self, value):
        with phil:
            mapping = self._mapping
            return any(value == mapping.get(name) for name in mapping)

    def __iter__(self):
        # The lock is taken when iteration starts and is held while the
        # generator is suspended between items.
        with phil:
            mapping = self._mapping
            for name in mapping:
                yield mapping.get(name)

    def __reversed__(self):
        with phil:
            mapping = self._mapping
            for name in reversed(mapping):
                yield mapping.get(name)

416 

417 

class ItemsViewHDF5(ItemsView):

    """
        Wraps e.g. a Group or AttributeManager to provide an items view.
    """

    def __contains__(self, item):
        with phil:
            name, expected = item
            mapping = self._mapping
            if name not in mapping:
                return False
            # The comparison result is returned as-is.
            return expected == mapping.get(name)

    def __iter__(self):
        # The lock is taken when iteration starts and is held while the
        # generator is suspended between items.
        with phil:
            mapping = self._mapping
            for name in mapping:
                yield (name, mapping.get(name))

    def __reversed__(self):
        with phil:
            mapping = self._mapping
            for name in reversed(mapping):
                yield (name, mapping.get(name))

440 

441 

class MappingHDF5(Mapping):

    """
        Read-only mapping interface for Group, AttributeManager or
        DimensionManager objects.

        MutableMapping is deliberately not the base class here, because
        some subclasses (DimensionManager, for example) are read-only.
    """

    def keys(self):
        """ Return a view of the member names """
        return KeysViewHDF5(self)

    def values(self):
        """ Return a view of the member objects """
        return ValuesViewHDF5(self)

    def items(self):
        """ Return a view of the (name, object) pairs """
        return ItemsViewHDF5(self)

    def _ipython_key_completions_(self):
        """ Custom tab completions for __getitem__ in IPython >=5.0. """
        names = self.keys()
        return sorted(names)

466 

467 

class MutableMappingHDF5(MappingHDF5, MutableMapping):

    """
        Mutable counterpart of MappingHDF5, for Group or AttributeManager
        objects.  Adds nothing of its own: it only combines the HDF5 view
        methods with the MutableMapping interface.
    """

477 

478 

class Empty:

    """
        Proxy object to represent empty/null dataspaces (a.k.a H5S_NULL).

        It carries a dtype but neither shape nor data, which makes it
        different from an array of shape (0,).
    """
    # Both are fixed at None for every instance.
    shape = None
    size = None

    def __init__(self, dtype):
        # Normalise whatever was given (string, type, ...) to np.dtype.
        self.dtype = np.dtype(dtype)

    def __eq__(self, other):
        # Equal only to another Empty carrying the same dtype.
        return isinstance(other, Empty) and self.dtype == other.dtype

    def __repr__(self):
        return "Empty(dtype={0!r})".format(self.dtype)

500 

501 

def product(nums):
    """Multiply together all the numbers in *nums*.

    Intended for short inputs such as shape tuples, where this plain loop
    is much faster than calling numpy.prod().  An empty input gives 1.
    """
    total = 1
    for factor in nums:
        total *= factor
    return total

512 

513 

514# Simple variant of cached_property: 

515# Unlike functools, this has no locking, so we don't have to worry about 

516# deadlocks with phil (see issue gh-2064). Unlike cached-property on PyPI, it 

517# doesn't try to import asyncio (which can be ~100 extra modules). 

518# Many projects seem to have similar variants of this, often without attribution, 

519# but to be cautious, this code comes from cached-property (Copyright (c) 2015, 

520# Daniel Greenfeld, BSD license), where it is attributed to bottle (Copyright 

521# (c) 2009-2022, Marcel Hellkamp, MIT license). 

522 

class cached_property:
    """Lock-free cached property descriptor.

    On first access through an instance the wrapped function is called and
    its result is stored in the instance ``__dict__`` under the function's
    name.  Accessed through the class, the descriptor itself is returned.
    """

    def __init__(self, func):
        self.func = func
        self.__doc__ = func.__doc__

    def __get__(self, obj, cls):
        if obj is None:
            return self

        result = self.func(obj)
        obj.__dict__[self.func.__name__] = result
        return result

533 return value