Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/base.py: 40%
247 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License: Standard 3-clause BSD; see "license.txt" for full license terms
8# and contributor agreement.
10"""
11 Implements operations common to all high-level objects (File, etc.).
12"""
14from collections.abc import (
15 Mapping, MutableMapping, KeysView, ValuesView, ItemsView
16)
17import os
18import posixpath
20import numpy as np
22# The high-level interface is serialized; every public API function & method
23# is wrapped in a lock. We re-use the low-level lock because (1) it's fast,
24# and (2) it eliminates the possibility of deadlocks due to out-of-order
25# lock acquisition.
26from .._objects import phil, with_phil
27from .. import h5d, h5i, h5r, h5p, h5f, h5t, h5s
28from .compat import fspath, filename_encode
def is_hdf5(fname):
    """ Report whether *fname* names a valid HDF5 file.

    The path is converted with ``fspath`` and made absolute. If it does not
    refer to an existing regular file the answer is False; otherwise the
    result of ``h5f.is_hdf5`` on the encoded filename is returned.
    """
    with phil:
        path = os.path.abspath(fspath(fname))

        # Guard clause: a missing path (or a non-file) is never HDF5.
        if not os.path.isfile(path):
            return False
        return h5f.is_hdf5(filename_encode(path))
def find_item_type(data):
    """Return the single item type shared by everything in *data*.

    E.g. [[['a']]] -> str

    Only homogeneous collections get an answer; if the items do not all
    share one type (or there are no items at all) the result is None.

    Lists and tuples, possibly nested, are searched recursively. A numpy
    array is searched element by element only when it is a plain object
    array (dtype kind 'O' that is neither an h5py string dtype nor an h5py
    vlen dtype); any other array gives None. Anything else - including
    sets and dicts - is treated as a single object and its own type is
    returned.
    """
    if isinstance(data, np.ndarray):
        holds_plain_objects = (
            data.dtype.kind == 'O'
            and not h5t.check_string_dtype(data.dtype)
            and not h5t.check_vlen_dtype(data.dtype)
        )
        if not holds_plain_objects:
            # Arrays with a specific dtype are not inspected.
            return None
        candidates = {type(element) for element in data.flat}
    elif isinstance(data, (list, tuple)):
        candidates = set(map(find_item_type, data))
    else:
        return type(data)

    # Exactly one distinct type means the collection is homogeneous.
    return candidates.pop() if len(candidates) == 1 else None
def guess_dtype(data):
    """ Try to pick a suitable dtype for *data*.

    Returns None when nothing specific applies, in which case the choice is
    left up to the array constructor.

    - RegionReference instances map to ``h5t.regionref_dtype``
    - other Reference instances map to ``h5t.ref_dtype``
    - data whose item type is bytes maps to an ASCII string dtype
    - data whose item type is str maps to the default string dtype
    """
    with phil:
        # RegionReference is tested before Reference, as in the original
        # ordering of these checks.
        if isinstance(data, h5r.RegionReference):
            return h5t.regionref_dtype
        if isinstance(data, h5r.Reference):
            return h5t.ref_dtype

        element_type = find_item_type(data)

        if element_type is bytes:
            return h5t.string_dtype(encoding='ascii')
        if element_type is str:
            return h5t.string_dtype()

        return None
def is_float16_dtype(dt):
    """ Tell whether *dt* describes a 2-byte floating point type.

    *dt* may be None (giving False) or anything ``np.dtype`` accepts, such
    as a dtype object, a scalar type or a string.
    """
    if dt is None:
        return False

    # Strings and scalar types are normalised to np.dtype objects first.
    normalized = np.dtype(dt)
    return (normalized.kind, normalized.itemsize) == ('f', 2)
def array_for_new_object(data, specified_dtype=None):
    """Turn *data* into a C-ordered array for a new dataset or attribute.

    The dtype used for the conversion is chosen as follows:

    - a float16 *specified_dtype* is always applied here, in Python, to
      work around a conversion bug (https://github.com/h5py/h5py/issues/819);
      otherwise HDF5 is mostly left to convert data when it is written
    - when *data* is not yet an ndarray and a dtype was specified, that
      dtype is used so numpy does not have to guess one we already know
    - in every other case the dtype comes from ``guess_dtype(data)``, which
      may be None
    """
    wants_float16 = is_float16_dtype(specified_dtype)
    converting_with_known_dtype = (
        specified_dtype is not None and not isinstance(data, np.ndarray)
    )

    if wants_float16 or converting_with_known_dtype:
        target_dtype = specified_dtype
    else:
        target_dtype = guess_dtype(data)

    arr = np.asarray(data, order="C", dtype=target_dtype)

    if target_dtype is None:
        return arr

    # Usually a no-op. But when data already was an array and target_dtype
    # is a tagged h5py dtype (e.g. for an object array of strings),
    # asarray() keeps the old dtype object; the view attaches the tagged one.
    return arr.view(dtype=target_dtype)
def default_lapl():
    """ Build the default link access property list.

    The list carries a file access property list, with its file close
    degree set to ``h5f.CLOSE_STRONG``, as its external-link fapl.
    """
    link_access = h5p.create(h5p.LINK_ACCESS)
    file_access = h5p.create(h5p.FILE_ACCESS)
    file_access.set_fclose_degree(h5f.CLOSE_STRONG)
    link_access.set_elink_fapl(file_access)
    return link_access
def default_lcpl():
    """ Build the default link creation property list.

    Creation of intermediate groups is switched on for the returned list.
    """
    link_create = h5p.create(h5p.LINK_CREATE)
    link_create.set_create_intermediate_group(True)
    return link_create
# Module-wide default property lists, built once at import time and handed
# out by CommonStateObject._lapl / CommonStateObject._lcpl.
dlapl, dlcpl = default_lapl(), default_lcpl()
def is_empty_dataspace(obj):
    """ Tell whether the dataspace of *obj* is of the null (empty) kind.

    *obj* must offer ``get_space()``; the simple extent type of the space
    it returns is compared against ``h5s.NULL``.
    """
    extent_type = obj.get_space().get_simple_extent_type()
    return bool(extent_type == h5s.NULL)
class CommonStateObject:

    """
        Mixin giving objects that live in the same HDF5 file access to
        shared information.  The host class is expected to provide an
        ".id" attribute returning a low-level ObjectID subclass.

        The name encoding/decoding helpers live here as well.
    """

    @property
    def _lapl(self):
        """ The link access property list to use for this object
        """
        return dlapl

    @property
    def _lcpl(self):
        """ The link creation property list to use for this object
        """
        return dlcpl

    def _e(self, name, lcpl=None):
        """ Encode a name for use with the low-level API.

        Returns the encoded name, or a 2-tuple (name, lcpl) when lcpl is
        true; the property list is a copy of self._lcpl with the matching
        character encoding set.

        - bytes are passed through unchanged and tagged h5t.CSET_ASCII
        - str that fits in ASCII is encoded as ASCII, h5t.CSET_ASCII
        - any other str is encoded as utf8, h5t.CSET_UTF8

        A name of None gives None, or (None, None) when lcpl is true.
        Any other type raises TypeError.
        """
        if name is None:
            return (None, None) if lcpl else None

        if isinstance(name, bytes):
            encoded, charset = name, h5t.CSET_ASCII
        elif isinstance(name, str):
            try:
                encoded, charset = name.encode('ascii'), h5t.CSET_ASCII
            except UnicodeEncodeError:
                # Not pure ASCII: fall back to utf8 and flag it as such.
                encoded, charset = name.encode('utf8'), h5t.CSET_UTF8
        else:
            raise TypeError(f"A name should be string or bytes, not {type(name)}")

        if not lcpl:
            return encoded

        # Work on a copy so the shared default list is never modified.
        plist = self._lcpl.copy()
        plist.set_char_encoding(charset)
        return encoded, plist

    def _d(self, name):
        """ Decode a name coming back from the low-level API.

        - utf8 decoding is attempted first
        - if that fails, the byte string is handed back unchanged

        A name of None gives None.
        """
        if name is None:
            return None

        try:
            decoded = name.decode('utf8')
        except UnicodeDecodeError:
            return name
        return decoded
230class _RegionProxy:
232 """
233 Proxy object which handles region references.
235 To create a new region reference (datasets only), use slicing syntax:
237 >>> newref = obj.regionref[0:10:2]
239 To determine the target dataset shape from an existing reference:
241 >>> shape = obj.regionref.shape(existingref)
243 where <obj> may be any object in the file. To determine the shape of
244 the selection in use on the target dataset:
246 >>> selection_shape = obj.regionref.selection(existingref)
247 """
249 def __init__(self, obj):
250 self.obj = obj
251 self.id = obj.id
253 def __getitem__(self, args):
254 if not isinstance(self.id, h5d.DatasetID):
255 raise TypeError("Region references can only be made to datasets")
256 from . import selections
257 with phil:
258 selection = selections.select(self.id.shape, args, dataset=self.obj)
259 return h5r.create(self.id, b'.', h5r.DATASET_REGION, selection.id)
261 def shape(self, ref):
262 """ Get the shape of the target dataspace referred to by *ref*. """
263 with phil:
264 sid = h5r.get_region(ref, self.id)
265 return sid.shape
267 def selection(self, ref):
268 """ Get the shape of the target dataspace selection referred to by *ref*
269 """
270 from . import selections
271 with phil:
272 sid = h5r.get_region(ref, self.id)
273 return selections.guess_shape(sid)
class HLObject(CommonStateObject):

    """
        Shared base class of the high-level interface objects; it wraps a
        low-level identifier stored in ``_id``.
    """

    @property
    def file(self):
        """ The File instance associated with this object """
        from . import files
        with phil:
            return files.File(self.id)

    @property
    @with_phil
    def name(self):
        """ Full name of this object, or None if it is anonymous. """
        raw_name = h5i.get_name(self.id)
        return self._d(raw_name)

    @property
    @with_phil
    def parent(self):
        """The parent group of this object.

        Always the same as obj.file[posixpath.dirname(obj.name)].
        Raises ValueError for an anonymous object.
        """
        if self.name is None:
            raise ValueError("Parent of an anonymous object is undefined")
        parent_path = posixpath.dirname(self.name)
        return self.file[parent_path]

    @property
    @with_phil
    def id(self):
        """ The low-level identifier wrapped by this object """
        return self._id

    @property
    @with_phil
    def ref(self):
        """ An (opaque) HDF5 object reference pointing at this object """
        return h5r.create(self.id, b'.', h5r.OBJECT)

    @property
    @with_phil
    def regionref(self):
        """Proxy for making region references (Datasets only).

        Use it as regionref[<slices>]; dset.regionref[...], for instance,
        gives a region reference selecting the whole dataset.

        The proxy's .shape(ref) and .selection(ref) methods report the
        shape of the referenced dataset and the shape of the selection.
        """
        return _RegionProxy(self)

    @property
    def attrs(self):
        """ The attributes attached to this object """
        from . import attrs
        with phil:
            return attrs.AttributeManager(self)

    @with_phil
    def __init__(self, oid):
        """ Store the low-level identifier this object wraps """
        self._id = oid

    @with_phil
    def __hash__(self):
        # Hash and equality are both delegated to the low-level id.
        return hash(self.id)

    @with_phil
    def __eq__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.id == other.id

    def __bool__(self):
        with phil:
            return bool(self.id)
    __nonzero__ = __bool__

    def __getnewargs__(self):
        """Refuse to be pickled.

        A handle to an HDF5 object cannot be reliably deserialised, since
        whoever receives it may not have access to the same files.  Raising
        here makes the attempt fail early.

        If you really want to pickle h5py objects and can live with some
        limitations, look at the h5pickle project on PyPI.
        """
        raise TypeError("h5py objects cannot be pickled")

    def __getstate__(self):
        # Pickle protocols 0 and 1 go through here rather than __getnewargs__
        raise TypeError("h5py objects cannot be pickled")
376# --- Dictionary-style interface ----------------------------------------------
378# To implement the dictionary-style interface from groups and attributes,
379# we inherit from the appropriate abstract base classes in collections.
380#
381# All locking is taken care of by the subclasses.
382# We have to override ValuesView and ItemsView here because Group and
383# AttributeManager can only test for key names.
class KeysViewHDF5(KeysView):
    """ Keys view whose str/repr list the keys and which can be reversed. """

    def __str__(self):
        member_names = list(self)
        return "<KeysViewHDF5 {}>".format(member_names)

    def __reversed__(self):
        # Reversal is delegated to the wrapped mapping.
        for key in reversed(self._mapping):
            yield key

    __repr__ = __str__
class ValuesViewHDF5(ValuesView):

    """
        Value view over e.g. a Group or AttributeManager.

        Values are fetched through the mapping's get(), one key at a time.
        Note that __contains__ performs poorly, since it has to walk over
        all the links or attributes.
    """

    def __contains__(self, value):
        with phil:
            source = self._mapping
            return any(value == source.get(key) for key in source)

    def __iter__(self):
        with phil:
            source = self._mapping
            for key in source:
                yield source.get(key)

    def __reversed__(self):
        with phil:
            source = self._mapping
            for key in reversed(source):
                yield source.get(key)
class ItemsViewHDF5(ItemsView):

    """
        Items view over e.g. a Group or AttributeManager.

        Values are fetched through the mapping's get(), one key at a time.
    """

    def __contains__(self, item):
        with phil:
            key, val = item
            source = self._mapping
            if key not in source:
                return False
            return val == source.get(key)

    def __iter__(self):
        with phil:
            source = self._mapping
            for key in source:
                yield (key, source.get(key))

    def __reversed__(self):
        with phil:
            source = self._mapping
            for key in reversed(source):
                yield (key, source.get(key))
class MappingHDF5(Mapping):

    """
        Immutable mapping interface for a Group, AttributeManager or
        DimensionManager object.

        MutableMapping is deliberately not the direct base, because some
        subclasses (DimensionManager, for example) are read-only.
    """
    def keys(self):
        """ View object over the member names """
        return KeysViewHDF5(self)

    def values(self):
        """ View object over the member objects """
        return ValuesViewHDF5(self)

    def items(self):
        """ View object over the (name, object) pairs """
        return ItemsViewHDF5(self)

    def _ipython_key_completions_(self):
        """ Sorted keys, used as tab completions for __getitem__ in IPython >=5.0. """
        completions = list(self.keys())
        completions.sort()
        return completions
class MutableMappingHDF5(MappingHDF5, MutableMapping):

    """
        Mutable mapping interface for a Group or AttributeManager object;
        the writable counterpart of the read-only MappingHDF5.

        Nothing is added here beyond combining the two base classes.
    """
class Empty:

    """
        Stand-in for an empty/null dataspace (a.k.a H5S_NULL).

        It carries a dtype but has neither shape nor data, which makes it
        different from an array of shape (0,).
    """
    # Both are None for every instance: there is no shape and no data.
    shape = None
    size = None

    def __init__(self, dtype):
        # Anything np.dtype accepts is normalised to a dtype object.
        self.dtype = np.dtype(dtype)

    def __eq__(self, other):
        # Equal only to another Empty carrying the same dtype.
        return isinstance(other, Empty) and self.dtype == other.dtype

    def __repr__(self):
        return "Empty(dtype={0!r})".format(self.dtype)
def product(nums):
    """Multiply together all the numbers in *nums*.

    An empty iterable gives 1.  For small inputs such as shape tuples this
    plain loop is much faster than calling numpy.prod().
    """
    running = 1
    for factor in nums:
        running *= factor
    return running
518# Simple variant of cached_property:
519# Unlike functools, this has no locking, so we don't have to worry about
520# deadlocks with phil (see issue gh-2064). Unlike cached-property on PyPI, it
521# doesn't try to import asyncio (which can be ~100 extra modules).
522# Many projects seem to have similar variants of this, often without attribution,
523# but to be cautious, this code comes from cached-property (Copyright (c) 2015,
524# Daniel Greenfeld, BSD license), where it is attributed to bottle (Copyright
525# (c) 2009-2022, Marcel Hellkamp, MIT license).
class cached_property(object):
    """ Descriptor that computes a value once and stores it on the instance.

    On first access the wrapped function is called and its result is put
    into the instance's ``__dict__`` under the function's name. Accessed
    on the class itself, the descriptor object is returned.
    """

    def __init__(self, func):
        self.func = func
        self.__doc__ = func.__doc__

    def __get__(self, obj, cls):
        if obj is None:
            return self

        result = self.func(obj)
        obj.__dict__[self.func.__name__] = result
        return result