Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/base.py: 39%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License: Standard 3-clause BSD; see "license.txt" for full license terms
8# and contributor agreement.
10"""
11 Implements operations common to all high-level objects (File, etc.).
12"""
14from collections.abc import (
15 Mapping, MutableMapping, KeysView, ValuesView, ItemsView
16)
17import os
18import posixpath
20import numpy as np
22# The high-level interface is serialized; every public API function & method
23# is wrapped in a lock. We reuse the low-level lock because (1) it's fast,
24# and (2) it eliminates the possibility of deadlocks due to out-of-order
25# lock acquisition.
26from .._objects import phil, with_phil
27from .. import h5d, h5i, h5r, h5p, h5f, h5t, h5s
28from .compat import fspath, filename_encode
def is_hdf5(fname):
    """ Report whether *fname* names a valid HDF5 file.

    The path is made absolute first. Anything that is not an existing file
    yields False without consulting the HDF5 library.
    """
    with phil:
        abs_path = os.path.abspath(fspath(fname))

        # Guard: only hand real files to the low-level check.
        if not os.path.isfile(abs_path):
            return False
        return h5f.is_hdf5(filename_encode(abs_path))
def find_item_type(data):
    """Work out the common item type of a scalar or (nested) collection.

    For example, ``[[['a']]]`` gives ``str``.

    Only homogeneous collections are of interest: if the items do not all
    share one type (or there are no items), None is returned.

    Lists and tuples are searched recursively. A numpy array is searched
    element by element only when it holds plain Python objects (dtype kind
    'O' that is not tagged as an h5py string or vlen dtype); any other array
    gives None. Every other object, including sets and dicts, is treated as
    a single item and its own type is returned.
    """
    if isinstance(data, np.ndarray):
        # Arrays with a specific dtype (or a tagged h5py object dtype) are
        # not inspected item by item.
        if (
            data.dtype.kind != 'O'
            or h5t.check_string_dtype(data.dtype)
            or h5t.check_vlen_dtype(data.dtype)
        ):
            return None
        candidates = {type(element) for element in data.flat}
    elif isinstance(data, (list, tuple)):
        candidates = {find_item_type(element) for element in data}
    else:
        return type(data)

    # Exactly one distinct type means the collection is homogeneous.
    if len(candidates) == 1:
        return candidates.pop()
    return None
def guess_dtype(data):
    """ Try to pick a suitable dtype for *data*.

    Returns None if nothing is appropriate (or if it should be left up to
    the array constructor to figure out).

    Region references and object references map to their dedicated h5py
    dtypes; data whose items are all bytes or all str map to h5py string
    dtypes (ASCII-encoded for bytes, the default encoding for str).
    """
    with phil:
        # RegionReference is tested before Reference, as in the original order.
        if isinstance(data, h5r.RegionReference):
            guessed = h5t.regionref_dtype
        elif isinstance(data, h5r.Reference):
            guessed = h5t.ref_dtype
        else:
            element_type = find_item_type(data)
            if element_type is bytes:
                guessed = h5t.string_dtype(encoding='ascii')
            elif element_type is str:
                guessed = h5t.string_dtype()
            else:
                guessed = None

        return guessed
def is_float16_dtype(dt):
    """Return True if *dt* describes a 2-byte floating point type.

    *dt* may be None (giving False) or anything np.dtype() accepts, such as
    a dtype object or a string like 'f2'.
    """
    if dt is None:
        return False

    normalized = np.dtype(dt)  # accept strings etc. as well as dtype objects
    return (normalized.kind, normalized.itemsize) == ('f', 2)
def array_for_new_object(data, specified_dtype=None):
    """Build the C-ordered array used to create a new dataset or attribute.

    The dtype handed to numpy is *specified_dtype* when either
    (a) it is a float16 type, or (b) *data* is not yet an ndarray and a dtype
    was specified; otherwise it is whatever guess_dtype() suggests (possibly
    None, leaving the choice to numpy).
    """
    # HDF5 normally converts data on write, but float16 is pre-converted in
    # Python to work around a conversion bug:
    # https://github.com/h5py/h5py/issues/819
    # Separately, when converting e.g. a list, numpy should not have to guess
    # a dtype that the caller already supplied.
    use_specified = is_float16_dtype(specified_dtype) or (
        specified_dtype is not None and not isinstance(data, np.ndarray)
    )
    target_dtype = specified_dtype if use_specified else guess_dtype(data)

    arr = np.asarray(data, dtype=target_dtype, order="C")

    if target_dtype is None:
        return arr

    # Usually a no-op. But when the input was already an array and the target
    # is a tagged h5py dtype (e.g. an object array of strings), asarray()
    # keeps the old dtype object; viewing applies the tagged one.
    return arr.view(dtype=target_dtype)
def default_lapl():
    """ Return the default link access property list.

    This is always None; no property list object is created.
    """
    return None
def default_lcpl():
    """ Build the default link creation property list.

    A new LINK_CREATE property list is created with creation of
    intermediate groups switched on.
    """
    plist = h5p.create(h5p.LINK_CREATE)
    plist.set_create_intermediate_group(True)
    return plist
# Shared module-level defaults, built once when this module is imported.
# CommonStateObject._lapl and CommonStateObject._lcpl return these objects.
# dlapl is None, because default_lapl() returns None.
dlapl = default_lapl()
dlcpl = default_lcpl()
def is_empty_dataspace(obj):
    """ Tell whether *obj* has an empty (h5s.NULL) dataspace.

    *obj* must provide get_space(); the extent type of the returned
    dataspace is compared against h5s.NULL.
    """
    space = obj.get_space()
    return bool(space.get_simple_extent_type() == h5s.NULL)
class CommonStateObject:

    """
        Mixin for sharing information between objects which live in the
        same HDF5 file.  The host class is required to provide a ".id"
        attribute which returns a low-level ObjectID subclass.

        Name encoding and decoding helpers are implemented here as well.
    """

    @property
    def _lapl(self):
        """ Link access property list to use for this object (the
        module-level default).
        """
        return dlapl

    @property
    def _lcpl(self):
        """ Link creation property list to use for this object (the
        module-level default).
        """
        return dlcpl

    def _e(self, name, lcpl=None):
        """ Encode a name for passing to the low-level API.

        Returns the encoded name, or a 2-tuple (name, lcpl) if lcpl is true.

        - Byte strings are passed through untouched, tagged h5t.CSET_ASCII
        - Text that is pure ASCII is encoded as ASCII, h5t.CSET_ASCII
        - Any other text is encoded as utf8, h5t.CSET_UTF8

        A name of None gives None, or (None, None) if lcpl is true.
        Anything that is neither str nor bytes raises TypeError.
        """
        if name is None:
            if lcpl:
                return None, None
            return None

        if isinstance(name, bytes):
            encoded, cset = name, h5t.CSET_ASCII
        elif isinstance(name, str):
            # Prefer the narrower ASCII tag; fall back to UTF-8 only if needed.
            try:
                encoded, cset = name.encode('ascii'), h5t.CSET_ASCII
            except UnicodeEncodeError:
                encoded, cset = name.encode('utf8'), h5t.CSET_UTF8
        else:
            raise TypeError(f"A name should be string or bytes, not {type(name)}")

        if not lcpl:
            return encoded

        # Work on a copy so the shared default property list is not modified.
        plist = self._lcpl.copy()
        plist.set_char_encoding(cset)
        return encoded, plist

    def _d(self, name):
        """ Decode a name received from the low-level API.

        - Decoding as utf8 is attempted first
        - If that fails, the byte string is handed back unchanged

        A name of None gives None.
        """
        if name is None:
            return None

        try:
            decoded = name.decode('utf8')
        except UnicodeDecodeError:
            return name
        return decoded
226class _RegionProxy:
228 """
229 Proxy object which handles region references.
231 To create a new region reference (datasets only), use slicing syntax:
233 >>> newref = obj.regionref[0:10:2]
235 To determine the target dataset shape from an existing reference:
237 >>> shape = obj.regionref.shape(existingref)
239 where <obj> may be any object in the file. To determine the shape of
240 the selection in use on the target dataset:
242 >>> selection_shape = obj.regionref.selection(existingref)
243 """
245 def __init__(self, obj):
246 self.obj = obj
247 self.id = obj.id
249 def __getitem__(self, args):
250 if not isinstance(self.id, h5d.DatasetID):
251 raise TypeError("Region references can only be made to datasets")
252 from . import selections
253 with phil:
254 selection = selections.select(self.id.shape, args, dataset=self.obj)
255 return h5r.create(self.id, b'.', h5r.DATASET_REGION, selection.id)
257 def shape(self, ref):
258 """ Get the shape of the target dataspace referred to by *ref*. """
259 with phil:
260 sid = h5r.get_region(ref, self.id)
261 return sid.shape
263 def selection(self, ref):
264 """ Get the shape of the target dataspace selection referred to by *ref*
265 """
266 from . import selections
267 with phil:
268 sid = h5r.get_region(ref, self.id)
269 return selections.guess_shape(sid)
class HLObject(CommonStateObject):

    """
        Common base class for the objects of the high-level interface.
    """

    @with_phil
    def __init__(self, oid):
        """ Initialise the object from its low-level identifier """
        self._id = oid

    @property
    def file(self):
        """ A File instance for the file this object belongs to """
        from . import files
        with phil:
            return files.File(self.id)

    @property
    @with_phil
    def name(self):
        """ Full name of this object, or None if it is anonymous. """
        raw_name = h5i.get_name(self.id)
        return self._d(raw_name)

    @property
    @with_phil
    def parent(self):
        """The parent group of this object.

        Always equivalent to obj.file[posixpath.dirname(obj.name)].
        Raises ValueError if this object is anonymous.
        """
        if self.name is None:
            raise ValueError("Parent of an anonymous object is undefined")
        owner = self.file
        return owner[posixpath.dirname(self.name)]

    @property
    @with_phil
    def id(self):
        """ The low-level identifier wrapped by this object """
        return self._id

    @property
    @with_phil
    def ref(self):
        """ An (opaque) HDF5 object reference pointing at this object """
        return h5r.create(self.id, b'.', h5r.OBJECT)

    @property
    @with_phil
    def regionref(self):
        """Proxy for creating and inspecting region references.

        Slice it to create a reference (datasets only): the syntax is
        regionref[<slices>], e.g. dset.regionref[...] selects the whole
        dataset.

        The proxy's shape(ref) method gives the shape of the referenced
        dataset and its selection(ref) method gives the shape of the
        selection.
        """
        return _RegionProxy(self)

    @property
    def attrs(self):
        """ Manager for the attributes attached to this object """
        from . import attrs
        with phil:
            return attrs.AttributeManager(self)

    @with_phil
    def __hash__(self):
        return hash(self.id)

    @with_phil
    def __eq__(self, other):
        # Anything lacking an 'id' attribute is left to the other operand.
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.id == other.id

    def __bool__(self):
        with phil:
            return bool(self.id)
    __nonzero__ = __bool__

    def __getnewargs__(self):
        """Refuse to be pickled.

        HDF5 object handles cannot be deserialised reliably, since the
        receiving side may not have access to the same files. Raising here
        makes the attempt fail early.

        If you really want to pickle h5py objects and can live with some
        limitations, look at the h5pickle project on PyPI.
        """
        raise TypeError("h5py objects cannot be pickled")

    def __getstate__(self):
        # Pickle protocols 0 and 1 call this rather than __getnewargs__
        raise TypeError("h5py objects cannot be pickled")
372# --- Dictionary-style interface ----------------------------------------------
374# To implement the dictionary-style interface for groups and attributes,
375# we inherit from the appropriate abstract base classes in collections.abc.
376#
377# All locking is taken care of by the subclasses.
378# We have to override ValuesView and ItemsView here because Group and
379# AttributeManager can only test for key names.
class KeysViewHDF5(KeysView):
    """Keys view whose text form lists the keys and which supports reversed()."""

    def __str__(self):
        names = list(self)
        return "<KeysViewHDF5 {}>".format(names)

    __repr__ = __str__

    def __reversed__(self):
        # Reverse iteration is delegated to the wrapped mapping.
        for name in reversed(self._mapping):
            yield name
class ValuesViewHDF5(ValuesView):

    """
        Value view over e.g. a Group or AttributeManager.

        Membership tests are slow: __contains__ has to look up every
        link or attribute in turn and compare it.
    """

    def __contains__(self, value):
        with phil:
            mapping = self._mapping
            # Linear scan; stops at the first value comparing equal.
            return any(value == mapping.get(key) for key in mapping)

    def __iter__(self):
        with phil:
            mapping = self._mapping
            for key in mapping:
                yield mapping.get(key)

    def __reversed__(self):
        with phil:
            mapping = self._mapping
            for key in reversed(mapping):
                yield mapping.get(key)
class ItemsViewHDF5(ItemsView):

    """
        Items view over e.g. a Group or AttributeManager.
    """

    def __contains__(self, item):
        with phil:
            key, val = item
            mapping = self._mapping
            # An unknown key can never be part of an item.
            if key not in mapping:
                return False
            return val == mapping.get(key)

    def __iter__(self):
        with phil:
            mapping = self._mapping
            for key in mapping:
                yield (key, mapping.get(key))

    def __reversed__(self):
        with phil:
            mapping = self._mapping
            for key in reversed(mapping):
                yield (key, mapping.get(key))
class MappingHDF5(Mapping):

    """
        Immutable mapping interface over a Group, AttributeManager or
        DimensionManager object.

        MutableMapping is deliberately not a base class here, because some
        subclasses (DimensionManager, for example) are read-only.
    """
    def keys(self):
        """ Return a view of the member names """
        return KeysViewHDF5(self)

    def values(self):
        """ Return a view of the member objects """
        return ValuesViewHDF5(self)

    def items(self):
        """ Return a view of the (name, object) pairs """
        return ItemsViewHDF5(self)

    def _ipython_key_completions_(self):
        """ Sorted member names, used by IPython >=5.0 to tab-complete
        __getitem__ keys.
        """
        names = list(self.keys())
        names.sort()
        return names
class MutableMappingHDF5(MappingHDF5, MutableMapping):

    """
        Mutable mapping interface over a Group or AttributeManager object.

        This is the writable counterpart of the read-only MappingHDF5; it
        adds nothing beyond combining MappingHDF5 with MutableMapping.
    """
class Empty:

    """
        Stand-in for an empty/null dataspace (a.k.a. H5S_NULL).

        It carries a dtype but has neither shape nor data, which makes it
        different from an array of shape (0,).
    """
    # Class-level markers: an Empty has no shape and no size.
    shape = None
    size = None

    def __init__(self, dtype):
        # Whatever np.dtype() accepts is normalised to a dtype object.
        self.dtype = np.dtype(dtype)

    def __eq__(self, other):
        # Equal only to another Empty with the same dtype.
        is_empty = isinstance(other, Empty)
        return is_empty and self.dtype == other.dtype

    def __repr__(self):
        return "Empty(dtype={0!r})".format(self.dtype)
def product(nums):
    """Multiply together all the items of *nums*.

    An empty iterable gives 1. For small inputs such as shape tuples, this
    plain loop is much faster than calling numpy.prod().
    """
    result = 1
    for factor in nums:
        result *= factor
    return result
514# Simple variant of cached_property:
515# Unlike functools, this has no locking, so we don't have to worry about
516# deadlocks with phil (see issue gh-2064). Unlike cached-property on PyPI, it
517# doesn't try to import asyncio (which can be ~100 extra modules).
518# Many projects seem to have similar variants of this, often without attribution,
519# but to be cautious, this code comes from cached-property (Copyright (c) 2015,
520# Daniel Greenfeld, BSD license), where it is attributed to bottle (Copyright
521# (c) 2009-2022, Marcel Hellkamp, MIT license).
class cached_property:
    """Descriptor that computes a value once and stores it on the instance.

    On first access the wrapped function is called and its result is written
    into the instance __dict__ under the function's name, so later lookups of
    that name find the stored value directly (assuming the attribute is bound
    under the function's own name). No locking is done.
    """

    def __init__(self, func):
        self.func = func
        self.__doc__ = func.__doc__

    def __get__(self, obj, cls):
        # Access through the class returns the descriptor itself.
        if obj is None:
            return self

        result = self.func(obj)
        obj.__dict__[self.func.__name__] = result
        return result