Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/files.py: 26%
277 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:30 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:30 +0000
1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License: Standard 3-clause BSD; see "license.txt" for full license terms
8# and contributor agreement.
10"""
11 Implements high-level support for HDF5 file objects.
12"""
14import sys
15import os
16from warnings import warn
18from .compat import filename_decode, filename_encode
20from .base import phil, with_phil
21from .group import Group
22from .. import h5, h5f, h5p, h5i, h5fd, _objects
23from .. import version
# Optional features reported by the h5py build configuration.
mpi = h5.get_config().mpi
ros3 = h5.get_config().ros3
direct_vfd = h5.get_config().direct_vfd

# First three components of the HDF5 library version tuple.
hdf5_version = version.hdf5_version_tuple[0:3]

# SWMR is usable once the library reaches the configured minimum version.
swmr_support = bool(hdf5_version >= h5.get_config().swmr_min_hdf5_version)

# Name -> constant map for library version bounds, and its reverse.  Newer
# bound names are only registered when the HDF5 library is recent enough.
libver_dict = {'earliest': h5f.LIBVER_EARLIEST, 'latest': h5f.LIBVER_LATEST}
libver_dict_r = {bound: label for label, bound in libver_dict.items()}

if hdf5_version >= (1, 10, 2):
    libver_dict['v108'] = h5f.LIBVER_V18
    libver_dict['v110'] = h5f.LIBVER_V110
    libver_dict_r[h5f.LIBVER_V18] = 'v108'
    libver_dict_r[h5f.LIBVER_V110] = 'v110'

if hdf5_version >= (1, 11, 4):
    libver_dict['v112'] = h5f.LIBVER_V112
    libver_dict_r[h5f.LIBVER_V112] = 'v112'

if hdf5_version >= (1, 13, 0):
    libver_dict['v114'] = h5f.LIBVER_V114
    libver_dict_r[h5f.LIBVER_V114] = 'v114'
def _set_fapl_mpio(plist, **kwargs):
    """Configure ``plist`` to use the mpio driver.

    A fresh ``mpi4py.MPI.Info()`` is supplied as ``info`` unless the caller
    passed one.  Raises ValueError when h5py was built without MPI support.
    """
    if not mpi:
        raise ValueError("h5py was built without MPI support, can't use mpio driver")

    # mpi4py is imported only after the MPI-support check has passed.
    from mpi4py import MPI
    fallback_info = MPI.Info()
    kwargs.setdefault('info', fallback_info)
    plist.set_fapl_mpio(**kwargs)
def _set_fapl_fileobj(plist, **kwargs):
    """Install the Python file-object driver on a file access property list.

    The object to read from / write to is taken from ``kwargs['fileobj']``
    (None when the key is absent).
    """
    target = kwargs.get('fileobj')
    plist.set_fileobj_driver(h5fd.fileobj_driver, target)
def _plist_method_setter(method_name):
    """Return a driver setter that forwards its keyword options to the
    ``method_name`` method of the property list it receives."""
    def set_fapl(plist, **kwargs):
        return getattr(plist, method_name)(**kwargs)
    return set_fapl


def _set_fapl_family(plist, **kwargs):
    """Select the family driver, passing a copy of ``plist`` as ``memb_fapl``."""
    return plist.set_fapl_family(memb_fapl=plist.copy(), **kwargs)


# Driver name -> callable(plist, **kwargs) that configures the property list.
_drivers = {
    'sec2': _plist_method_setter('set_fapl_sec2'),
    'stdio': _plist_method_setter('set_fapl_stdio'),
    'core': _plist_method_setter('set_fapl_core'),
    'family': _set_fapl_family,
    'mpio': _set_fapl_mpio,
    'fileobj': _set_fapl_fileobj,
    'split': _plist_method_setter('set_fapl_split'),
}

# Drivers that exist only when the corresponding build option is enabled.
if ros3:
    _drivers['ros3'] = _plist_method_setter('set_fapl_ros3')

if direct_vfd:
    _drivers['direct'] = _plist_method_setter('set_fapl_direct')
def register_driver(name, set_fapl):
    """Add (or replace) an entry in the table of file drivers.

    Parameters
    ----------
    name : str
        Key under which the driver is stored; an existing entry with the
        same name is overwritten.
    set_fapl : callable[PropFAID, **kwargs] -> NoneType
        Called with the file access property list and the driver keyword
        arguments to make the list use the custom driver.
    """
    _drivers[name] = set_fapl
def unregister_driver(name):
    """Remove an entry from the table of file drivers.

    Parameters
    ----------
    name : str
        The name the driver was registered under.

    Raises
    ------
    KeyError
        If no driver is registered under ``name``.
    """
    _drivers.pop(name)
def registered_drivers():
    """Return the names of all currently registered drivers as a frozenset.

    The result is a snapshot: later registrations do not change it.
    """
    return frozenset(_drivers.keys())
def make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0, locking,
              page_buf_size, min_meta_keep, min_raw_keep,
              alignment_threshold, alignment_interval, meta_block_size,
              **kwds):
    """Build the file access property list used to open or create a file.

    Applies, in order: library version bounds, alignment, chunk cache
    overrides, page buffering, metadata block size and file locking, then
    hands the list to the setter registered for ``driver``.

    ``libver`` may be None (bounds 'earliest' .. 'latest'), a single name
    from ``libver_dict`` (the low bound, with 'latest' as the high bound) or
    an iterable of two such names (low, high).

    Remaining keyword arguments are forwarded to the driver setter.  With no
    explicit driver (or 'windows' on win32) any extra keyword raises
    TypeError.  An unregistered driver name, an unsupported ``locking``
    value, or a ``locking`` request on a too-old HDF5 raises ValueError.
    """
    fapl = h5p.create(h5p.FILE_ACCESS)

    if libver is None:
        # Default: from the earliest format up to the latest.
        low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST
    elif libver in libver_dict:
        low, high = libver_dict[libver], h5f.LIBVER_LATEST
    else:
        low, high = (libver_dict[bound] for bound in libver)
    fapl.set_libver_bounds(low, high)
    fapl.set_alignment(alignment_threshold, alignment_interval)

    # Replace only the cache entries the caller explicitly supplied.
    cache = list(fapl.get_cache())
    overrides = ((1, rdcc_nslots), (2, rdcc_nbytes), (3, rdcc_w0))
    for slot, value in overrides:
        if value is not None:
            cache[slot] = value
    fapl.set_cache(*cache)

    if page_buf_size:
        fapl.set_page_buffer_size(
            int(page_buf_size), int(min_meta_keep), int(min_raw_keep))

    if meta_block_size is not None:
        fapl.set_meta_block_size(int(meta_block_size))

    if locking is not None:
        # Locking control needs HDF5 >= 1.12.1, or a 1.10.x release >= 1.10.7.
        if hdf5_version < (1, 12, 1) and not (
                hdf5_version[:2] == (1, 10) and hdf5_version[2] >= 7):
            raise ValueError(
                "HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking.")

        if locking in ("false", False):
            enabled, lenient = False, False
        elif locking in ("true", True):
            enabled, lenient = True, False
        elif locking == "best-effort":
            enabled, lenient = True, True
        else:
            raise ValueError(f"Unsupported locking value: {locking}")
        fapl.set_file_locking(enabled, ignore_when_disabled=lenient)

    uses_default_driver = driver is None or (
        driver == 'windows' and sys.platform == 'win32')
    if uses_default_driver:
        # No setter will consume the extra keywords, so reject them rather
        # than silently swallowing them.
        if kwds:
            unexpected = next(iter(kwds))
            raise TypeError(
                "'{key}' is an invalid keyword argument for this function"
                .format(key=unexpected))
        return fapl

    try:
        configure = _drivers[driver]
    except KeyError:
        raise ValueError('Unknown driver type "%s"' % driver)
    configure(fapl, **kwds)

    return fapl
def make_fcpl(track_order=False, fs_strategy=None, fs_persist=False,
              fs_threshold=1, fs_page_size=None):
    """Build the file creation property list, or return None if not needed.

    A list is only created when ``track_order`` or ``fs_strategy`` is
    truthy.  ``fs_strategy`` must be one of 'fsm', 'page', 'aggregate' or
    'none', otherwise ValueError is raised.  ``fs_page_size`` is applied
    only together with the 'page' strategy.
    """
    if not (track_order or fs_strategy):
        return None

    fcpl = h5p.create(h5p.FILE_CREATE)

    if track_order:
        order_flags = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
        fcpl.set_link_creation_order(order_flags)
        fcpl.set_attr_creation_order(order_flags)

    if fs_strategy:
        known_strategies = {
            'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR,
            'page': h5f.FSPACE_STRATEGY_PAGE,
            'aggregate': h5f.FSPACE_STRATEGY_AGGR,
            'none': h5f.FSPACE_STRATEGY_NONE,
        }
        strategy_id = known_strategies.get(fs_strategy, -1)
        if strategy_id == -1:
            raise ValueError("Invalid file space strategy type")

        fcpl.set_file_space_strategy(strategy_id, fs_persist, fs_threshold)
        if fs_page_size and fs_strategy == 'page':
            fcpl.set_file_space_page_size(int(fs_page_size))

    return fcpl
def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False):
    """Get a new FileID by opening or creating a file.

    Also validates the mode argument.

    Parameters: ``name`` is the file name handed to ``h5f.open`` /
    ``h5f.create``; ``mode`` is one of r, r+, w, w-, x, a; ``userblock_size``
    is None or an integer-like user block size; ``fapl`` / ``fcpl`` are the
    file access / creation property lists; ``swmr`` requests SWMR read
    access (only honoured in mode 'r' when SWMR is supported).

    Raises ValueError for an invalid mode, for a user block requested in a
    read-only/read-write open ('r', 'r+'), for a non-integer user block size,
    or when the opened file's user block differs from the requested one.
    """
    if userblock_size is not None:
        if mode in ('r', 'r+'):
            raise ValueError("User block may only be specified "
                             "when creating a file")
        try:
            userblock_size = int(userblock_size)
        except (TypeError, ValueError) as err:
            # Chain explicitly so the original conversion error stays visible.
            raise ValueError("User block size must be an integer") from err
        if fcpl is None:
            fcpl = h5p.create(h5p.FILE_CREATE)
        fcpl.set_userblock(userblock_size)

    if mode == 'r':
        flags = h5f.ACC_RDONLY
        if swmr and swmr_support:
            flags |= h5f.ACC_SWMR_READ
        fid = h5f.open(name, flags, fapl=fapl)
    elif mode == 'r+':
        fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
    elif mode in ['w-', 'x']:
        fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    elif mode == 'w':
        fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl)
    elif mode == 'a':
        # Open in append mode (read/write).
        # If that fails, create a new file only if it won't clobber an
        # existing one (ACC_EXCL)
        try:
            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
        # Not all drivers raise FileNotFoundError (commented those that do not).
        # The exception class to catch is chosen at failure time from the
        # driver in use: FileNotFoundError for the listed drivers, otherwise
        # any OSError.
        except FileNotFoundError if fapl.get_driver() in (
            h5fd.SEC2,
            h5fd.DIRECT if direct_vfd else -1,
            # h5fd.STDIO,
            # h5fd.CORE,
            h5fd.FAMILY,
            h5fd.WINDOWS,
            # h5fd.MPIO,
            # h5fd.MPIPOSIX,
            h5fd.fileobj_driver,
            h5fd.ROS3D if ros3 else -1,
        ) else OSError:
            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    else:
        raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a")

    try:
        if userblock_size is not None:
            existing_userblock = fid.get_create_plist().get_userblock()
            if existing_userblock != userblock_size:
                raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, existing_userblock))
    except Exception:
        # Do not leak the open file; re-raise with the traceback untouched.
        fid.close()
        raise

    return fid
class File(Group):

    """
        Represents an HDF5 file (a Group subclass).
    """

    @property
    def attrs(self):
        """ Attributes attached to this object """
        # HDF5 rejects a file identifier as an attribute location, so the
        # AttributeManager is built on the root group rather than on self.
        from . import attrs
        with phil:
            root = self['/']
            return attrs.AttributeManager(root)

    @property
    @with_phil
    def filename(self):
        """File name on disk"""
        raw_name = h5f.get_name(self.id)
        return filename_decode(raw_name)

    @property
    @with_phil
    def driver(self):
        """Low-level HDF5 file driver used to open file"""
        # Insertion order matters if two constants share a value: the later
        # entry wins.
        names_by_id = {h5fd.SEC2: 'sec2',
                       h5fd.STDIO: 'stdio',
                       h5fd.CORE: 'core',
                       h5fd.FAMILY: 'family',
                       h5fd.WINDOWS: 'windows',
                       h5fd.MPIO: 'mpio',
                       h5fd.MPIPOSIX: 'mpiposix',
                       h5fd.fileobj_driver: 'fileobj'}
        if ros3:
            names_by_id[h5fd.ROS3D] = 'ros3'
        if direct_vfd:
            names_by_id[h5fd.DIRECT] = 'direct'
        active = self.id.get_access_plist().get_driver()
        return names_by_id.get(active, 'unknown')

    @property
    @with_phil
    def mode(self):
        """ Python mode used to open file """
        if swmr_support:
            writable_flags = h5f.ACC_RDWR | h5f.ACC_SWMR_WRITE
        else:
            writable_flags = h5f.ACC_RDWR
        if self.id.get_intent() & writable_flags:
            return 'r+'
        return 'r'

    @property
    @with_phil
    def libver(self):
        """File format version bounds (2-tuple: low, high)"""
        access_plist = self.id.get_access_plist()
        return tuple(libver_dict_r[bound]
                     for bound in access_plist.get_libver_bounds())

    @property
    @with_phil
    def userblock_size(self):
        """ User block size (in bytes) """
        return self.id.get_create_plist().get_userblock()

    @property
    @with_phil
    def meta_block_size(self):
        """ Meta block size (in bytes) """
        return self.id.get_access_plist().get_meta_block_size()

    # The atomic property is only defined on MPI builds with HDF5 >= 1.8.9.
    if mpi and hdf5_version >= (1, 8, 9):

        @property
        @with_phil
        def atomic(self):
            """ Set/get MPI-IO atomic mode
            """
            return self.id.get_mpi_atomicity()

        @atomic.setter
        @with_phil
        def atomic(self, value):
            # pylint: disable=missing-docstring
            self.id.set_mpi_atomicity(value)

    @property
    @with_phil
    def swmr_mode(self):
        """ Controls single-writer multiple-reader mode """
        if not swmr_support:
            return False
        swmr_flags = h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE
        return bool(self.id.get_intent() & swmr_flags)

    @swmr_mode.setter
    @with_phil
    def swmr_mode(self, value):
        # pylint: disable=missing-docstring
        if not swmr_support:
            raise RuntimeError('SWMR support is not available in HDF5 version {}.{}.{}.'.format(*hdf5_version))
        if not value:
            raise ValueError("It is not possible to forcibly switch SWMR mode off.")
        self.id.start_swmr_write()

    def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False,
                 rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None,
                 fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None,
                 page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None,
                 alignment_threshold=1, alignment_interval=1, meta_block_size=None, **kwds):
        """Create a new file object.

        See the h5py user guide for a detailed explanation of the options.

        name
            Name of the file on disk, or file-like object.  Note: for files
            created with the 'core' driver, HDF5 still requires this be
            non-empty.
        mode
            r        Readonly, file must exist (default)
            r+       Read/write, file must exist
            w        Create file, truncate if exists
            w- or x  Create file, fail if exists
            a        Read/write if exists, create otherwise
        driver
            Name of the driver to use.  Legal values are None (default,
            recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3',
            as well as 'family', 'split', 'fileobj' and any custom driver
            added with register_driver().
        libver
            Library version bounds.  Supported values: 'earliest', 'v108',
            'v110', 'v112', 'v114' and 'latest'.  The 'v108' and 'v110'
            options require HDF5 1.10.2 or later, 'v112' requires 1.11.4 or
            later and 'v114' requires 1.13.0 or later.
        userblock_size
            Desired size of user block.  Not allowed when opening an
            existing file with mode r or r+.
        swmr
            Open the file in SWMR read mode.  Only used when mode = 'r'.
        rdcc_nbytes
            Total size of the dataset chunk cache in bytes.  The default size
            is 1024**2 (1 MiB) per dataset.  Applies to all datasets unless
            individually changed.
        rdcc_w0
            The chunk preemption policy for all datasets.  This must be
            between 0 and 1 inclusive and indicates the weighting according to
            which chunks which have been fully read or written are penalized
            when determining which chunks to flush from cache.  A value of 0
            means fully read or written chunks are treated no differently than
            other chunks (the preemption is strictly LRU) while a value of 1
            means fully read or written chunks are always preempted before
            other chunks.  If your application only reads or writes data once,
            this can be safely set to 1.  Otherwise, this should be set lower
            depending on how often you re-read or re-write the same data.  The
            default value is 0.75.  Applies to all datasets unless
            individually changed.
        rdcc_nslots
            The number of chunk slots in the raw data chunk cache for this
            file.  Increasing this value reduces the number of cache
            collisions, but slightly increases the memory used.  Due to the
            hashing strategy, this value should ideally be a prime number.  As
            a rule of thumb, this value should be at least 10 times the number
            of chunks that can fit in rdcc_nbytes bytes.  For maximum
            performance, this value should be set approximately 100 times that
            number of chunks.  The default value is 521.  Applies to all
            datasets unless individually changed.
        track_order
            Track dataset/group/attribute creation order under root group
            if True.  If None use global default h5.get_config().track_order.
        fs_strategy
            The file space handling strategy to be used.  Only allowed when
            creating a new file (mode w, w- or x).  Defined as:
            "fsm"        FSM, Aggregators, VFD
            "page"       Paged FSM, VFD
            "aggregate"  Aggregators, VFD
            "none"       VFD
            If None use HDF5 defaults.
        fs_page_size
            File space page size in bytes.  Only used when fs_strategy="page".
            If None use the HDF5 default (4096 bytes).
        fs_persist
            A boolean value to indicate whether free space should be
            persistent or not.  Only allowed when creating a new file.  The
            default value is False.
        fs_threshold
            The smallest free-space section size that the free space manager
            will track.  Only allowed when creating a new file.  The default
            value is 1.
        page_buf_size
            Page buffer size in bytes.  Only allowed for HDF5 files created
            with fs_strategy="page".  Must be a power of two and greater than
            or equal to the file space page size used when creating the file.
            It is not used by default.
        min_meta_keep
            Minimum percentage of metadata to keep in the page buffer before
            allowing pages containing metadata to be evicted.  Applicable only
            if page_buf_size is set.  Default value is zero.
        min_raw_keep
            Minimum percentage of raw data to keep in the page buffer before
            allowing pages containing raw data to be evicted.  Applicable only
            if page_buf_size is set.  Default value is zero.
        locking
            The file locking behavior.  Defined as:

            - False (or "false") --  Disable file locking
            - True (or "true")   --  Enable file locking
            - "best-effort"      --  Enable file locking but ignore some errors
            - None               --  Use HDF5 defaults

            .. warning::

                The HDF5_USE_FILE_LOCKING environment variable can override
                this parameter.

            Only available with HDF5 >= 1.12.1 or 1.10.x >= 1.10.7.

        alignment_threshold
            Together with ``alignment_interval``, this property ensures that
            any file object greater than or equal in size to the alignment
            threshold (in bytes) will be aligned on an address which is a
            multiple of alignment interval.

        alignment_interval
            This property should be used in conjunction with
            ``alignment_threshold``.  See the description above.  For more
            details, see
            https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT

        meta_block_size
            Set the current minimum size, in bytes, of new metadata block
            allocations.  See
            https://portal.hdfgroup.org/display/HDF5/H5P_SET_META_BLOCK_SIZE

        Additional keywords
            Passed on to the selected file driver.
        """
        # Reject options the linked HDF5 library / h5py build cannot honour.
        if (fs_strategy or page_buf_size) and hdf5_version < (1, 10, 1):
            raise ValueError("HDF5 version 1.10.1 or greater required for file space strategy or page buffering support.")

        if swmr and not swmr_support:
            raise ValueError("The SWMR feature is not available in this version of the HDF5 library")

        if driver == 'ros3':
            if not ros3:
                raise ValueError(
                    "h5py was built without ROS3 support, can't use ros3 driver")
            from urllib.parse import urlparse
            url = urlparse(name)
            if url.scheme == 's3':
                # Rewrite s3://bucket/key into the regional HTTPS endpoint.
                aws_region = kwds.get('aws_region', b'').decode('ascii')
                if not aws_region:
                    raise ValueError('AWS region required for s3:// location')
                name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}'
            elif url.scheme not in ('https', 'http'):
                raise ValueError(f'{name}: S3 location must begin with '
                                 'either "https://", "http://", or "s3://"')

        if locking is not None and hdf5_version < (1, 12, 1) and (
                hdf5_version[:2] != (1, 10) or hdf5_version[2] < 7):
            raise ValueError("HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking options.")

        if isinstance(name, _objects.ObjectID):
            # Wrap the file that an already-open HDF5 object belongs to.
            if fs_strategy:
                raise ValueError("Unable to set file space strategy of an existing file")

            with phil:
                fid = h5i.get_file_id(name)
        else:
            is_file_like = hasattr(name, 'read') and hasattr(name, 'seek')
            if is_file_like:
                if driver not in (None, 'fileobj'):
                    raise ValueError("Driver must be 'fileobj' for file-like object if specified.")
                driver = 'fileobj'
                if kwds.get('fileobj', name) != name:
                    raise ValueError("Invalid value of 'fileobj' argument; "
                                     "must equal to file-like object if specified.")
                kwds['fileobj'] = name
                # The object's repr stands in for the file name.
                name = repr(name).encode('ASCII', 'replace')
            else:
                name = filename_encode(name)

            if track_order is None:
                track_order = h5.get_config().track_order

            if fs_strategy and mode not in ('w', 'w-', 'x'):
                raise ValueError("Unable to set file space strategy of an existing file")

            if swmr and mode != 'r':
                warn(
                    "swmr=True only affects read ('r') mode. For swmr write "
                    "mode, set f.swmr_mode = True after opening the file.",
                    stacklevel=2,
                )

            with phil:
                fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
                                 locking, page_buf_size, min_meta_keep, min_raw_keep,
                                 alignment_threshold=alignment_threshold,
                                 alignment_interval=alignment_interval,
                                 meta_block_size=meta_block_size,
                                 **kwds)
                fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy,
                                 fs_persist=fs_persist, fs_threshold=fs_threshold,
                                 fs_page_size=fs_page_size)
                fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)

            # NOTE(review): nesting reconstructed from a flattened listing --
            # confirm this assignment belongs to the non-ObjectID path only.
            self._libver = libver if isinstance(libver, tuple) else (libver, 'latest')

        super().__init__(fid)

    def close(self):
        """ Close the file. All open objects become invalid """
        with phil:
            if not self.id.valid:
                # The file is no longer open: nothing to do.
                return

            # Every open object related to the file has to be closed
            # explicitly.  File-resident objects go first, then the files;
            # otherwise we get errors in MPI mode.
            self.id._close_open_objects(h5f.OBJ_LOCAL | ~h5f.OBJ_FILE)
            self.id._close_open_objects(h5f.OBJ_LOCAL | h5f.OBJ_FILE)

            self.id.close()
            _objects.nonlocal_close()

    def flush(self):
        """ Tell the HDF5 library to flush its buffers.
        """
        with phil:
            h5f.flush(self.id)

    @with_phil
    def __enter__(self):
        return self

    @with_phil
    def __exit__(self, *args):
        # Close on leaving the with-block unless the id is already falsy.
        if self.id:
            self.close()

    @with_phil
    def __repr__(self):
        if not self.id:
            return '<Closed HDF5 file>'

        # Filename has to be forced to Unicode if it comes back bytes;
        # mode is always a "native" string.
        shown_name = self.filename
        if isinstance(shown_name, bytes):  # Can't decode fname
            shown_name = shown_name.decode('utf8', 'replace')
        return f'<HDF5 file "{os.path.basename(shown_name)}" (mode {self.mode})>'