Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/h5py/_hl/files.py: 26%
273 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# This file is part of h5py, a Python interface to the HDF5 library.
2#
3# http://www.h5py.org
4#
5# Copyright 2008-2013 Andrew Collette and contributors
6#
7# License: Standard 3-clause BSD; see "license.txt" for full license terms
8# and contributor agreement.
10"""
11 Implements high-level support for HDF5 file objects.
12"""
14import sys
15import os
16from warnings import warn
18from .compat import filename_decode, filename_encode
20from .base import phil, with_phil
21from .group import Group
22from .. import h5, h5f, h5p, h5i, h5fd, _objects
23from .. import version
# Feature flags and HDF5 version as reported by the h5py build configuration.
mpi = h5.get_config().mpi
ros3 = h5.get_config().ros3
direct_vfd = h5.get_config().direct_vfd
hdf5_version = version.hdf5_version_tuple[:3]

swmr_support = True


# Map user-facing libver names to h5f constants.
libver_dict = {
    'earliest': h5f.LIBVER_EARLIEST,
    'latest': h5f.LIBVER_LATEST,
    'v108': h5f.LIBVER_V18,
    'v110': h5f.LIBVER_V110,
}
# Reverse lookup (constant -> name). Entries are inserted in the same order as
# above, so if two names share a constant the later name wins.
libver_dict_r = {bound: label for label, bound in libver_dict.items()}

# Newer bounds are only registered when the HDF5 version is recent enough.
if hdf5_version >= (1, 11, 4):
    libver_dict['v112'] = h5f.LIBVER_V112
    libver_dict_r[h5f.LIBVER_V112] = 'v112'

if hdf5_version >= (1, 13, 0):
    libver_dict['v114'] = h5f.LIBVER_V114
    libver_dict_r[h5f.LIBVER_V114] = 'v114'
def _set_fapl_mpio(plist, **kwargs):
    """Configure ``plist`` to use the mpio driver.

    Raises ValueError when the module-level ``mpi`` flag is false. Otherwise
    ``mpi4py.MPI`` is imported, the ``info`` keyword is filled with a new
    ``mpi4py.MPI.Info()`` unless the caller supplied one, and all keywords
    are forwarded to ``plist.set_fapl_mpio``.
    """
    if not mpi:
        raise ValueError("h5py was built without MPI support, can't use mpio driver")

    # Imported lazily so mpi4py is only needed when the mpio driver is used.
    import mpi4py.MPI

    default_info = mpi4py.MPI.Info()
    kwargs.setdefault('info', default_info)
    plist.set_fapl_mpio(**kwargs)
def _set_fapl_fileobj(plist, **kwargs):
    """Install the Python file-object driver on a file access property list.

    Only the ``fileobj`` keyword is read (``None`` when absent); any other
    keywords are ignored.
    """
    target = kwargs.get('fileobj')
    plist.set_fileobj_driver(h5fd.fileobj_driver, target)
def _forward_to(method_name):
    """Return a driver setter that passes its keywords to ``plist.<method_name>``."""
    def set_fapl(plist, **kwargs):
        return getattr(plist, method_name)(**kwargs)
    return set_fapl


def _set_fapl_family(plist, **kwargs):
    """Select the family driver, using a copy of ``plist`` as the member fapl."""
    return plist.set_fapl_family(memb_fapl=plist.copy(), **kwargs)


# Registry of driver name -> callable(plist, **kwargs) that configures the fapl.
_drivers = {
    'sec2': _forward_to('set_fapl_sec2'),
    'stdio': _forward_to('set_fapl_stdio'),
    'core': _forward_to('set_fapl_core'),
    'family': _set_fapl_family,
    'mpio': _set_fapl_mpio,
    'fileobj': _set_fapl_fileobj,
    'split': _forward_to('set_fapl_split'),
}

# Optional drivers are registered only when the matching config flag is set.
if ros3:
    _drivers['ros3'] = _forward_to('set_fapl_ros3')

if direct_vfd:
    _drivers['direct'] = _forward_to('set_fapl_direct')
def register_driver(name, set_fapl):
    """Register a custom driver.

    An existing registration under the same name is replaced.

    Parameters
    ----------
    name : str
        The name under which the driver is registered.
    set_fapl : callable[PropFAID, **kwargs] -> NoneType
        Function that configures a file access property list so that it
        uses the custom driver.
    """
    _drivers.update({name: set_fapl})
def unregister_driver(name):
    """Unregister a custom driver.

    Raises KeyError when no driver is registered under ``name``.

    Parameters
    ----------
    name : str
        The name of the driver to remove.
    """
    _drivers.pop(name)
def registered_drivers():
    """Return the names of all currently registered drivers as a frozenset."""
    return frozenset(_drivers.keys())
def make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0, locking,
              page_buf_size, min_meta_keep, min_raw_keep,
              alignment_threshold, alignment_interval, meta_block_size,
              **kwds):
    """Build and return a file access property list.

    ``libver`` is either ``None`` (bounds earliest..latest), a single key of
    ``libver_dict`` (that bound..latest), or an iterable of exactly two such
    keys (low, high). The ``rdcc_*`` values override the corresponding chunk
    cache settings when not ``None``. The page buffer is configured only when
    ``page_buf_size`` is truthy, the metadata block size only when
    ``meta_block_size`` is not ``None``.

    ``locking`` accepts ``False``/"false", ``True``/"true", "best-effort" or
    ``None`` (leave untouched).

    Extra keywords are forwarded to the selected driver's setter. With no
    driver (or 'windows' on win32) any extra keyword raises TypeError.

    Raises ValueError for an unsupported locking value, an HDF5 version too
    old for the requested feature, or an unknown driver name.
    """
    fapl = h5p.create(h5p.FILE_ACCESS)

    # --- library version bounds -------------------------------------------
    if libver is None:
        # we default to earliest
        low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST
    elif libver in libver_dict:
        low = libver_dict[libver]
        high = h5f.LIBVER_LATEST
    else:
        low, high = (libver_dict[x] for x in libver)
    fapl.set_libver_bounds(low, high)
    fapl.set_alignment(alignment_threshold, alignment_interval)

    # --- chunk cache: overwrite only the entries the caller supplied ------
    cache = list(fapl.get_cache())
    overrides = ((1, rdcc_nslots), (2, rdcc_nbytes), (3, rdcc_w0))
    for position, requested in overrides:
        if requested is not None:
            cache[position] = requested
    fapl.set_cache(*cache)

    if page_buf_size:
        fapl.set_page_buffer_size(int(page_buf_size), int(min_meta_keep),
                                  int(min_raw_keep))

    if meta_block_size is not None:
        fapl.set_meta_block_size(int(meta_block_size))

    # --- file locking -------------------------------------------------------
    if locking is not None:
        new_enough_1_10 = hdf5_version[:2] == (1, 10) and hdf5_version[2] >= 7
        if hdf5_version < (1, 12, 1) and not new_enough_1_10:
            raise ValueError(
                "HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking.")

        if locking in ("false", False):
            use_locking, ignore_disabled = False, False
        elif locking in ("true", True):
            use_locking, ignore_disabled = True, False
        elif locking == "best-effort":
            use_locking, ignore_disabled = True, True
        else:
            raise ValueError(f"Unsupported locking value: {locking}")
        fapl.set_file_locking(use_locking, ignore_when_disabled=ignore_disabled)

    # --- driver selection ---------------------------------------------------
    if driver is None or (driver == 'windows' and sys.platform == 'win32'):
        # Prevent swallowing unused key arguments
        if kwds:
            first_unused = next(iter(kwds))
            raise TypeError(
                f"'{first_unused}' is an invalid keyword argument for this function")
        return fapl

    try:
        set_fapl = _drivers[driver]
    except KeyError:
        raise ValueError('Unknown driver type "%s"' % driver)

    if driver != 'ros3':
        set_fapl(fapl, **kwds)
        return fapl

    # ros3: the session token is not passed to the driver setter; it is
    # applied separately after the driver has been configured.
    token = kwds.pop('session_token', None)
    set_fapl(fapl, **kwds)
    if token:
        if hdf5_version < (1, 14, 2):
            raise ValueError('HDF5 >= 1.14.2 required for AWS session token')
        fapl.set_fapl_ros3_token(token)
    return fapl
def make_fcpl(track_order=False, fs_strategy=None, fs_persist=False,
              fs_threshold=1, fs_page_size=None):
    """Build a file creation property list, or return ``None`` if not needed.

    A property list is created only when ``track_order`` or ``fs_strategy``
    is truthy. ``track_order`` enables tracked and indexed creation order for
    both links and attributes. ``fs_strategy`` must be one of 'fsm', 'page',
    'aggregate' or 'none'; anything else raises ValueError. ``fs_page_size``
    is applied only when it is truthy and the strategy is 'page'.
    """
    if not (track_order or fs_strategy):
        return None

    fcpl = h5p.create(h5p.FILE_CREATE)

    if track_order:
        order_flags = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
        fcpl.set_link_creation_order(order_flags)
        fcpl.set_attr_creation_order(order_flags)

    if fs_strategy:
        strategies = {
            'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR,
            'page': h5f.FSPACE_STRATEGY_PAGE,
            'aggregate': h5f.FSPACE_STRATEGY_AGGR,
            'none': h5f.FSPACE_STRATEGY_NONE
        }
        fs_strat_num = strategies.get(fs_strategy, -1)
        if fs_strat_num == -1:
            raise ValueError("Invalid file space strategy type")

        fcpl.set_file_space_strategy(fs_strat_num, fs_persist, fs_threshold)
        wants_page_size = fs_page_size and fs_strategy == 'page'
        if wants_page_size:
            fcpl.set_file_space_page_size(int(fs_page_size))

    return fcpl
def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False):
    """Get a new FileID by opening or creating a file.

    Also validates the ``mode`` argument.

    Parameters
    ----------
    name
        File name passed to ``h5f.open`` / ``h5f.create``.
    mode
        One of 'r', 'r+', 'w', 'w-', 'x' or 'a'.
    userblock_size
        ``None``, or a value convertible with ``int()``. Rejected for modes
        'r' and 'r+'. When given, it is set on ``fcpl`` (a new file creation
        property list is made if ``fcpl`` is ``None``) and checked against
        the opened file afterwards.
    fapl
        File access property list.
    fcpl
        Optional file creation property list, used by the creating modes.
    swmr
        When true (and ``swmr_support`` is set) mode 'r' adds the SWMR read
        flag.

    Raises
    ------
    ValueError
        For an invalid mode, a user block requested with a read-only or
        read/write-existing mode, a non-integer user block size, or a user
        block size that differs from the one stored in the opened file.
    """
    if userblock_size is not None:
        if mode in ('r', 'r+'):
            raise ValueError("User block may only be specified "
                             "when creating a file")
        try:
            userblock_size = int(userblock_size)
        except (TypeError, ValueError) as err:
            # Chain explicitly so the traceback shows the conversion failure
            # as the cause rather than as an error during handling.
            raise ValueError("User block size must be an integer") from err
        if fcpl is None:
            fcpl = h5p.create(h5p.FILE_CREATE)
        fcpl.set_userblock(userblock_size)

    if mode == 'r':
        flags = h5f.ACC_RDONLY
        if swmr and swmr_support:
            flags |= h5f.ACC_SWMR_READ
        fid = h5f.open(name, flags, fapl=fapl)
    elif mode == 'r+':
        fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
    elif mode in ['w-', 'x']:
        fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    elif mode == 'w':
        fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl)
    elif mode == 'a':
        # Open in append mode (read/write).
        # If that fails, create a new file only if it won't clobber an
        # existing one (ACC_EXCL)
        try:
            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
        # The expression after ``except`` is evaluated only when the open
        # fails. For the drivers listed, only FileNotFoundError triggers
        # creation; for every other driver any OSError does.
        # Not all drivers raise FileNotFoundError (commented those that do not)
        except FileNotFoundError if fapl.get_driver() in (
            h5fd.SEC2,
            h5fd.DIRECT if direct_vfd else -1,
            # h5fd.STDIO,
            # h5fd.CORE,
            h5fd.FAMILY,
            h5fd.WINDOWS,
            # h5fd.MPIO,
            # h5fd.MPIPOSIX,
            h5fd.fileobj_driver,
            h5fd.ROS3D if ros3 else -1,
        ) else OSError:
            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    else:
        raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a")

    # Verify the user block of the file we actually got; close the file
    # before propagating any failure so the identifier is not leaked.
    try:
        if userblock_size is not None:
            existing_size = fid.get_create_plist().get_userblock()
            if existing_size != userblock_size:
                raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, existing_size))
    except Exception:
        fid.close()
        raise

    return fid
class File(Group):

    """
        Represents an HDF5 file.
    """

    @property
    def attrs(self):
        """ Attributes attached to this object """
        # hdf5 complains that a file identifier is an invalid location for an
        # attribute. Instead of self, pass the root group to AttributeManager:
        from . import attrs as attrs_module
        with phil:
            root_group = self['/']
            return attrs_module.AttributeManager(root_group)

    @property
    @with_phil
    def filename(self):
        """File name on disk"""
        raw_name = h5f.get_name(self.id)
        return filename_decode(raw_name)

    @property
    @with_phil
    def driver(self):
        """Low-level HDF5 file driver used to open file"""
        # Kept as an ordered list of pairs: if two constants compare equal,
        # the later name is the one that ends up in the lookup table.
        known = [
            (h5fd.SEC2, 'sec2'),
            (h5fd.STDIO, 'stdio'),
            (h5fd.CORE, 'core'),
            (h5fd.FAMILY, 'family'),
            (h5fd.WINDOWS, 'windows'),
            (h5fd.MPIO, 'mpio'),
            (h5fd.MPIPOSIX, 'mpiposix'),
            (h5fd.fileobj_driver, 'fileobj'),
        ]
        if ros3:
            known.append((h5fd.ROS3D, 'ros3'))
        if direct_vfd:
            known.append((h5fd.DIRECT, 'direct'))
        names_by_id = dict(known)
        active = self.id.get_access_plist().get_driver()
        return names_by_id.get(active, 'unknown')

    @property
    @with_phil
    def mode(self):
        """ Python mode used to open file """
        writable_bits = h5f.ACC_RDWR
        if swmr_support:
            writable_bits |= h5f.ACC_SWMR_WRITE
        if self.id.get_intent() & writable_bits:
            return 'r+'
        return 'r'

    @property
    @with_phil
    def libver(self):
        """File format version bounds (2-tuple: low, high)"""
        fapl = self.id.get_access_plist()
        return tuple([libver_dict_r[bound] for bound in fapl.get_libver_bounds()])

    @property
    @with_phil
    def userblock_size(self):
        """ User block size (in bytes) """
        return self.id.get_create_plist().get_userblock()

    @property
    @with_phil
    def meta_block_size(self):
        """ Meta block size (in bytes) """
        return self.id.get_access_plist().get_meta_block_size()

    if mpi:

        @property
        @with_phil
        def atomic(self):
            """ Set/get MPI-IO atomic mode
            """
            fid = self.id
            return fid.get_mpi_atomicity()

        @atomic.setter
        @with_phil
        def atomic(self, value):
            # pylint: disable=missing-docstring
            fid = self.id
            fid.set_mpi_atomicity(value)

    @property
    @with_phil
    def swmr_mode(self):
        """ Controls single-writer multiple-reader mode """
        if not swmr_support:
            return swmr_support
        swmr_bits = h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE
        return bool(self.id.get_intent() & swmr_bits)

    @swmr_mode.setter
    @with_phil
    def swmr_mode(self, value):
        # pylint: disable=missing-docstring
        if not value:
            raise ValueError("It is not possible to forcibly switch SWMR mode off.")
        self.id.start_swmr_write()

    def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False,
                 rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None,
                 fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None,
                 page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None,
                 alignment_threshold=1, alignment_interval=1, meta_block_size=None, **kwds):
        """Create a new file object.

        See the h5py user guide for a detailed explanation of the options.

        name
            Name of the file on disk, or file-like object.  Note: for files
            created with the 'core' driver, HDF5 still requires this be
            non-empty.
        mode
            r        Readonly, file must exist (default)
            r+       Read/write, file must exist
            w        Create file, truncate if exists
            w- or x  Create file, fail if exists
            a        Read/write if exists, create otherwise
        driver
            Name of the driver to use.  Legal values are None (default,
            recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3'.
        libver
            Library version bounds.  Supported values: 'earliest', 'v108',
            'v110', 'v112'  and 'latest'. The 'v108', 'v110' and 'v112'
            options can only be specified with the HDF5 1.10.2 library or later.
        userblock_size
            Desired size of user block.  Only allowed when creating a new
            file (mode w, w- or x).
        swmr
            Open the file in SWMR read mode. Only used when mode = 'r'.
        rdcc_nbytes
            Total size of the dataset chunk cache in bytes. The default size
            is 1024**2 (1 MiB) per dataset. Applies to all datasets unless individually changed.
        rdcc_w0
            The chunk preemption policy for all datasets.  This must be
            between 0 and 1 inclusive and indicates the weighting according to
            which chunks which have been fully read or written are penalized
            when determining which chunks to flush from cache.  A value of 0
            means fully read or written chunks are treated no differently than
            other chunks (the preemption is strictly LRU) while a value of 1
            means fully read or written chunks are always preempted before
            other chunks.  If your application only reads or writes data once,
            this can be safely set to 1.  Otherwise, this should be set lower
            depending on how often you re-read or re-write the same data.  The
            default value is 0.75. Applies to all datasets unless individually changed.
        rdcc_nslots
            The number of chunk slots in the raw data chunk cache for this
            file. Increasing this value reduces the number of cache collisions,
            but slightly increases the memory used. Due to the hashing
            strategy, this value should ideally be a prime number. As a rule of
            thumb, this value should be at least 10 times the number of chunks
            that can fit in rdcc_nbytes bytes. For maximum performance, this
            value should be set approximately 100 times that number of
            chunks. The default value is 521. Applies to all datasets unless individually changed.
        track_order
            Track dataset/group/attribute creation order under root group
            if True. If None use global default h5.get_config().track_order.
        fs_strategy
            The file space handling strategy to be used.  Only allowed when
            creating a new file (mode w, w- or x).  Defined as:
            "fsm"        FSM, Aggregators, VFD
            "page"       Paged FSM, VFD
            "aggregate"  Aggregators, VFD
            "none"       VFD
            If None use HDF5 defaults.
        fs_page_size
            File space page size in bytes. Only used when fs_strategy="page". If
            None use the HDF5 default (4096 bytes).
        fs_persist
            A boolean value to indicate whether free space should be persistent
            or not.  Only allowed when creating a new file.  The default value
            is False.
        fs_threshold
            The smallest free-space section size that the free space manager
            will track.  Only allowed when creating a new file.  The default
            value is 1.
        page_buf_size
            Page buffer size in bytes. Only allowed for HDF5 files created with
            fs_strategy="page". Must be a power of two value and greater than
            or equal to the file space page size when creating the file. It is
            not used by default.
        min_meta_keep
            Minimum percentage of metadata to keep in the page buffer before
            allowing pages containing metadata to be evicted. Applicable only if
            page_buf_size is set. Default value is zero.
        min_raw_keep
            Minimum percentage of raw data to keep in the page buffer before
            allowing pages containing raw data to be evicted. Applicable only if
            page_buf_size is set. Default value is zero.
        locking
            The file locking behavior. Defined as:

            - False (or "false") --  Disable file locking
            - True (or "true")   --  Enable file locking
            - "best-effort"      --  Enable file locking but ignore some errors
            - None               --  Use HDF5 defaults

            .. warning::

                The HDF5_USE_FILE_LOCKING environment variable can override
                this parameter.

            Only available with HDF5 >= 1.12.1 or 1.10.x >= 1.10.7.

        alignment_threshold
            Together with ``alignment_interval``, this property ensures that
            any file object greater than or equal in size to the alignment
            threshold (in bytes) will be aligned on an address which is a
            multiple of alignment interval.

        alignment_interval
            This property should be used in conjunction with
            ``alignment_threshold``. See the description above. For more
            details, see
            https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT

        meta_block_size
            Set the current minimum size, in bytes, of new metadata block allocations.
            See https://portal.hdfgroup.org/display/HDF5/H5P_SET_META_BLOCK_SIZE

        Additional keywords
            Passed on to the selected file driver.
        """
        if driver == 'ros3':
            if not ros3:
                raise ValueError(
                    "h5py was built without ROS3 support, can't use ros3 driver")
            from urllib.parse import urlparse
            url = urlparse(name)
            if url.scheme == 's3':
                # Rewrite s3://bucket/key into the regional HTTPS endpoint.
                aws_region = kwds.get('aws_region', b'').decode('ascii')
                if not aws_region:
                    raise ValueError('AWS region required for s3:// location')
                name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}'
            elif url.scheme not in ('https', 'http'):
                raise ValueError(f'{name}: S3 location must begin with '
                                 'either "https://", "http://", or "s3://"')

        if locking is not None and hdf5_version < (1, 12, 1):
            new_enough_1_10 = hdf5_version[:2] == (1, 10) and hdf5_version[2] >= 7
            if not new_enough_1_10:
                raise ValueError("HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking options.")

        if isinstance(name, _objects.ObjectID):
            # An existing low-level identifier was passed: resolve its file.
            if fs_strategy:
                raise ValueError("Unable to set file space strategy of an existing file")

            with phil:
                fid = h5i.get_file_id(name)
        else:
            is_file_like = hasattr(name, 'read') and hasattr(name, 'seek')
            if is_file_like:
                if driver not in (None, 'fileobj'):
                    raise ValueError("Driver must be 'fileobj' for file-like object if specified.")
                driver = 'fileobj'
                if kwds.get('fileobj', name) != name:
                    raise ValueError("Invalid value of 'fileobj' argument; "
                                     "must equal to file-like object if specified.")
                kwds['fileobj'] = name
                # The object itself travels via the 'fileobj' keyword; the
                # name becomes an ASCII rendering of its repr.
                name = repr(name).encode('ASCII', 'replace')
            else:
                name = filename_encode(name)

            if track_order is None:
                track_order = h5.get_config().track_order

            if fs_strategy and mode not in ('w', 'w-', 'x'):
                raise ValueError("Unable to set file space strategy of an existing file")

            if swmr and mode != 'r':
                warn(
                    "swmr=True only affects read ('r') mode. For swmr write "
                    "mode, set f.swmr_mode = True after opening the file.",
                    stacklevel=2,
                )

            with phil:
                fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
                                 locking, page_buf_size, min_meta_keep, min_raw_keep,
                                 alignment_threshold=alignment_threshold,
                                 alignment_interval=alignment_interval,
                                 meta_block_size=meta_block_size,
                                 **kwds)
                fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy,
                                 fs_persist=fs_persist, fs_threshold=fs_threshold,
                                 fs_page_size=fs_page_size)
                fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)

        self._libver = libver if isinstance(libver, tuple) else (libver, 'latest')

        super().__init__(fid)

    def close(self):
        """ Close the file.  All open objects become invalid """
        with phil:
            # Nothing to do when the file is no longer open.
            if not self.id.valid:
                return

            # We have to explicitly murder all open objects related to the file

            # Close file-resident objects first, then the files.
            # Otherwise we get errors in MPI mode.
            self.id._close_open_objects(h5f.OBJ_LOCAL | ~h5f.OBJ_FILE)
            self.id._close_open_objects(h5f.OBJ_LOCAL | h5f.OBJ_FILE)

            self.id.close()
            _objects.nonlocal_close()

    def flush(self):
        """ Tell the HDF5 library to flush its buffers.
        """
        with phil:
            file_id = self.id
            h5f.flush(file_id)

    @with_phil
    def __enter__(self):
        return self

    @with_phil
    def __exit__(self, *args):
        if not self.id:
            return
        self.close()

    @with_phil
    def __repr__(self):
        if not self.id:
            return '<Closed HDF5 file>'

        # Filename has to be forced to Unicode if it comes back bytes
        # Mode is always a "native" string
        shown_name = self.filename
        if isinstance(shown_name, bytes):  # Can't decode fname
            shown_name = shown_name.decode('utf8', 'replace')
        return f'<HDF5 file "{os.path.basename(shown_name)}" (mode {self.mode})>'