Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/attributeset.py: 20%
236 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Here is defined the AttributeSet class."""
3import re
4import warnings
5import pickle
6import numpy as np
8from . import hdf5extension
9from .utils import SizeType
10from .registry import class_name_dict
11from .exceptions import ClosedNodeError, FiltersWarning, PerformanceWarning
12from .path import check_attribute_name
13from .undoredo import attr_to_shadow
14from .filters import Filters
# Names of the attributes that are treated as system attributes.
SYS_ATTRS = [
    "CLASS",
    "VERSION",
    "TITLE",
    "NROWS",
    "EXTDIM",
    "ENCODING",
    "PYTABLES_FORMAT_VERSION",
    "FLAVOR",
    "FILTERS",
    "AUTO_INDEX",
    "DIRTY",
    "NODE_TYPE",
    "NODE_TYPE_VERSION",
    "PSEUDOATOM",
]

# Any attribute whose name starts with one of these prefixes is also a
# system attribute.
SYS_ATTRS_PREFIXES = ["FIELD_"]

# Historical note (Francesc Alted, 2004-12-19): read-only attributes were
# disabled so that users may modify them if they want to.  Users are still
# not allowed to remove or rename system attributes.  The former list was:
#
# RO_ATTRS = ["CLASS", "FLAVOR", "VERSION", "NROWS", "EXTDIM",
#             "PYTABLES_FORMAT_VERSION", "FILTERS",
#             "NODE_TYPE", "NODE_TYPE_VERSION"]
# RO_ATTRS = []

# System attributes that are not meant to be copied during a Node copy.
SYS_ATTRS_NOTTOBECOPIED = [
    "CLASS",
    "VERSION",
    "TITLE",
    "NROWS",
    "EXTDIM",
    "PYTABLES_FORMAT_VERSION",
    "FILTERS",
    "ENCODING",
]

# Attributes that are copied anyway when a node copy asks for the class
# information to be copied.
FORCE_COPY_CLASS = ['CLASS', 'VERSION']

# Matches the names of the attributes holding column default values.
_field_fill_re = re.compile('^FIELD_[0-9]+_FILL$')

# Matches the module reference found in old pickled filters...
_old_filters_re = re.compile(br'\(([ic])tables\.Leaf\n')

# ...and this is the replacement that fixes it.
_new_filters_sub = br'(\1tables.filters\n'
def issysattrname(name):
    """Check whether `name` is the name of a system attribute.

    A name is a system attribute name when it is listed in ``SYS_ATTRS``
    or when it starts with *any* of the prefixes in ``SYS_ATTRS_PREFIXES``.

    Parameters
    ----------
    name : str
        The attribute name to check.

    Returns
    -------
    bool
        True if `name` is a system attribute name, False otherwise.

    """

    # ``str.startswith`` accepts a tuple and succeeds when any prefix
    # matches.  (A product of the individual results would only succeed
    # when *all* prefixes match, and would be true for an empty list.)
    return name in SYS_ATTRS or name.startswith(tuple(SYS_ATTRS_PREFIXES))
class AttributeSet(hdf5extension.AttributeSet):
    """Container for the HDF5 attributes of a Node.

    This class provides methods to create new HDF5 node attributes,
    and to get, rename or delete existing ones.

    Like in Group instances (see :ref:`GroupClassDescr`), AttributeSet
    instances make use of the *natural naming* convention, i.e. you can
    access the attributes on disk as if they were normal Python
    attributes of the AttributeSet instance.

    This offers the user a very convenient way to access HDF5 node
    attributes. However, for this reason and in order not to pollute the
    object namespace, one can not assign *normal* attributes to
    AttributeSet instances, and their members use names which start by
    special prefixes as happens with Group objects.

    .. rubric:: Notes on native and pickled attributes

    The values of most basic types are saved as HDF5 native data in the
    HDF5 file.  This includes Python bool, int, float, complex and str
    (but not long nor unicode) values, as well as their NumPy scalar
    versions and homogeneous or *structured* NumPy arrays of them.  When
    read, these values are always loaded as NumPy scalar or array
    objects, as needed.

    For that reason, attributes in native HDF5 files will be always
    mapped into NumPy objects.  Specifically, a multidimensional
    attribute will be mapped into a multidimensional ndarray and a
    scalar will be mapped into a NumPy scalar object (for example, a
    scalar H5T_NATIVE_LLONG will be read and returned as a numpy.int64
    scalar).

    However, other kinds of values are serialized using pickle, so you
    only will be able to correctly retrieve them using a Python-aware
    HDF5 library.  Thus, if you want to save Python scalar values and
    make sure you are able to read them with generic HDF5 tools, you
    should make use of *scalar or homogeneous/structured array NumPy
    objects* (for example, numpy.int64(1) or numpy.array([1, 2, 3],
    dtype='int16')).

    One more piece of advice: because of the various potential
    difficulties in restoring a Python object stored in an attribute,
    you may end up getting a pickle string where a Python object is
    expected. If this is the case, you may wish to run pickle.loads() on
    that string to get an idea of where things went wrong, as shown in
    this example::

        >>> import os, tempfile
        >>> import tables as tb
        >>>
        >>> class MyClass:
        ...     foo = 'bar'
        ...
        >>> myObject = MyClass()  # save object of custom class in HDF5 attr
        >>> h5fname = tempfile.mktemp(suffix='.h5')
        >>> h5f = tb.open_file(h5fname, 'w')
        >>> h5f.root._v_attrs.obj = myObject  # store the object
        >>> print(h5f.root._v_attrs.obj.foo)  # retrieve it
        bar
        >>> h5f.close()
        >>>
        >>> del MyClass, myObject  # delete class of object and reopen file
        >>> h5f = tb.open_file(h5fname, 'r')
        >>> print(repr(h5f.root._v_attrs.obj))
        b'ccopy_reg\\n_reconstructor...
        >>> import pickle  # let's unpickle that to see what went wrong
        >>> pickle.loads(h5f.root._v_attrs.obj)
        Traceback (most recent call last):
        ...
        AttributeError: Can't get attribute 'MyClass' ...
        >>> # So the problem was not in the stored object,
        ... # but in the *environment* where it was restored.
        ... h5f.close()
        >>> os.remove(h5fname)


    .. rubric:: Notes on AttributeSet methods

    Note that this class overrides the __getattr__(), __setattr__(),
    __delattr__() and __dir__() special methods.  This allows you to
    read, assign or delete attributes on disk by just using the next
    constructs::

        leaf.attrs.myattr = 'str attr'    # set a string (native support)
        leaf.attrs.myattr2 = 3            # set an integer (native support)
        leaf.attrs.myattr3 = [3, (1, 2)]  # a generic object (Pickled)
        attrib = leaf.attrs.myattr        # get the attribute ``myattr``
        del leaf.attrs.myattr             # delete the attribute ``myattr``

    In addition, the dictionary-like __getitem__(), __setitem__() and
    __delitem__() methods are available, so you may write things like
    this::

        for name in node._v_attrs._f_list():
            print("name: %s, value: %s" % (name, node._v_attrs[name]))

    Use whatever idiom you prefer to access the attributes.

    Finally, on interactive python sessions you may get autocompletions of
    attributes named as *valid python identifiers* by pressing the `[Tab]`
    key, or to use the dir() global function.

    If an attribute is set on a target node that already has a large
    number of attributes, a PerformanceWarning will be issued.


    .. rubric:: AttributeSet attributes

    .. attribute:: _v_attrnames

        A list with all attribute names.

    .. attribute:: _v_attrnamessys

        A list with system attribute names.

    .. attribute:: _v_attrnamesuser

        A list with user attribute names.

    .. attribute:: _v_unimplemented

        A list of attribute names with unimplemented native HDF5 types.

    """

    def _g_getnode(self):
        """Return the node found at the stored path in the stored file."""
        return self._v__nodefile._get_node(self._v__nodepath)

    @property
    def _v_node(self):
        """The :class:`Node` instance this attribute set is associated with."""
        return self._g_getnode()

    def __init__(self, node):
        """Create the basic structures to keep the attribute information.

        Reads all the HDF5 attributes (if any) on disk for the node "node".

        Parameters
        ----------
        node
            The parent node

        Raises
        ------
        ClosedNodeError
            If `node` is not open.

        """

        # Refuse to create an instance of an already closed node
        if not node._v_isopen:
            raise ClosedNodeError("the node for attribute set is closed")

        # Bookkeeping values are written straight into the instance
        # dictionary because __setattr__ creates attributes on disk.
        dict_ = self.__dict__

        self._g_new(node)
        dict_["_v__nodefile"] = node._v_file
        dict_["_v__nodepath"] = node._v_pathname
        dict_["_v_attrnames"] = self._g_list_attr(node)
        # The list of unimplemented attribute names
        dict_["_v_unimplemented"] = []

        # Get the file version format.  This is an optimization
        # in order to avoid accessing it too much.
        try:
            format_version = node._v_file.format_version
        except AttributeError:
            parsed_version = None
        else:
            if format_version == 'unknown':
                parsed_version = None
            else:
                parsed_version = tuple(map(int, format_version.split('.')))
        dict_["_v__format_version"] = parsed_version

        # Split the attribute list in system and user lists
        dict_["_v_attrnamessys"] = []
        dict_["_v_attrnamesuser"] = []
        for attr in self._v_attrnames:
            # put the attributes on the local dictionary to allow
            # tab-completion
            self.__getattr__(attr)
            if issysattrname(attr):
                self._v_attrnamessys.append(attr)
            else:
                self._v_attrnamesuser.append(attr)

        # Sort the attributes
        self._v_attrnames.sort()
        self._v_attrnamessys.sort()
        self._v_attrnamesuser.sort()

    def _g_update_node_location(self, node):
        """Updates the location information about the associated `node`."""

        dict_ = self.__dict__
        dict_['_v__nodefile'] = node._v_file
        dict_['_v__nodepath'] = node._v_pathname
        # hdf5extension operations:
        self._g_new(node)

    def _f_list(self, attrset='user'):
        """Get a list of attribute names.

        The attrset string selects the attribute set to be used.  A
        'user' value returns only user attributes (this is the default).
        A 'sys' value returns only system attributes.  Finally, 'all'
        returns both system and user attributes.

        A copy of the selected list is returned.  Any other `attrset`
        value returns ``None``.

        """

        if attrset == "user":
            return self._v_attrnamesuser[:]
        elif attrset == "sys":
            return self._v_attrnamessys[:]
        elif attrset == "all":
            return self._v_attrnames[:]
        # Unknown selector: kept as ``None`` for backward compatibility.
        return None

    def __dir__(self):
        """Autocomplete only children named as valid python identifiers.

        Only PY3 supports this special method.
        """
        return list({c for c in
                     super().__dir__() + self._v_attrnames
                     if c.isidentifier()})

    def __getattr__(self, name):
        """Get the attribute named "name".

        The value is read from disk, converted into a convenient Python
        object when needed (unpickled, decoded or unpacked) and cached
        in the instance dictionary before being returned.

        Raises
        ------
        AttributeError
            If there is no attribute with that name.

        """

        # The bookkeeping list is fetched from the instance dictionary
        # instead of using attribute access: if it is missing (instance
        # not fully initialised) attribute access would come back here
        # and recurse without bound.
        attrnames = self.__dict__.get('_v_attrnames')

        # If attribute does not exist, raise AttributeError
        if attrnames is None or name not in attrnames:
            nodepath = self.__dict__.get('_v__nodepath')
            raise AttributeError(f"Attribute {name!r} does not exist "
                                 f"in node: {nodepath!r}")

        # Read the attribute from disk.  This is an optimization to read
        # quickly system attributes that are _string_ values, but it
        # takes care of other types as well as for example NROWS for
        # Tables and EXTDIM for EArrays
        format_version = self._v__format_version
        value = self._g_getattr(self._v_node, name)

        # Check whether the value is pickled
        # Pickled values always seems to end with a "."
        maybe_pickled = (
            isinstance(value, np.generic) and  # NumPy scalar?
            value.dtype.type == np.bytes_ and  # string type?
            value.itemsize > 0 and value.endswith(b'.'))

        # NOTE(security): the branches below call pickle.loads() on data
        # read from the file; unpickling can execute arbitrary code, so
        # files from untrusted sources should not be opened.
        if (maybe_pickled and value in [b"0", b"0."]):
            # Workaround for a bug in many versions of Python (starting
            # somewhere after Python 2.6.1).  See ticket #253.
            retval = value
        elif (maybe_pickled and _field_fill_re.match(name)
              and format_version == (1, 5)):
            # This format was used during the first 1.2 releases, just
            # for string defaults.
            try:
                retval = pickle.loads(value)
                retval = np.array(retval)
            except ImportError:
                retval = None  # signal error avoiding exception
        elif (maybe_pickled and
              name == 'FILTERS' and
              format_version is not None and
              format_version < (2, 0)):
            # This is a big hack, but we don't have other way to recognize
            # pickled filters of PyTables 1.x files.
            value = _old_filters_re.sub(_new_filters_sub, value, 1)
            retval = pickle.loads(value)  # pass unpickling errors through
        elif maybe_pickled:
            try:
                retval = pickle.loads(value)
            except UnicodeDecodeError:
                # Object maybe pickled on python 2 and unpickled on python 3.
                # encoding='bytes' was added in python 3.4 to resolve this.
                # However 'bytes' mangles class attributes as they are
                # unpickled as bytestrings. Hence try 'latin1' first.
                # Ref: http://bugs.python.org/issue6784
                try:
                    retval = pickle.loads(value, encoding='latin1')
                except TypeError:
                    try:
                        retval = pickle.loads(value, encoding='bytes')
                    except Exception:
                        retval = value
                except Exception:
                    retval = value
            except Exception:
                # Pickle may raise other errors than UnpicklingError
                # (see SF bug #1254636), hence the broad clause.
                # ivb (2005-09-07): It is too hard to tell
                # whether the unpickling failed
                # because of the string not being a pickle one at all,
                # because of a malformed pickle string,
                # or because of some other problem in object reconstruction,
                # thus making inconvenient even the issuing of a warning here.
                # The documentation contains a note on this issue,
                # explaining how the user can tell where the problem was.
                retval = value
            # Additional check for allowing a workaround for #307
            if isinstance(retval, str) and retval == '':
                retval = np.array(retval)[()]
        elif (name == 'FILTERS' and
              format_version is not None and
              format_version >= (2, 0)):
            try:
                retval = Filters._unpack(value)
            except ValueError:
                warnings.warn(FiltersWarning('Failed parsing FILTERS key'))
                retval = None
        elif name == 'TITLE' and not isinstance(value, str):
            retval = value.decode('utf-8')
        elif (issysattrname(name) and isinstance(value, bytes) and
              not _field_fill_re.match(name)):
            # system attributes should always be str
            # python 3, bytes and not "FIELD_[0-9]+_FILL"
            retval = value.decode('utf-8')
        else:
            retval = value

        # Put this value in local directory
        self.__dict__[name] = retval
        return retval

    def _g__setattr(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        It does not log the change.

        """

        # Save this attribute to disk
        # (overwriting an existing one if needed)
        stvalue = value
        if issysattrname(name):
            if name in ["EXTDIM", "AUTO_INDEX", "DIRTY", "NODE_TYPE_VERSION"]:
                stvalue = np.array(value, dtype=np.int32)
                value = stvalue[()]
            elif name == "NROWS":
                stvalue = np.array(value, dtype=SizeType)
                value = stvalue[()]
            elif (name == "FILTERS" and
                  self._v__format_version is not None and
                  self._v__format_version >= (2, 0)):
                stvalue = value._pack()
                # value will remain as a Filters instance here

        # Convert value from a Python scalar into a NumPy scalar
        # (only in case it has not been converted yet)
        # Fixes ticket #59
        # The exact type is compared on purpose.  ``np.str_`` is used
        # because the ``np.unicode_`` alias was removed in NumPy 2.0.
        if (stvalue is value and
                type(value) in (bool, bytes, int, float, complex, str,
                                np.str_)):
            # Additional check for allowing a workaround for #307
            if isinstance(value, str) and len(value) == 0:
                stvalue = np.array('')
            else:
                stvalue = np.array(value)
            value = stvalue[()]

        self._g_setattr(self._v_node, name, stvalue)

        # New attribute or value. Introduce it into the local
        # directory
        self.__dict__[name] = value

        # Finally, add this attribute to the list if not present
        attrnames = self._v_attrnames
        if name not in attrnames:
            attrnames.append(name)
            attrnames.sort()
            if issysattrname(name):
                attrnamessys = self._v_attrnamessys
                attrnamessys.append(name)
                attrnamessys.sort()
            else:
                attrnamesuser = self._v_attrnamesuser
                attrnamesuser.append(name)
                attrnamesuser.sort()

    def __setattr__(self, name, value):
        """Set a PyTables attribute.

        Sets a (maybe new) PyTables attribute with the specified `name`
        and `value`.  If the attribute already exists, it is simply
        replaced.

        A ``ValueError`` is raised when the name starts with a reserved
        prefix or contains a ``/``.  A `NaturalNameWarning` is issued if
        the name is not a valid Python identifier.  A
        `PerformanceWarning` is issued when the recommended maximum
        number of attributes in a node is going to be exceeded.

        """

        nodefile = self._v__nodefile
        attrnames = self._v_attrnames

        # Check for name validity
        check_attribute_name(name)

        nodefile._check_writable()

        # Check if there are too many attributes.
        max_node_attrs = nodefile.params['MAX_NODE_ATTRS']
        if len(attrnames) >= max_node_attrs:
            warnings.warn(
                "node ``%s`` is exceeding the recommended maximum number of "
                "attributes (%d); be ready to see PyTables asking for *lots* "
                "of memory and possibly slow I/O"
                % (self._v__nodepath, max_node_attrs),
                PerformanceWarning)

        undo_enabled = nodefile.is_undo_enabled()
        # Log old attribute removal (if any).
        if undo_enabled and (name in attrnames):
            self._g_del_and_log(name)

        # Set the attribute.
        self._g__setattr(name, value)

        # Log new attribute addition.
        if undo_enabled:
            self._g_log_add(name)

    def _g_log_add(self, name):
        """Log the addition of the attribute `name` in the node file."""
        self._v__nodefile._log('ADDATTR', self._v__nodepath, name)

    def _g_del_and_log(self, name):
        """Log the removal of the attribute `name` and move it to shadow."""
        nodefile = self._v__nodefile
        node_pathname = self._v__nodepath
        # Log *before* moving to use the right shadow name.
        nodefile._log('DELATTR', node_pathname, name)
        attr_to_shadow(nodefile, node_pathname, name)

    def _g__delattr(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute.

        It does not log the change.

        """

        # Delete the attribute from disk
        self._g_remove(self._v_node, name)

        # Delete the attribute from local lists
        self._v_attrnames.remove(name)
        if name in self._v_attrnamessys:
            self._v_attrnamessys.remove(name)
        else:
            self._v_attrnamesuser.remove(name)

        # Delete the attribute from the local directory
        # closes (#1049285)
        del self.__dict__[name]

    def __delattr__(self, name):
        """Delete a PyTables attribute.

        Deletes the specified existing PyTables attribute from the
        attribute set.  If a nonexistent or system attribute is
        specified, an ``AttributeError`` is raised.

        """

        nodefile = self._v__nodefile

        # Check if attribute exists
        if name not in self._v_attrnames:
            raise AttributeError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath))

        nodefile._check_writable()

        # Remove the PyTables attribute or move it to shadow.
        if nodefile.is_undo_enabled():
            self._g_del_and_log(name)
        else:
            self._g__delattr(name)

    def __getitem__(self, name):
        """The dictionary like interface for __getattr__().

        A ``KeyError`` is raised instead of an ``AttributeError``.

        """

        try:
            return self.__getattr__(name)
        except AttributeError as err:
            # Capture the AttributeError and re-raise a KeyError one,
            # keeping the original error as its explicit cause
            raise KeyError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath)) from err

    def __setitem__(self, name, value):
        """The dictionary like interface for __setattr__()."""

        self.__setattr__(name, value)

    def __delitem__(self, name):
        """The dictionary like interface for __delattr__().

        A ``KeyError`` is raised instead of an ``AttributeError``.

        """

        try:
            self.__delattr__(name)
        except AttributeError as err:
            # Capture the AttributeError and re-raise a KeyError one,
            # keeping the original error as its explicit cause
            raise KeyError(
                "Attribute ('%s') does not exist in node '%s'"
                % (name, self._v__nodepath)) from err

    def __contains__(self, name):
        """Is there an attribute with that name?

        A true value is returned if the attribute set has an attribute
        with the given name, false otherwise.

        """

        return name in self._v_attrnames

    def _f_rename(self, oldattrname, newattrname):
        """Rename an attribute from oldattrname to newattrname."""

        if oldattrname == newattrname:
            # Do nothing
            return

        # First, fetch the value of the oldattrname
        attrvalue = getattr(self, oldattrname)

        # Now, create the new attribute
        setattr(self, newattrname, attrvalue)

        # Finally, remove the old attribute
        delattr(self, oldattrname)

    def _g_copy(self, newset, set_attr=None, copyclass=False):
        """Copy set attributes.

        Copies all user and allowed system PyTables attributes to the
        given attribute set, replacing the existing ones.

        You can specify a *bound* method of the destination set that
        will be used to set its attributes.  Else, its `_g__setattr`
        method will be used.

        Changes are logged depending on the chosen setting method.  The
        default setting method does not log anything.

        .. versionchanged:: 3.0
           The *newSet* parameter has been renamed into *newset*.

        .. versionchanged:: 3.0
           The *copyClass* parameter has been renamed into *copyclass*.

        """

        copysysattrs = newset._v__nodefile.params['PYTABLES_SYS_ATTRS']
        if set_attr is None:
            set_attr = newset._g__setattr

        for attrname in self._v_attrnamesuser:
            # Do not copy the unimplemented attributes.
            if attrname not in self._v_unimplemented:
                set_attr(attrname, getattr(self, attrname))
        # Copy the system attributes that we are allowed to.
        if copysysattrs:
            for attrname in self._v_attrnamessys:
                # Do not copy the FIELD_ attributes in tables as this can
                # be really *slow* (don't know exactly the reason).
                # See #304.
                if ((attrname not in SYS_ATTRS_NOTTOBECOPIED) and
                        not attrname.startswith("FIELD_")):
                    set_attr(attrname, getattr(self, attrname))
            # Copy CLASS and VERSION attributes if requested
            # NOTE(review): this block is placed under ``if copysysattrs``;
            # the nesting was reconstructed from a listing without
            # indentation -- confirm against the repository.
            if copyclass:
                for attrname in FORCE_COPY_CLASS:
                    if attrname in self._v_attrnamessys:
                        set_attr(attrname, getattr(self, attrname))

    def _f_copy(self, where):
        """Copy attributes to the where node.

        Copies all user and certain system attributes to the given where
        node (a Node instance - see :ref:`NodeClassDescr`), replacing
        the existing ones.

        A ``TypeError`` is raised if `where` is not a node.

        """

        # AttributeSet must be defined in order to define a Node.
        # However, we need to know Node here.
        # Using class_name_dict avoids a circular import.
        if not isinstance(where, class_name_dict['Node']):
            raise TypeError(f"destination object is not a node: {where!r}")
        self._g_copy(where._v_attrs, where._v_attrs.__setattr__)

    def _g_close(self):
        """Close the attribute set (a no-op)."""
        # Nothing will be done here, as the existing instance is completely
        # operative now.
        pass

    def __str__(self):
        """The string representation for this object."""

        # The pathname
        pathname = self._v__nodepath
        # Get this class name
        classname = self.__class__.__name__
        # The number of attributes
        attrnumber = len(self._v_attrnames)
        return f"{pathname}._v_attrs ({classname}), {attrnumber} attributes"

    def __repr__(self):
        """A detailed string representation for this object."""

        # print additional info only if there are attributes to show
        attrnames = list(self._v_attrnames)
        if attrnames:
            rep = [f'{attr} := {getattr(self, attr)!r}' for attr in attrnames]
            return f"{self!s}:\n   [" + ',\n    '.join(rep) + "]"
        else:
            return str(self)
class NotLoggedAttributeSet(AttributeSet):
    """An attribute set that never logs the changes made to it."""

    def _g_log_add(self, name):
        """Do nothing: attribute additions are not logged."""

    def _g_del_and_log(self, name):
        """Delete the attribute `name` right away, without logging."""
        self._g__delattr(name)