Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/atom.py: 56%
416 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Atom classes for describing dataset contents."""
3import re
4import inspect
5import warnings
7import numpy as np
9from .utils import SizeType
10from .misc.enum import Enum
12import pickle
14from .exceptions import FlavorWarning
__docformat__ = 'reStructuredText'
"""The format of documentation strings in this module."""

# The three registries below start out empty and are populated as a side
# effect of class creation: ``MetaAtom.__init__`` inserts every new atom
# class into them.
all_types = set()  # filled as atom classes are created
"""Set of all PyTables types."""

atom_map = {}  # filled as atom classes are created
"""Maps atom kinds to item sizes and atom classes.

If there is a fixed set of possible item sizes for a given kind, the
kind maps to another mapping from item size in bytes to atom class.
Otherwise, the kind maps directly to the atom class.
"""

deftype_from_kind = {}  # filled as atom classes are created
"""Maps atom kinds to their default atom type (if any)."""
_type_re = re.compile(r'^([a-z]+)([0-9]*)$')


def split_type(type):
    """Break a PyTables type name into its kind and its item size in bytes.

    The type is expected to be a lowercase kind name optionally followed
    by a precision in bits.  The result is the tuple ``(kind, itemsize)``;
    when the type carries no precision the item size is ``None``::

        >>> split_type('int32')
        ('int', 4)
        >>> split_type('string')
        ('string', None)
        >>> split_type('int20')
        Traceback (most recent call last):
        ...
        ValueError: precision must be a multiple of 8: 20
        >>> split_type('foo bar')
        Traceback (most recent call last):
        ...
        ValueError: malformed type: 'foo bar'

    """

    parsed = _type_re.match(type)
    if parsed is None:
        raise ValueError("malformed type: %r" % type)

    kind, digits = parsed.groups()
    if not digits:
        # No precision suffix at all: the item size is unknown.
        return (kind, None)

    bits = int(digits)
    nbytes, leftover = divmod(bits, 8)
    # Only the remainder is checked: a size of 0 bytes could be valid.
    if leftover:
        raise ValueError("precision must be a multiple of 8: %d"
                         % bits)
    return (kind, nbytes)
72def _invalid_itemsize_error(kind, itemsize, itemsizes):
73 isizes = sorted(itemsizes)
74 return ValueError("invalid item size for kind ``%s``: %r; "
75 "it must be one of ``%r``"
76 % (kind, itemsize, isizes))
def _abstract_atom_init(deftype, defvalue):
    """Build the ``__init__`` method used by abstract `Atom` classes.

    The generated constructor looks up the concrete class registered in
    ``atom_map`` for the instance's kind and the requested `itemsize`
    (defaulting to the item size encoded in `deftype`), turns the
    instance into that class and delegates initialisation to it.
    """

    _kind, default_itemsize = split_type(deftype)

    def __init__(self, itemsize=default_itemsize, shape=(), dflt=defvalue):
        assert self.kind in atom_map
        try:
            concrete = atom_map[self.kind][itemsize]
        except KeyError:
            raise _invalid_itemsize_error(self.kind, itemsize,
                                          atom_map[self.kind])
        # Morph the instance into the concrete class before initialising.
        self.__class__ = concrete
        concrete.__init__(self, shape, dflt)

    return __init__
96def _normalize_shape(shape):
97 """Check that the `shape` is safe to be used and return it as a tuple."""
99 if isinstance(shape, (np.integer, int)):
100 if shape < 1:
101 raise ValueError("shape value must be greater than 0: %d"
102 % shape)
103 shape = (shape,) # N is a shorthand for (N,)
104 try:
105 shape = tuple(shape)
106 except TypeError:
107 raise TypeError("shape must be an integer or sequence: %r"
108 % (shape,))
110 # XXX Get from HDF5 library if possible.
111 # HDF5 does not support ranks greater than 32
112 if len(shape) > 32:
113 raise ValueError(
114 f"shapes with rank > 32 are not supported: {shape!r}")
116 return tuple(SizeType(s) for s in shape)
119def _normalize_default(value, dtype):
120 """Return `value` as a valid default of NumPy type `dtype`."""
122 # Create NumPy objects as defaults
123 # This is better in order to serialize them as attributes
124 if value is None:
125 value = 0
126 basedtype = dtype.base
127 try:
128 default = np.array(value, dtype=basedtype)
129 except ValueError:
130 array = np.array(value)
131 if array.shape != basedtype.shape:
132 raise
133 # Maybe nested dtype with "scalar" value.
134 default = np.array(value, dtype=basedtype.base)
135 # 0-dim arrays will be representented as NumPy scalars
136 # (PyTables attribute convention)
137 if default.shape == ():
138 default = default[()]
139 return default
142def _cmp_dispatcher(other_method_name):
143 """Dispatch comparisons to a method of the *other* object.
145 Returns a new *rich comparison* method which dispatches calls to
146 the method `other_method_name` of the *other* object. If there is
147 no such method in the object, ``False`` is returned.
149 This is part of the implementation of a double dispatch pattern.
150 """
152 def dispatched_cmp(self, other):
153 try:
154 other_method = getattr(other, other_method_name)
155 except AttributeError:
156 return False
157 return other_method(self)
158 return dispatched_cmp
class MetaAtom(type):
    """Metaclass of atom classes.

    Every class created through it is recorded in the module registries
    (``deftype_from_kind``, ``all_types`` and ``atom_map``) according to
    the ``kind``, ``itemsize``, ``type`` and ``_deftype`` entries found
    in the class body itself.

    """

    def __init__(cls, name, bases, dict_):
        super().__init__(name, bases, dict_)

        # Only attributes declared directly in this class body count.
        own = dict_.get
        kind, itemsize = own('kind'), own('itemsize')
        type_, deftype = own('type'), own('_deftype')

        if kind and deftype:
            deftype_from_kind[kind] = deftype

        if type_:
            all_types.add(type_)

        # A fixed item size is a truthy value that can be turned into int.
        fixed_size = itemsize and hasattr(itemsize, '__int__')

        if kind:
            if itemsize and not fixed_size:
                # Atom classes with a non-fixed item size do have an
                # ``itemsize``, but it's not a number (e.g. property).
                atom_map[kind] = cls
                return
            # first definition of kind, make new entry
            atom_map[kind] = {}

        if fixed_size:
            # The kind may come from superclasses.
            atom_map[cls.kind][int(itemsize)] = cls
class Atom(metaclass=MetaAtom):
    """Defines the type of atomic cells stored in a dataset.

    The meaning of *atomic* is that individual elements of a cell can
    not be extracted directly by indexing (i.e. __getitem__()) the
    dataset; e.g. if a dataset has shape (2, 2) and its atoms have
    shape (3,), to get the third element of the cell at (1, 0) one
    should use dataset[1,0][2] instead of dataset[1,0,2].

    The Atom class is meant to declare the different properties of the
    *base element* (also known as *atom*) of CArray, EArray and
    VLArray datasets, although they are also used to describe the base
    elements of Array datasets. Atoms have the property that their
    length is always the same.  However, you can grow datasets along
    the extensible dimension in the case of EArray or put a variable
    number of them on a VLArray row. Moreover, they are not restricted
    to scalar values, and they can be *fully multidimensional
    objects*.

    Parameters
    ----------
    itemsize : int
        For types with a non-fixed size, this sets the size in
        bytes of individual items in the atom.
    shape : tuple
        Sets the shape of the atom. An integer shape of
        N is equivalent to the tuple (N,).
    dflt
        Sets the default value for the atom.

    The following are the public methods and attributes of the Atom class.

    Notes
    -----
    A series of descendant classes are offered in order to make the
    use of these element descriptions easier. You should use a
    particular Atom descendant class whenever you know the exact type
    you will need when writing your code. Otherwise, you may use one
    of the Atom.from_*() factory Methods.

    .. rubric:: Atom attributes

    .. attribute:: dflt

        The default value of the atom.

        If the user does not supply a value for an element while
        filling a dataset, this default value will be written to disk.
        If the user supplies a scalar value for a multidimensional
        atom, this value is automatically *broadcast* to all the items
        in the atom cell. If dflt is not supplied, an appropriate zero
        value (or *null* string) will be chosen by default.  Please
        note that default values are kept internally as NumPy objects.

    .. attribute:: dtype

        The NumPy dtype that most closely matches this atom.

    .. attribute:: itemsize

        Size in bytes of a single item in the atom.
        Specially useful for atoms of the string kind.

    .. attribute:: kind

        The PyTables kind of the atom (a string).

    .. attribute:: shape

        The shape of the atom (an empty tuple for scalar atoms).

    .. attribute:: type

        The PyTables type of the atom (a string).

    Atoms can be compared with atoms and other objects for
    strict (in)equality without having to compare individual
    attributes::

        >>> atom1 = StringAtom(itemsize=10)  # same as ``atom2``
        >>> atom2 = Atom.from_kind('string', 10)  # same as ``atom1``
        >>> atom3 = IntAtom()
        >>> atom1 == 'foo'
        False
        >>> atom1 == atom2
        True
        >>> atom2 != atom1
        False
        >>> atom1 == atom3
        False
        >>> atom3 != atom2
        True

    """

    @classmethod
    def prefix(cls):
        """Return the atom class prefix."""
        cname = cls.__name__
        return cname[:cname.rfind('Atom')]

    @classmethod
    def from_sctype(cls, sctype, shape=(), dflt=None):
        """Create an Atom from a NumPy scalar type sctype.

        Optional shape and default value may be specified as the
        shape and dflt
        arguments, respectively. Information in the
        sctype not represented in an Atom is ignored::

            >>> import numpy as np
            >>> Atom.from_sctype(np.int16, shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_sctype('S5', dflt='hello')
            Traceback (most recent call last):
            ...
            ValueError: unknown NumPy scalar type: 'S5'
            >>> Atom.from_sctype('float64')
            Float64Atom(shape=(), dflt=0.0)

        """
        if (not isinstance(sctype, type)
                or not issubclass(sctype, np.generic)):
            # Not a scalar type class: treat it as a key of the NumPy
            # scalar type dictionary.
            if sctype not in np.sctypeDict:
                raise ValueError(f"unknown NumPy scalar type: {sctype!r}")
            sctype = np.sctypeDict[sctype]
        return cls.from_dtype(np.dtype((sctype, shape)), dflt)

    @classmethod
    def from_dtype(cls, dtype, dflt=None):
        """Create an Atom from a NumPy dtype.

        An optional default value may be specified as the dflt
        argument. Information in the dtype not represented in an Atom is
        ignored::

            >>> import numpy as np
            >>> Atom.from_dtype(np.dtype((np.int16, (2, 2))))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_dtype(np.dtype('float64'))
            Float64Atom(shape=(), dflt=0.0)

        Note: for easier use in Python 3, where all strings lead to the
        Unicode dtype, this dtype will also generate a StringAtom.  Since
        this is only viable for strings that are castable as ascii, a
        warning is issued.

            >>> Atom.from_dtype(np.dtype('U20')) # doctest: +SKIP
            Atom.py:392: FlavorWarning: support for unicode type is very
                limited, and only works for strings that can be cast as ascii
            StringAtom(itemsize=20, shape=(), dflt=b'')

        """
        basedtype = dtype.base
        if basedtype.names:
            raise ValueError("compound data types are not supported: %r"
                             % dtype)
        if basedtype.shape != ():
            raise ValueError("nested data types are not supported: %r"
                             % dtype)
        if basedtype.kind == 'S':  # can not reuse something like 'string80'
            itemsize = basedtype.itemsize
            return cls.from_kind('string', itemsize, dtype.shape, dflt)
        elif basedtype.kind == 'U':
            # workaround for unicode type (standard string type in Python 3)
            warnings.warn("support for unicode type is very limited, and "
                          "only works for strings that can be cast as ascii",
                          FlavorWarning)
            # The byte size is divided by 4 to obtain the character count.
            itemsize = basedtype.itemsize // 4
            assert str(itemsize) in basedtype.str, (
                "something went wrong in handling unicode.")
            return cls.from_kind('string', itemsize, dtype.shape, dflt)
        # Most NumPy types have direct correspondence with PyTables types.
        return cls.from_type(basedtype.name, dtype.shape, dflt)

    @classmethod
    def from_type(cls, type, shape=(), dflt=None):
        """Create an Atom from a PyTables type.

        Optional shape and default value may be specified as the
        shape and dflt arguments, respectively::

            >>> Atom.from_type('bool')
            BoolAtom(shape=(), dflt=False)
            >>> Atom.from_type('int16', shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_type('string40', dflt='hello')
            Traceback (most recent call last):
            ...
            ValueError: unknown type: 'string40'
            >>> Atom.from_type('Float64')
            Traceback (most recent call last):
            ...
            ValueError: unknown type: 'Float64'

        """

        if type not in all_types:
            raise ValueError(f"unknown type: {type!r}")
        kind, itemsize = split_type(type)
        return cls.from_kind(kind, itemsize, shape, dflt)

    @classmethod
    def from_kind(cls, kind, itemsize=None, shape=(), dflt=None):
        """Create an Atom from a PyTables kind.

        Optional item size, shape and default value may be
        specified as the itemsize, shape and dflt
        arguments, respectively. Bear in mind that not all atoms support
        a default item size::

            >>> Atom.from_kind('int', itemsize=2, shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_kind('int', shape=(2, 2))
            Int32Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_kind('int', shape=1)
            Int32Atom(shape=(1,), dflt=0)
            >>> Atom.from_kind('string', dflt=b'hello')
            Traceback (most recent call last):
            ...
            ValueError: no default item size for kind ``string``
            >>> Atom.from_kind('Float')
            Traceback (most recent call last):
            ...
            ValueError: unknown kind: 'Float'

        Moreover, some kinds with atypical constructor signatures
        are not supported; you need to use the proper
        constructor::

            >>> Atom.from_kind('enum') #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            ValueError: the ``enum`` kind is not supported...

        """

        kwargs = {'shape': shape}
        if kind not in atom_map:
            raise ValueError(f"unknown kind: {kind!r}")
        # This incompatibility detection may get out-of-date and is
        # too hard-wired, but I couldn't come up with something
        # smarter.  -- Ivan (2007-02-08)
        if kind in ['enum']:
            raise ValueError("the ``%s`` kind is not supported; "
                             "please use the appropriate constructor"
                             % kind)
        # If no `itemsize` is given, try to get the default type of the
        # kind (which has a fixed item size).
        if itemsize is None:
            if kind not in deftype_from_kind:
                raise ValueError("no default item size for kind ``%s``"
                                 % kind)
            type_ = deftype_from_kind[kind]
            kind, itemsize = split_type(type_)
        kdata = atom_map[kind]
        # Look up the class and set a possible item size.
        if hasattr(kdata, 'kind'):  # atom class: non-fixed item size
            atomclass = kdata
            kwargs['itemsize'] = itemsize
        else:  # dictionary: fixed item size
            if itemsize not in kdata:
                raise _invalid_itemsize_error(kind, itemsize, kdata)
            atomclass = kdata[itemsize]
        # Only set a `dflt` argument if given (`None` may not be understood).
        if dflt is not None:
            kwargs['dflt'] = dflt

        return atomclass(**kwargs)

    @property
    def size(self):
        """Total size in bytes of the atom."""
        return self.dtype.itemsize

    @property
    def recarrtype(self):
        """String type to be used in numpy.rec.array()."""
        return str(self.dtype.shape) + self.dtype.base.str[1:]

    @property
    def ndim(self):
        """The number of dimensions of the atom.

        .. versionadded:: 2.4"""
        return len(self.shape)

    def __init__(self, nptype, shape, dflt):
        if not hasattr(self, 'type'):
            raise NotImplementedError("``%s`` is an abstract class; "
                                      "please use one of its subclasses"
                                      % self.__class__.__name__)
        self.shape = shape = _normalize_shape(shape)
        """The shape of the atom (an empty tuple for scalar atoms)."""
        # Curiously enough, NumPy isn't generally able to accept NumPy
        # integers in a shape. ;(
        npshape = tuple(int(s) for s in shape)
        self.dtype = dtype = np.dtype((nptype, npshape))
        """The NumPy dtype that most closely matches this atom."""
        self.dflt = _normalize_default(dflt, dtype)
        """The default value of the atom.

        If the user does not supply a value for an element while
        filling a dataset, this default value will be written to
        disk. If the user supplies a scalar value for a
        multidimensional atom, this value is automatically *broadcast*
        to all the items in the atom cell. If dflt is not supplied, an
        appropriate zero value (or *null* string) will be chosen by
        default.  Please note that default values are kept internally
        as NumPy objects."""

    def __repr__(self):
        args = f'shape={self.shape}, dflt={self.dflt!r}'
        if not hasattr(self.__class__.itemsize, '__int__'):  # non-fixed
            args = f'itemsize={self.itemsize}, {args}'
        return f'{self.__class__.__name__}({args})'

    # Equality is delegated to the *other* operand's
    # ``_is_equal_to_atom()`` method (double dispatch).
    __eq__ = _cmp_dispatcher('_is_equal_to_atom')

    def __ne__(self, other):
        return not self.__eq__(other)

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #    return hash((self.__class__, self.type, self.shape, self.itemsize,
    #                 self.dflt))

    def copy(self, **override):
        """Get a copy of the atom, possibly overriding some arguments.

        Constructor arguments to be overridden must be passed as
        keyword arguments::

            >>> atom1 = Int32Atom(shape=12)
            >>> atom2 = atom1.copy()
            >>> print(atom1)
            Int32Atom(shape=(12,), dflt=0)
            >>> print(atom2)
            Int32Atom(shape=(12,), dflt=0)
            >>> atom1 is atom2
            False
            >>> atom3 = atom1.copy(shape=(2, 2))
            >>> print(atom3)
            Int32Atom(shape=(2, 2), dflt=0)
            >>> atom1.copy(foobar=42) #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            TypeError: ...__init__() got an unexpected keyword argument 'foobar'

        """
        newargs = self._get_init_args()
        newargs.update(override)
        return self.__class__(**newargs)

    def _get_init_args(self):
        """Get a dictionary of instance constructor arguments.

        This implementation works on classes which use the same names
        for both constructor arguments and instance attributes.

        """
        signature = inspect.signature(self.__init__)
        parameters = signature.parameters
        args = [arg for arg, p in parameters.items()
                if p.kind is p.POSITIONAL_OR_KEYWORD]

        return {arg: getattr(self, arg) for arg in args if arg != 'self'}

    def _is_equal_to_atom(self, atom):
        """Is this object equal to the given `atom`?

        Always returns a plain Python ``bool``; the result of ``np.all()``
        is converted so that ``==`` never yields a NumPy boolean.
        """

        return bool(self.type == atom.type and self.shape == atom.shape
                    and self.itemsize == atom.itemsize
                    and np.all(self.dflt == atom.dflt))
class StringAtom(Atom):
    """Defines an atom of type string.

    The item size is the *maximum* length in characters of strings.
    It is a required constructor argument, since this kind declares no
    default type.

    """

    kind = 'string'
    type = 'string'
    _defvalue = b''

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def __init__(self, itemsize, shape=(), dflt=_defvalue):
        # Accept anything convertible to a non-negative integer.
        acceptable = hasattr(itemsize, '__int__') and int(itemsize) >= 0
        if not acceptable:
            raise ValueError("invalid item size for kind ``%s``: %r; "
                             "it must be a positive integer"
                             % ('string', itemsize))
        nptype = 'S%d' % itemsize
        Atom.__init__(self, nptype, shape, dflt)
class BoolAtom(Atom):
    """Defines an atom of type bool.

    Items take one byte and default to ``False``.
    """

    kind = 'bool'
    type = 'bool'
    itemsize = 1
    _deftype = 'bool8'
    _defvalue = False

    def __init__(self, shape=(), dflt=_defvalue):
        nptype = self.type
        Atom.__init__(self, nptype, shape, dflt)
class IntAtom(Atom):
    """Defines an atom of a signed integral type (int kind).

    This is the abstract entry point of the kind: its constructor takes
    an ``itemsize`` (by default the one of ``int32``) and turns the new
    instance into the matching concrete class.
    """

    signed = True
    kind = 'int'
    _defvalue = 0
    _deftype = 'int32'
    __init__ = _abstract_atom_init(_deftype, _defvalue)
class UIntAtom(Atom):
    """Defines an atom of an unsigned integral type (uint kind).

    This is the abstract entry point of the kind: its constructor takes
    an ``itemsize`` (by default the one of ``uint32``) and turns the new
    instance into the matching concrete class.
    """

    signed = False
    kind = 'uint'
    _defvalue = 0
    _deftype = 'uint32'
    __init__ = _abstract_atom_init(_deftype, _defvalue)
class FloatAtom(Atom):
    """Defines an atom of a floating point type (float kind).

    This is the abstract entry point of the kind: its constructor takes
    an ``itemsize`` (by default the one of ``float64``) and turns the
    new instance into the matching concrete class.
    """

    kind = 'float'
    _defvalue = 0.0
    _deftype = 'float64'
    __init__ = _abstract_atom_init(_deftype, _defvalue)
640def _create_numeric_class(baseclass, itemsize):
641 """Create a numeric atom class with the given `baseclass` and an
642 `itemsize`."""
644 prefix = '%s%d' % (baseclass.prefix(), itemsize * 8)
645 type_ = prefix.lower()
646 classdict = {'itemsize': itemsize, 'type': type_,
647 '__doc__': "Defines an atom of type ``%s``." % type_}
649 def __init__(self, shape=(), dflt=baseclass._defvalue):
650 Atom.__init__(self, self.type, shape, dflt)
651 classdict['__init__'] = __init__
652 return type('%sAtom' % prefix, (baseclass,), classdict)
# Concrete numeric atom classes, one per supported item size (in bytes).
Int8Atom = _create_numeric_class(IntAtom, 1)
Int16Atom = _create_numeric_class(IntAtom, 2)
Int32Atom = _create_numeric_class(IntAtom, 4)
Int64Atom = _create_numeric_class(IntAtom, 8)
UInt8Atom = _create_numeric_class(UIntAtom, 1)
UInt16Atom = _create_numeric_class(UIntAtom, 2)
UInt32Atom = _create_numeric_class(UIntAtom, 4)
UInt64Atom = _create_numeric_class(UIntAtom, 8)

# The 16, 96 and 128 bit float atoms are only created when the NumPy in
# use exposes the corresponding scalar type.
if hasattr(np, 'float16'):
    Float16Atom = _create_numeric_class(FloatAtom, 2)
Float32Atom = _create_numeric_class(FloatAtom, 4)
Float64Atom = _create_numeric_class(FloatAtom, 8)
if hasattr(np, 'float96'):
    Float96Atom = _create_numeric_class(FloatAtom, 12)
if hasattr(np, 'float128'):
    Float128Atom = _create_numeric_class(FloatAtom, 16)
class ComplexAtom(Atom):
    """Defines an atom of kind complex.

    Allowed item sizes are 8 (single precision) and 16 (double precision),
    plus 24 and 32 when NumPy provides ``complex192`` and ``complex256``.
    This class must be used instead of more concrete ones to avoid
    confusions with numarray-like precision specifications used in
    PyTables 1.X.

    """

    # This definition is a little more complex (no pun intended)
    # because, although the complex kind is a normal numerical one,
    # the usage of bottom-level classes is artificially forbidden.
    # Everything will be back to normality when people have stopped
    # using the old bottom-level complex classes.

    kind = 'complex'
    _deftype = 'complex128'
    _defvalue = 0j
    _isizes = [8, 16]

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    # Only instances have a `type` attribute, so complex types must be
    # registered by hand.
    all_types.update(('complex64', 'complex128'))
    if hasattr(np, 'complex192'):
        _isizes.append(24)
        all_types.add('complex192')
    if hasattr(np, 'complex256'):
        _isizes.append(32)
        all_types.add('complex256')

    def __init__(self, itemsize, shape=(), dflt=_defvalue):
        if itemsize not in self._isizes:
            raise _invalid_itemsize_error('complex', itemsize, self._isizes)
        # The type name carries the precision in bits.
        type_ = '%s%d' % (self.kind, itemsize * 8)
        self.type = type_
        Atom.__init__(self, type_, shape, dflt)
class _ComplexErrorAtom(ComplexAtom, metaclass=type):
    """Placeholder for the old complex atom names.

    Instantiating it always raises a ``TypeError`` telling the user to
    switch to ``ComplexAtom(itemsize=N)``.
    """

    def __init__(self, shape=(), dflt=ComplexAtom._defvalue):
        message = (
            "to avoid confusions with PyTables 1.X complex atom names, "
            "please use ``ComplexAtom(itemsize=N)``, "
            "where N=8 for single precision complex atoms, "
            "and N=16 for double precision complex atoms")
        raise TypeError(message)
# The old bottom-level complex atom names are all bound to the same
# error-raising placeholder class; the 192 and 256 bit names only exist
# when NumPy exposes the corresponding scalar type.
Complex32Atom = Complex64Atom = Complex128Atom = _ComplexErrorAtom
if hasattr(np, 'complex192'):
    Complex192Atom = _ComplexErrorAtom
if hasattr(np, 'complex256'):
    Complex256Atom = _ComplexErrorAtom
class TimeAtom(Atom):
    """Defines an atom of time type (time kind).

    Two distinct types of time are supported: a 32 bit integer value and
    a 64 bit floating point value, both counting the number of seconds
    since the Unix epoch.  This atom has the property of being stored
    using the HDF5 time datatypes.

    The constructor takes an ``itemsize`` (by default the one of
    ``time32``) and turns the new instance into the matching concrete
    class.

    """

    kind = 'time'
    _defvalue = 0
    _deftype = 'time32'
    __init__ = _abstract_atom_init(_deftype, _defvalue)
class Time32Atom(TimeAtom):
    """Defines an atom of type time32.

    Values are held in memory with the NumPy ``int32`` type.
    """

    type = 'time32'
    itemsize = 4
    _defvalue = 0

    def __init__(self, shape=(), dflt=_defvalue):
        nptype = 'int32'
        Atom.__init__(self, nptype, shape, dflt)
class Time64Atom(TimeAtom):
    """Defines an atom of type time64.

    Values are held in memory with the NumPy ``float64`` type.
    """

    type = 'time64'
    itemsize = 8
    _defvalue = 0.0

    def __init__(self, shape=(), dflt=_defvalue):
        nptype = 'float64'
        Atom.__init__(self, nptype, shape, dflt)
class EnumAtom(Atom):
    """Description of an atom of an enumerated type.

    Instances of this class describe the atom type used to store enumerated
    values. Those values belong to an enumerated type, defined by the first
    argument (enum) in the constructor of the atom, which accepts the same
    kinds of arguments as the Enum class (see :ref:`EnumClassDescr`).  The
    enumerated type is stored in the enum attribute of the atom.

    A default value must be specified as the second argument (dflt) in the
    constructor; it must be the *name* (a string) of one of the enumerated
    values in the enumerated type. When the atom is created, the corresponding
    concrete value is broadcast and stored in the dflt attribute (setting
    different default values for items in a multidimensional atom is not
    supported yet). If the name does not match any value in the enumerated
    type, a KeyError is raised.

    Another atom must be specified as the base argument in order to determine
    the base type used for storing the values of enumerated values in memory
    and disk. This *storage atom* is kept in the base attribute of the created
    atom. As a shorthand, you may specify a PyTables type instead of the
    storage atom, implying that this has a scalar shape.

    The storage atom should be able to represent each and every concrete value
    in the enumeration. If it is not, a TypeError is raised. The default value
    of the storage atom is ignored.

    The type attribute of enumerated atoms is always enum.

    Enumerated atoms also support comparisons with other objects::

        >>> enum = ['T0', 'T1', 'T2']
        >>> atom1 = EnumAtom(enum, 'T0', 'int8')  # same as ``atom2``
        >>> atom2 = EnumAtom(enum, 'T0', Int8Atom())  # same as ``atom1``
        >>> atom3 = EnumAtom(enum, 'T0', 'int16')
        >>> atom4 = Int8Atom()
        >>> atom1 == enum
        False
        >>> atom1 == atom2
        True
        >>> atom2 != atom1
        False
        >>> atom1 == atom3
        False
        >>> atom1 == atom4
        False
        >>> atom4 != atom1
        True

    Examples
    --------

    The next C enum construction::

        enum myEnum {
            T0,
            T1,
            T2
        };

    would correspond to the following PyTables
    declaration::

        >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', 'int32')

    Please note the dflt argument with a value of 'T0'. Since the concrete
    value matching T0 is unknown right now (we have not used explicit concrete
    values), using the name is the only option left for defining a default
    value for the atom.

    The chosen representation of values for this enumerated atom uses signed
    32-bit integers, which surely wastes quite a lot of memory. Another size
    could be selected by using the base argument (this time with a full-blown
    storage atom)::

        >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', UInt8Atom())

    You can also define multidimensional arrays for data elements::

        >>> my_enum_atom = EnumAtom(
        ...    ['T0', 'T1', 'T2'], 'T0', base='uint32', shape=(3,2))

    for 3x2 arrays of uint32.

    """

    # Registering this class in the class map may be a little wrong,
    # since the ``Atom.from_kind()`` method fails miserably with
    # enumerations, as they don't support an ``itemsize`` argument.
    # However, resetting ``__metaclass__`` to ``type`` doesn't seem to
    # work and I don't feel like creating a subclass of ``MetaAtom``.

    kind = 'enum'
    type = 'enum'

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def _checkbase(self, base):
        """Check the `base` storage atom.

        Raises ``TypeError`` when `base` is itself an enumerated atom or
        cannot represent the concrete values of ``self.enum``, and
        ``NotImplementedError`` for non-integer or non-scalar values.
        """

        if base.kind == 'enum':
            raise TypeError("can not use an enumerated atom "
                            "as a storage atom: %r" % base)

        # Check whether the storage atom can represent concrete values
        # in the enumeration...
        basedtype = base.dtype
        pyvalues = [value for (name, value) in self.enum]
        try:
            npgenvalues = np.array(pyvalues)
        except ValueError as exc:
            raise TypeError(
                "concrete values are not uniformly-shaped") from exc
        try:
            npvalues = np.array(npgenvalues, dtype=basedtype.base)
        except ValueError as exc:
            raise TypeError("storage atom type is incompatible with "
                            "concrete values in the enumeration") from exc
        if npvalues.shape[1:] != basedtype.shape:
            raise TypeError("storage atom shape does not match that of "
                            "concrete values in the enumeration")
        # A lossy conversion shows up as a difference between the values
        # before and after casting to the storage type.
        if npvalues.tolist() != npgenvalues.tolist():
            raise TypeError("storage atom type lacks precision for "
                            "concrete values in the enumeration")

        # ...with some implementation limitations.
        if npvalues.dtype.kind not in ['i', 'u']:
            raise NotImplementedError("only integer concrete values "
                                      "are supported for the moment, sorry")
        if len(npvalues.shape) > 1:
            raise NotImplementedError("only scalar concrete values "
                                      "are supported for the moment, sorry")

    def _get_init_args(self):
        """Get a dictionary of instance constructor arguments."""

        return dict(enum=self.enum, dflt=self._defname,
                    base=self.base, shape=self.shape)

    def _is_equal_to_atom(self, atom):
        """Is this object equal to the given `atom`?"""

        return False

    def _is_equal_to_enumatom(self, enumatom):
        """Is this object equal to the given `enumatom`?

        Always returns a plain Python ``bool``; the result of ``np.all()``
        is converted so that ``==`` never yields a NumPy boolean.
        """

        return bool(self.enum == enumatom.enum
                    and self.shape == enumatom.shape
                    and np.all(self.dflt == enumatom.dflt)
                    and self.base == enumatom.base)

    def __init__(self, enum, dflt, base, shape=()):
        if not isinstance(enum, Enum):
            enum = Enum(enum)
        self.enum = enum

        # A string `base` is a shorthand for a scalar atom of that type.
        if isinstance(base, str):
            base = Atom.from_type(base)
        self._checkbase(base)
        self.base = base

        default = enum[dflt]  # check default value
        self._defname = dflt  # kept for representation purposes

        # These are kept to ease dumping this particular
        # representation of the enumeration to storage.
        names, values = [], []
        for (name, value) in enum:
            names.append(name)
            values.append(value)
        basedtype = self.base.dtype

        self._names = names
        self._values = np.array(values, dtype=basedtype.base)

        Atom.__init__(self, basedtype, shape, default)

    def __repr__(self):
        return ('EnumAtom(enum=%r, dflt=%r, base=%r, shape=%r)'
                % (self.enum, self._defname, self.base, self.shape))

    # Equality is delegated to the *other* operand's
    # ``_is_equal_to_enumatom()`` method (double dispatch).
    __eq__ = _cmp_dispatcher('_is_equal_to_enumatom')

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #    return hash((self.__class__, self.enum, self.shape, self.dflt,
    #                 self.base))
class ReferenceAtom(Atom):
    """Defines an atom of type object to read references.

    This atom is read-only.  Its constructor only accepts a shape; the
    default value is always ``None``.
    """

    kind = 'reference'
    type = 'object'
    _deftype = 'NoneType'
    _defvalue = None

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def __init__(self, shape=()):
        nptype, default = self.type, self._defvalue
        Atom.__init__(self, nptype, shape, default)

    def __repr__(self):
        return f'ReferenceAtom(shape={self.shape})'
986# Pseudo-atom classes
987# ===================
988#
# Now, there come three special classes, `ObjectAtom`, `VLStringAtom`
# and `VLUnicodeAtom`, that actually do not descend from `Atom`, but
# whose goal is so similar that they should be described here.
992# Pseudo-atoms can only be used with `VLArray` datasets, and they do
993# not support multidimensional values, nor multiple values per row.
994#
995# They can be recognised because they also have ``kind``, ``type`` and
996# ``shape`` attributes, but no ``size``, ``itemsize`` or ``dflt``
997# ones. Instead, they have a ``base`` atom which defines the elements
998# used for storage.
999#
1000# See ``examples/vlarray1.py`` and ``examples/vlarray2.py`` for
1001# further examples on `VLArray` datasets, including object
1002# serialization and string management.
class PseudoAtom:
    """Base class for pseudo-atoms, usable only in ``VLArray`` nodes.

    Like regular atoms, pseudo-atoms expose `kind`, `type` and `shape`
    attributes, but they have no `size`, `itemsize` or `dflt`.  Instead,
    a `base` atom defines the elements used for storage.
    """

    def toarray(self, object_):
        """Convert an `object_` into an array of base atoms.

        This base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def fromarray(self, array):
        """Convert an `array` of base atoms into an object.

        This base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def __repr__(self):
        # Use the runtime class name so subclasses are reported correctly.
        return f'{self.__class__.__name__}()'
class _BufferedAtom(PseudoAtom):
    """Pseudo-atom which stores data as a buffer (flat array of uints)."""

    shape = ()

    def _tobuffer(self, object_):
        """Convert an `object_` into a buffer.

        This base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    def toarray(self, object_):
        """Convert an `object_` into a flat array of base atoms.

        The object is turned into a buffer by ``_tobuffer()``, and that
        buffer is wrapped in an array of the base atom's dtype holding
        ``len(buffer)`` elements.
        """
        raw = self._tobuffer(object_)
        return np.ndarray(shape=len(raw), dtype=self.base.dtype, buffer=raw)
class VLStringAtom(_BufferedAtom):
    """Defines an atom of type ``vlstring``.

    This class describes a *row* of the VLArray class, rather than an atom. It
    differs from the StringAtom class in that you can only add *one instance of
    it to one specific row*, i.e. the :meth:`VLArray.append` method only
    accepts one object when the base atom is of this type.

    This class stores bytestrings. It does not make assumptions on the
    encoding of the string, and raw bytes are stored as is. To store a string
    you will need to *explicitly* convert it to a bytestring before you can
    save it::

        >>> s = 'A unicode string: hbar = \u210f'
        >>> bytestring = s.encode('utf-8')
        >>> VLArray.append(bytestring)  # doctest: +SKIP

    For full Unicode support, using VLUnicodeAtom (see :ref:`VLUnicodeAtom`) is
    recommended.

    Variable-length string atoms do not accept parameters and they cause the
    reads of rows to always return Python bytestrings.  You can regard vlstring
    atoms as an easy way to save generic variable length strings.

    """

    kind = 'vlstring'
    type = 'vlstring'
    base = UInt8Atom()

    def _tobuffer(self, object_):
        """Return `object_` as a NumPy bytes scalar.

        A ``bytes`` object is accepted as is.  A ``str`` is still
        accepted, but triggers a ``DeprecationWarning``.  Any other type
        raises ``TypeError``.
        """
        if isinstance(object_, str):
            warnings.warn("Storing non bytestrings in VLStringAtom is "
                          "deprecated.", DeprecationWarning)
        elif not isinstance(object_, bytes):
            raise TypeError(f"object is not a string: {object_!r}")
        # ``np.string_`` was only an alias of ``np.bytes_`` and was removed
        # in NumPy 2.0; the canonical name works on every NumPy version.
        return np.bytes_(object_)

    def fromarray(self, array):
        """Return the raw bytes held by `array` as a Python bytestring."""
        return array.tobytes()
class VLUnicodeAtom(_BufferedAtom):
    """Defines an atom of type vlunicode.

    This class describes a *row* of the VLArray class, rather than an atom.  It
    is very similar to VLStringAtom (see :ref:`VLStringAtom`), but it stores
    Unicode strings (using 32-bit characters a la UCS-4, so all strings of the
    same length also take up the same space).

    This class does not make assumptions on the encoding of plain input
    strings.  Plain strings are supported as long as no character is out of the
    ASCII set; otherwise, you will need to *explicitly* convert them to Unicode
    before you can save them.

    Variable-length Unicode atoms do not accept parameters and they cause the
    reads of rows to always return Python Unicode strings.  You can regard
    vlunicode atoms as an easy way to save variable length Unicode strings.

    """

    kind = 'vlunicode'
    type = 'vlunicode'
    base = UInt32Atom()

    @staticmethod
    def _as_text(object_):
        """Validate `object_` and return it as a ``str``.

        A ``str`` is accepted as is.  A ``bytes`` object is still accepted
        (with a ``DeprecationWarning``) and decoded as ASCII, as the class
        documentation only promises support for ASCII plain strings; bytes
        outside that set raise ``UnicodeDecodeError``.  Any other type
        raises ``TypeError``.
        """
        if isinstance(object_, bytes):
            warnings.warn("Storing bytestrings in VLUnicodeAtom is "
                          "deprecated.", DeprecationWarning)
            # ``str(b'abc')`` yields the literal text "b'abc'", not 'abc',
            # so the bytes must be decoded explicitly.
            return object_.decode('ascii')
        if not isinstance(object_, str):
            raise TypeError(f"object is not a string: {object_!r}")
        return str(object_)

    # NumPy Unicode scalars do not implement the buffer interface in
    # Python 3, so the string is wrapped in a 0-d array of dtype ``U``
    # whose memory is then reinterpreted as base atoms.  Since
    # ``_tobuffer()`` can't return an array, ``toarray()`` itself is
    # overridden.
    def toarray(self, object_):
        """Convert a string into a flat array with one base atom per character."""
        ustr = self._as_text(object_)
        uarr = np.array(ustr, dtype='U')
        return np.ndarray(
            buffer=uarr, dtype=self.base.dtype, shape=len(ustr))

    def _tobuffer(self, object_):
        """Return `object_` as a NumPy Unicode scalar.

        Not used by ``toarray()``, which is overridden in this class.
        """
        # ``np.unicode_`` was only an alias of ``np.str_`` and was removed
        # in NumPy 2.0; the canonical name works on every NumPy version.
        return np.str_(self._as_text(object_))

    def fromarray(self, array):
        """Convert an `array` of base atoms back into a Python string."""
        length = len(array)
        if length == 0:
            return ''  # ``array.view('U0')`` raises a `TypeError`
        return array.view('U%d' % length).item()
class ObjectAtom(_BufferedAtom):
    """Defines an atom of type object.

    This class is meant to fit *any* kind of Python object in a row of a
    VLArray dataset by pickling it behind the scenes.  Because the length
    of the pickled output cannot be known in advance (i.e. the atom
    already has a *variable* length), only *one object per row* can be
    stored.  Several objects can still be grouped in a single tuple or
    list and passed to the :meth:`VLArray.append` method.

    Object atoms do not accept parameters and they cause the reads of rows
    to always return Python objects.  You can regard object atoms as an easy
    way to save an arbitrary number of generic Python objects in a VLArray
    dataset.

    """

    kind = 'object'
    type = 'object'
    base = UInt8Atom()

    def fromarray(self, array):
        """Unpickle the object stored in `array`, or return ``None`` if empty."""
        # An empty array is checked for because of a possible bug in HDF5
        # which makes it claim that a dataset has one record when in fact
        # it is empty.
        #
        # NOTE: unpickling can execute arbitrary code, so data should only
        # be read from trusted files.
        return pickle.loads(array.tobytes()) if array.size != 0 else None

    def _tobuffer(self, object_):
        """Pickle `object_` with the highest available protocol."""
        return pickle.dumps(object_, protocol=pickle.HIGHEST_PROTOCOL)