Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/atom.py: 56%

1"""Atom classes for describing dataset contents."""

3import re

4import inspect

5import warnings

7import numpy as np

9from .utils import SizeType

10from .misc.enum import Enum

12import pickle

14from .exceptions import FlavorWarning

__docformat__ = 'reStructuredText'
"""The format of documentation strings in this module."""

# Registry of type names.  ``MetaAtom.__init__`` adds the ``type`` of every
# atom class that declares one; ``ComplexAtom`` adds its type names by hand
# in its class body.
all_types = set()  # filled as atom classes are created
"""Set of all PyTables types."""

# Registry used by ``Atom.from_kind()`` and the abstract atom constructors
# to locate the concrete class for a (kind, itemsize) pair.
atom_map = {}  # filled as atom classes are created
"""Maps atom kinds to item sizes and atom classes.

If there is a fixed set of possible item sizes for a given kind, the
kind maps to another mapping from item size in bytes to atom class.
Otherwise, the kind maps directly to the atom class.
"""

# Populated by ``MetaAtom.__init__`` from each class's ``_deftype``
# attribute; consulted by ``Atom.from_kind()`` when no item size is given.
deftype_from_kind = {}  # filled as atom classes are created
"""Maps atom kinds to their default atom type (if any)."""

_type_re = re.compile(r'^([a-z]+)([0-9]*)$')


def split_type(type):
    """Break a PyTables type name into its kind and its item size.

    The result is a ``(kind, itemsize)`` tuple where the item size is
    expressed in bytes.  When the type carries no trailing precision
    (in bits), the item size is None::

        >>> split_type('int32')
        ('int', 4)
        >>> split_type('string')
        ('string', None)
        >>> split_type('int20')
        Traceback (most recent call last):
        ...
        ValueError: precision must be a multiple of 8: 20
        >>> split_type('foo bar')
        Traceback (most recent call last):
        ...
        ValueError: malformed type: 'foo bar'

    """

    parsed = _type_re.match(type)
    if parsed is None:
        raise ValueError("malformed type: %r" % type)

    kind, digits = parsed.groups()
    if not digits:
        # No precision suffix at all: the item size is unknown.
        return (kind, None)

    bits = int(digits)
    itemsize, leftover = divmod(bits, 8)
    # Only the remainder is checked: an item size of 0 could be valid.
    if leftover:
        raise ValueError("precision must be a multiple of 8: %d" % bits)
    return (kind, itemsize)

def _invalid_itemsize_error(kind, itemsize, itemsizes):
    """Build (but do not raise) the error for an unsupported item size.

    `itemsizes` is any iterable of the accepted sizes; it is listed in
    sorted order in the message.  The caller is expected to ``raise``
    the returned ``ValueError``.
    """

    allowed = sorted(itemsizes)
    template = ("invalid item size for kind ``%s``: %r; "
                "it must be one of ``%r``")
    return ValueError(template % (kind, itemsize, allowed))

def _abstract_atom_init(deftype, defvalue):
    """Return a constructor for an abstract `Atom` class.

    The item size encoded in `deftype` becomes the default ``itemsize``
    of the generated constructor and `defvalue` its default ``dflt``.
    When called, the constructor looks up the concrete class registered
    in ``atom_map`` for the instance's kind and the requested item size,
    switches the instance over to that class and delegates to its
    ``__init__``.
    """

    _, defitemsize = split_type(deftype)

    def __init__(self, itemsize=defitemsize, shape=(), dflt=defvalue):
        kind = self.kind
        assert kind in atom_map
        try:
            concrete = atom_map[kind][itemsize]
        except KeyError:
            raise _invalid_itemsize_error(kind, itemsize, atom_map[kind])
        # Morph the abstract instance into the concrete, fixed-size class.
        self.__class__ = concrete
        concrete.__init__(self, shape, dflt)

    return __init__

def _normalize_shape(shape):
    """Validate `shape` and return it as a tuple of ``SizeType`` values.

    An integer ``N`` is accepted as a shorthand for ``(N,)`` and must be
    at least 1.  Anything else must be convertible to a tuple.

    Raises ``ValueError`` for an integer below 1 or for more than 32
    dimensions, and ``TypeError`` when `shape` is neither an integer
    nor a sequence.
    """

    if isinstance(shape, (int, np.integer)):
        if shape < 1:
            raise ValueError("shape value must be greater than 0: %d"
                             % shape)
        dims = (shape,)  # N is a shorthand for (N,)
    else:
        try:
            dims = tuple(shape)
        except TypeError:
            raise TypeError("shape must be an integer or sequence: %r"
                            % (shape,))

    # XXX Get from HDF5 library if possible.
    # HDF5 does not support ranks greater than 32
    if len(dims) > 32:
        raise ValueError(
            f"shapes with rank > 32 are not supported: {dims!r}")

    return tuple(map(SizeType, dims))

117

118

def _normalize_default(value, dtype):
    """Convert `value` into a default suitable for NumPy type `dtype`.

    ``None`` is treated as 0.  The value is converted with the base
    dtype of `dtype`; a 0-dimensional result is returned as a NumPy
    scalar, anything else as a NumPy array.
    """

    # Defaults are kept as NumPy objects, which makes them easier to
    # serialize as attributes.
    base = dtype.base
    raw = 0 if value is None else value
    try:
        result = np.array(raw, dtype=base)
    except ValueError:
        if np.array(raw).shape != base.shape:
            raise
        # Maybe nested dtype with "scalar" value.
        result = np.array(raw, dtype=base.base)

    # 0-dim arrays will be represented as NumPy scalars
    # (PyTables attribute convention)
    return result[()] if result.shape == () else result

140

141

def _cmp_dispatcher(other_method_name):
    """Build a comparison method that defers to the *other* operand.

    The returned *rich comparison* method calls the method named
    `other_method_name` on the *other* object, passing ``self`` to it,
    and returns its result.  When the other object has no such
    attribute, the comparison yields ``False``.

    This is part of the implementation of a double dispatch pattern.
    """

    missing = object()  # private marker: "attribute not found"

    def dispatched_cmp(self, other):
        handler = getattr(other, other_method_name, missing)
        if handler is missing:
            return False
        return handler(self)

    return dispatched_cmp

159

160

class MetaAtom(type):
    """Atom metaclass.

    Every time an atom class is created, this metaclass records the
    relevant data from the class body in the module registries
    (``deftype_from_kind``, ``all_types`` and ``atom_map``).

    """

    def __init__(cls, name, bases, dict_):
        super().__init__(name, bases, dict_)

        # Only attributes declared in this very class body are looked at.
        kind = dict_.get('kind')
        itemsize = dict_.get('itemsize')
        type_ = dict_.get('type')
        deftype = dict_.get('_deftype')

        has_itemsize = bool(itemsize)
        fixed_size = has_itemsize and hasattr(itemsize, '__int__')

        if type_:
            all_types.add(type_)

        if kind:
            if deftype:
                deftype_from_kind[kind] = deftype
            if has_itemsize and not fixed_size:
                # Atom classes with a non-fixed item size do have an
                # ``itemsize``, but it's not a number (e.g. property).
                atom_map[kind] = cls
                return
            # first definition of kind, make new entry
            atom_map[kind] = {}

        if fixed_size:
            # The kind may come from a superclass, hence ``cls.kind``.
            atom_map[cls.kind][int(itemsize)] = cls

195

196

class Atom(metaclass=MetaAtom):
    """Defines the type of atomic cells stored in a dataset.

    *Atomic* means that the individual elements of a cell can not be
    extracted directly by indexing (i.e. __getitem__()) the dataset;
    e.g. if a dataset has shape (2, 2) and its atoms have shape (3,),
    to get the third element of the cell at (1, 0) one should use
    dataset[1,0][2] instead of dataset[1,0,2].

    The Atom class declares the properties of the *base element* (also
    known as *atom*) of CArray, EArray and VLArray datasets, and it is
    also used to describe the base elements of Array datasets.  Atoms
    always have the same length.  However, you can grow datasets along
    the extensible dimension in the case of EArray, or put a variable
    number of them on a VLArray row.  Moreover, atoms are not
    restricted to scalar values: they can be *fully multidimensional
    objects*.

    Parameters
    ----------
    itemsize : int
        For types with a non-fixed size, this sets the size in
        bytes of individual items in the atom.
    shape : tuple
        Sets the shape of the atom. An integer shape of
        N is equivalent to the tuple (N,).
    dflt
        Sets the default value for the atom.

    The following are the public methods and attributes of the Atom class.

    Notes
    -----
    A series of descendant classes are offered in order to make the
    use of these element descriptions easier. You should use a
    particular Atom descendant class whenever you know the exact type
    you will need when writing your code. Otherwise, you may use one
    of the Atom.from_*() factory Methods.

    .. rubric:: Atom attributes

    .. attribute:: dflt

        The default value of the atom.

        If the user does not supply a value for an element while
        filling a dataset, this default value will be written to disk.
        If the user supplies a scalar value for a multidimensional
        atom, this value is automatically *broadcast* to all the items
        in the atom cell. If dflt is not supplied, an appropriate zero
        value (or *null* string) will be chosen by default.  Please
        note that default values are kept internally as NumPy objects.

    .. attribute:: dtype

        The NumPy dtype that most closely matches this atom.

    .. attribute:: itemsize

        Size in bytes of a single item in the atom.
        Specially useful for atoms of the string kind.

    .. attribute:: kind

        The PyTables kind of the atom (a string).

    .. attribute:: shape

        The shape of the atom (an empty tuple for scalar atoms).

    .. attribute:: type

        The PyTables type of the atom (a string).

    Atoms can be compared with atoms and other objects for
    strict (in)equality without having to compare individual
    attributes::

        >>> atom1 = StringAtom(itemsize=10)  # same as ``atom2``
        >>> atom2 = Atom.from_kind('string', 10)  # same as ``atom1``
        >>> atom3 = IntAtom()
        >>> atom1 == 'foo'
        False
        >>> atom1 == atom2
        True
        >>> atom2 != atom1
        False
        >>> atom1 == atom3
        False
        >>> atom3 != atom2
        True

    """

    @classmethod
    def prefix(cls):
        """Return the atom class prefix."""
        name = cls.__name__
        cut = name.rfind('Atom')
        return name[:cut]

    @classmethod
    def from_sctype(cls, sctype, shape=(), dflt=None):
        """Create an Atom from a NumPy scalar type sctype.

        Optional shape and default value may be specified as the
        shape and dflt
        arguments, respectively. Information in the
        sctype not represented in an Atom is ignored::

            >>> import numpy as np
            >>> Atom.from_sctype(np.int16, shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_sctype('S5', dflt='hello')
            Traceback (most recent call last):
            ...
            ValueError: unknown NumPy scalar type: 'S5'
            >>> Atom.from_sctype('float64')
            Float64Atom(shape=(), dflt=0.0)

        """
        is_scalar_class = (isinstance(sctype, type)
                           and issubclass(sctype, np.generic))
        if not is_scalar_class:
            # Anything else must be a key of NumPy's scalar type table.
            if sctype not in np.sctypeDict:
                raise ValueError(f"unknown NumPy scalar type: {sctype!r}")
            sctype = np.sctypeDict[sctype]
        full_dtype = np.dtype((sctype, shape))
        return cls.from_dtype(full_dtype, dflt)

    @classmethod
    def from_dtype(cls, dtype, dflt=None):
        """Create an Atom from a NumPy dtype.

        An optional default value may be specified as the dflt
        argument. Information in the dtype not represented in an Atom is
        ignored::

            >>> import numpy as np
            >>> Atom.from_dtype(np.dtype((np.int16, (2, 2))))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_dtype(np.dtype('float64'))
            Float64Atom(shape=(), dflt=0.0)

        Note: for easier use in Python 3, where all strings lead to the
        Unicode dtype, this dtype will also generate a StringAtom. Since
        this is only viable for strings that are castable as ascii, a
        warning is issued.

            >>> Atom.from_dtype(np.dtype('U20'))  # doctest: +SKIP
            Atom.py:392: FlavorWarning: support for unicode type is very
                limited, and only works for strings that can be cast as ascii
            StringAtom(itemsize=20, shape=(), dflt=b'')

        """
        base = dtype.base
        if base.names:
            raise ValueError("compound data types are not supported: %r"
                             % dtype)
        if base.shape != ():
            raise ValueError("nested data types are not supported: %r"
                             % dtype)

        np_kind = base.kind
        if np_kind == 'S':  # can not reuse something like 'string80'
            return cls.from_kind('string', base.itemsize, dtype.shape, dflt)
        if np_kind == 'U':
            # workaround for unicode type (standard string type in Python 3)
            warnings.warn("support for unicode type is very limited, and "
                          "only works for strings that can be cast as ascii",
                          FlavorWarning)
            # The byte size is divided by 4 to obtain the item size.
            nchars = base.itemsize // 4
            assert str(nchars) in base.str, (
                "something went wrong in handling unicode.")
            return cls.from_kind('string', nchars, dtype.shape, dflt)

        # Most NumPy types have direct correspondence with PyTables types.
        return cls.from_type(base.name, dtype.shape, dflt)

    @classmethod
    def from_type(cls, type, shape=(), dflt=None):
        """Create an Atom from a PyTables type.

        Optional shape and default value may be specified as the
        shape and dflt arguments, respectively::

            >>> Atom.from_type('bool')
            BoolAtom(shape=(), dflt=False)
            >>> Atom.from_type('int16', shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_type('string40', dflt='hello')
            Traceback (most recent call last):
            ...
            ValueError: unknown type: 'string40'
            >>> Atom.from_type('Float64')
            Traceback (most recent call last):
            ...
            ValueError: unknown type: 'Float64'

        """

        if type not in all_types:
            raise ValueError(f"unknown type: {type!r}")
        kind, itemsize = split_type(type)
        return cls.from_kind(kind, itemsize=itemsize, shape=shape, dflt=dflt)

    @classmethod
    def from_kind(cls, kind, itemsize=None, shape=(), dflt=None):
        """Create an Atom from a PyTables kind.

        Optional item size, shape and default value may be
        specified as the itemsize, shape and dflt
        arguments, respectively. Bear in mind that not all atoms support
        a default item size::

            >>> Atom.from_kind('int', itemsize=2, shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_kind('int', shape=(2, 2))
            Int32Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_kind('int', shape=1)
            Int32Atom(shape=(1,), dflt=0)
            >>> Atom.from_kind('string', dflt=b'hello')
            Traceback (most recent call last):
            ...
            ValueError: no default item size for kind ``string``
            >>> Atom.from_kind('Float')
            Traceback (most recent call last):
            ...
            ValueError: unknown kind: 'Float'

        Moreover, some kinds with atypical constructor signatures
        are not supported; you need to use the proper
        constructor::

            >>> Atom.from_kind('enum') #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            ValueError: the ``enum`` kind is not supported...

        """

        if kind not in atom_map:
            raise ValueError(f"unknown kind: {kind!r}")
        # This incompatibility detection may get out-of-date and is
        # too hard-wired, but I couldn't come up with something
        # smarter.  -- Ivan (2007-02-08)
        if kind in ('enum',):
            raise ValueError("the ``%s`` kind is not supported; "
                             "please use the appropriate constructor"
                             % kind)

        # If no `itemsize` is given, try to get the default type of the
        # kind (which has a fixed item size).
        if itemsize is None:
            if kind not in deftype_from_kind:
                raise ValueError("no default item size for kind ``%s``"
                                 % kind)
            kind, itemsize = split_type(deftype_from_kind[kind])

        registered = atom_map[kind]
        ctor_args = {'shape': shape}
        # Look up the class and set a possible item size.
        if hasattr(registered, 'kind'):  # atom class: non-fixed item size
            atomclass = registered
            ctor_args['itemsize'] = itemsize
        elif itemsize in registered:  # dictionary: fixed item size
            atomclass = registered[itemsize]
        else:
            raise _invalid_itemsize_error(kind, itemsize, registered)

        # Only set a `dflt` argument if given (`None` may not be understood).
        if dflt is not None:
            ctor_args['dflt'] = dflt

        return atomclass(**ctor_args)

    @property
    def size(self):
        """Total size in bytes of the atom."""
        return self.dtype.itemsize

    @property
    def recarrtype(self):
        """String type to be used in numpy.rec.array()."""
        dtype = self.dtype
        return str(dtype.shape) + dtype.base.str[1:]

    @property
    def ndim(self):
        """The number of dimensions of the atom.

        .. versionadded:: 2.4"""
        return len(self.shape)

    def __init__(self, nptype, shape, dflt):
        if not hasattr(self, 'type'):
            raise NotImplementedError("``%s`` is an abstract class; "
                                      "please use one of its subclasses"
                                      % self.__class__.__name__)

        # The shape of the atom (an empty tuple for scalar atoms).
        normalized = _normalize_shape(shape)
        self.shape = normalized

        # The NumPy dtype that most closely matches this atom.
        # Curiously enough, NumPy isn't generally able to accept NumPy
        # integers in a shape, so plain Python ints are used.  ;(
        self.dtype = np.dtype((nptype, tuple(map(int, normalized))))

        # The default value of the atom, kept internally as a NumPy
        # object (see the ``dflt`` attribute in the class docstring).
        self.dflt = _normalize_default(dflt, self.dtype)

    def __repr__(self):
        cls = self.__class__
        parts = [f'shape={self.shape}', f'dflt={self.dflt!r}']
        if not hasattr(cls.itemsize, '__int__'):  # non-fixed
            parts.insert(0, f'itemsize={self.itemsize}')
        return f"{cls.__name__}({', '.join(parts)})"

    __eq__ = _cmp_dispatcher('_is_equal_to_atom')

    def __ne__(self, other):
        return not self.__eq__(other)

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #     return hash((self.__class__, self.type, self.shape, self.itemsize,
    #                  self.dflt))

    def copy(self, **override):
        """Get a copy of the atom, possibly overriding some arguments.

        Constructor arguments to be overridden must be passed as
        keyword arguments::

            >>> atom1 = Int32Atom(shape=12)
            >>> atom2 = atom1.copy()
            >>> print(atom1)
            Int32Atom(shape=(12,), dflt=0)
            >>> print(atom2)
            Int32Atom(shape=(12,), dflt=0)
            >>> atom1 is atom2
            False
            >>> atom3 = atom1.copy(shape=(2, 2))
            >>> print(atom3)
            Int32Atom(shape=(2, 2), dflt=0)
            >>> atom1.copy(foobar=42) #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            TypeError: ...__init__() got an unexpected keyword argument 'foobar'

        """
        init_args = self._get_init_args()
        init_args.update(override)
        return self.__class__(**init_args)

    def _get_init_args(self):
        """Get a dictionary of instance constructor arguments.

        This implementation works on classes which use the same names
        for both constructor arguments and instance attributes.

        """
        params = inspect.signature(self.__init__).parameters
        return {name: getattr(self, name)
                for name, spec in params.items()
                if spec.kind is spec.POSITIONAL_OR_KEYWORD
                and name != 'self'}

    def _is_equal_to_atom(self, atom):
        """Is this object equal to the given `atom`?"""

        same_layout = (self.type == atom.type
                       and self.shape == atom.shape
                       and self.itemsize == atom.itemsize)
        return same_layout and np.all(self.dflt == atom.dflt)

572

573

class StringAtom(Atom):
    """Defines an atom of type string.

    The item size is the *maximum* length in characters of strings.

    """

    kind = 'string'
    type = 'string'
    _defvalue = b''

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def __init__(self, itemsize, shape=(), dflt=_defvalue):
        # The size must be convertible to an integer and must not be
        # negative.
        acceptable = hasattr(itemsize, '__int__') and int(itemsize) >= 0
        if not acceptable:
            raise ValueError("invalid item size for kind ``%s``: %r; "
                             "it must be a positive integer"
                             % ('string', itemsize))
        nptype = 'S%d' % itemsize
        Atom.__init__(self, nptype, shape, dflt)

596

597

class BoolAtom(Atom):
    """Defines an atom of type bool."""

    # Registry data read by the metaclass from this class body.
    kind = 'bool'
    type = 'bool'
    itemsize = 1
    _deftype = 'bool8'
    _defvalue = False

    def __init__(self, shape=(), dflt=_defvalue):
        Atom.__init__(self, nptype=self.type, shape=shape, dflt=dflt)

609

610

class IntAtom(Atom):
    """Defines an atom of a signed integral type (int kind)."""

    kind = 'int'
    signed = True

    # Default type and value, also used to build the constructor below.
    _deftype = 'int32'
    _defvalue = 0

    # Abstract constructor: selects the concrete class by item size.
    __init__ = _abstract_atom_init(_deftype, _defvalue)

619

620

class UIntAtom(Atom):
    """Defines an atom of an unsigned integral type (uint kind)."""

    kind = 'uint'
    signed = False

    # Default type and value, also used to build the constructor below.
    _deftype = 'uint32'
    _defvalue = 0

    # Abstract constructor: selects the concrete class by item size.
    __init__ = _abstract_atom_init(_deftype, _defvalue)

629

630

class FloatAtom(Atom):
    """Defines an atom of a floating point type (float kind)."""

    kind = 'float'

    # Default type and value, also used to build the constructor below.
    _deftype = 'float64'
    _defvalue = 0.0

    # Abstract constructor: selects the concrete class by item size.
    __init__ = _abstract_atom_init(_deftype, _defvalue)

638

639

def _create_numeric_class(baseclass, itemsize):
    """Create a numeric atom class with the given `baseclass` and an
    `itemsize`.

    The new class is named after the prefix of `baseclass` followed by
    the precision in bits (``itemsize * 8``) and ``Atom``; its ``type``
    is the lowercase form of that prefix plus precision.
    """

    prefix = baseclass.prefix() + '%d' % (itemsize * 8)
    type_ = prefix.lower()

    def __init__(self, shape=(), dflt=baseclass._defvalue):
        Atom.__init__(self, self.type, shape, dflt)

    classdict = {
        'itemsize': itemsize,
        'type': type_,
        '__doc__': f"Defines an atom of type ``{type_}``.",
        '__init__': __init__,
    }
    return type(prefix + 'Atom', (baseclass,), classdict)

653

654

# Concrete fixed-size integer atoms.  The second argument is the item size
# in bytes; the class name carries the precision in bits.
Int8Atom = _create_numeric_class(IntAtom, 1)
Int16Atom = _create_numeric_class(IntAtom, 2)
Int32Atom = _create_numeric_class(IntAtom, 4)
Int64Atom = _create_numeric_class(IntAtom, 8)
UInt8Atom = _create_numeric_class(UIntAtom, 1)
UInt16Atom = _create_numeric_class(UIntAtom, 2)
UInt32Atom = _create_numeric_class(UIntAtom, 4)
UInt64Atom = _create_numeric_class(UIntAtom, 8)

# Concrete floating point atoms.  The 16, 96 and 128 bit variants are only
# defined when the installed NumPy exposes the matching scalar type.
if hasattr(np, 'float16'):
    Float16Atom = _create_numeric_class(FloatAtom, 2)
Float32Atom = _create_numeric_class(FloatAtom, 4)
Float64Atom = _create_numeric_class(FloatAtom, 8)
if hasattr(np, 'float96'):
    Float96Atom = _create_numeric_class(FloatAtom, 12)
if hasattr(np, 'float128'):
    Float128Atom = _create_numeric_class(FloatAtom, 16)

672

673

class ComplexAtom(Atom):
    """Defines an atom of kind complex.

    Allowed item sizes are 8 (single precision) and 16 (double precision). This
    class must be used instead of more concrete ones to avoid confusions with
    numarray-like precision specifications used in PyTables 1.X.

    """

    # This definition is a little more complex (no pun intended)
    # because, although the complex kind is a normal numerical one,
    # the usage of bottom-level classes is artificially forbidden.
    # Everything will be back to normality when people has stopped
    # using the old bottom-level complex classes.

    kind = 'complex'
    _deftype = 'complex128'
    _defvalue = 0j
    _isizes = [8, 16]

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    # Only instances have a `type` attribute, so complex types must be
    # registered by hand.
    all_types.update(('complex64', 'complex128'))
    # Wider precisions are accepted only when NumPy provides them.
    if hasattr(np, 'complex192'):
        all_types.add('complex192')
        _isizes.append(24)
    if hasattr(np, 'complex256'):
        all_types.add('complex256')
        _isizes.append(32)

    def __init__(self, itemsize, shape=(), dflt=_defvalue):
        if itemsize not in self._isizes:
            raise _invalid_itemsize_error('complex', itemsize, self._isizes)
        # The type name is the kind followed by the precision in bits.
        typename = '%s%d' % (self.kind, itemsize * 8)
        self.type = typename
        Atom.__init__(self, typename, shape, dflt)

715

716

class _ComplexErrorAtom(ComplexAtom, metaclass=type):
    """Reminds the user to stop using the old complex atom names."""

    def __init__(self, shape=(), dflt=ComplexAtom._defvalue):
        # Instantiation always fails: the message points to the
        # supported constructor.
        message = (
            "to avoid confusions with PyTables 1.X complex atom names, "
            "please use ``ComplexAtom(itemsize=N)``, "
            "where N=8 for single precision complex atoms, "
            "and N=16 for double precision complex atoms")
        raise TypeError(message)

726

727

# The old per-precision complex class names are all bound to
# ``_ComplexErrorAtom``, whose constructor raises ``TypeError`` telling the
# user to call ``ComplexAtom(itemsize=N)`` instead.  The 192 and 256 bit
# names are only bound when NumPy exposes the matching complex type.
Complex32Atom = Complex64Atom = Complex128Atom = _ComplexErrorAtom
if hasattr(np, 'complex192'):
    Complex192Atom = _ComplexErrorAtom
if hasattr(np, 'complex256'):
    Complex256Atom = _ComplexErrorAtom

733

734

class TimeAtom(Atom):
    """Defines an atom of time type (time kind).

    Two distinct types of time are supported: a 32 bit integer value and
    a 64 bit floating point value. Both of them reflect the number of seconds
    since the Unix epoch. This atom has the property of being stored using the
    HDF5 time datatypes.

    """

    kind = 'time'

    # Default type and value, also used to build the constructor below.
    _deftype = 'time32'
    _defvalue = 0

    # Abstract constructor: selects the concrete class by item size.
    __init__ = _abstract_atom_init(_deftype, _defvalue)

749

750

class Time32Atom(TimeAtom):
    """Defines an atom of type time32."""

    type = 'time32'
    itemsize = 4
    _defvalue = 0

    def __init__(self, shape=(), dflt=_defvalue):
        # The NumPy type passed to the base constructor is ``int32``.
        Atom.__init__(self, nptype='int32', shape=shape, dflt=dflt)

760

761

class Time64Atom(TimeAtom):
    """Defines an atom of type time64."""

    type = 'time64'
    itemsize = 8
    _defvalue = 0.0

    def __init__(self, shape=(), dflt=_defvalue):
        # The NumPy type passed to the base constructor is ``float64``.
        Atom.__init__(self, nptype='float64', shape=shape, dflt=dflt)

771

772

773class EnumAtom(Atom):

774 """Description of an atom of an enumerated type.

775

776 Instances of this class describe the atom type used to store enumerated

777 values. Those values belong to an enumerated type, defined by the first

778 argument (enum) in the constructor of the atom, which accepts the same

779 kinds of arguments as the Enum class (see :ref:`EnumClassDescr`). The

780 enumerated type is stored in the enum attribute of the atom.

781

782 A default value must be specified as the second argument (dflt) in the

783 constructor; it must be the *name* (a string) of one of the enumerated

784 values in the enumerated type. When the atom is created, the corresponding

785 concrete value is broadcast and stored in the dflt attribute (setting

786 different default values for items in a multidimensional atom is not

787 supported yet). If the name does not match any value in the enumerated

788 type, a KeyError is raised.

789

790 Another atom must be specified as the base argument in order to determine

791 the base type used for storing the values of enumerated values in memory

792 and disk. This *storage atom* is kept in the base attribute of the created

793 atom. As a shorthand, you may specify a PyTables type instead of the

794 storage atom, implying that this has a scalar shape.

795

796 The storage atom should be able to represent each and every concrete value

797 in the enumeration. If it is not, a TypeError is raised. The default value

798 of the storage atom is ignored.

799

800 The type attribute of enumerated atoms is always enum.

801

802 Enumerated atoms also support comparisons with other objects::

803

804 >>> enum = ['T0', 'T1', 'T2']

805 >>> atom1 = EnumAtom(enum, 'T0', 'int8') # same as ``atom2``

806 >>> atom2 = EnumAtom(enum, 'T0', Int8Atom()) # same as ``atom1``

807 >>> atom3 = EnumAtom(enum, 'T0', 'int16')

808 >>> atom4 = Int8Atom()

809 >>> atom1 == enum

810 False

811 >>> atom1 == atom2

812 True

813 >>> atom2 != atom1

814 False

815 >>> atom1 == atom3

816 False

817 >>> atom1 == atom4

818 False

819 >>> atom4 != atom1

820 True

821

822 Examples

823 --------

824

825 The next C enum construction::

826

827 enum myEnum {

828 T0,

829 T1,

830 T2

831 };

832

833 would correspond to the following PyTables

834 declaration::

835

836 >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', 'int32')

837

838 Please note the dflt argument with a value of 'T0'. Since the concrete

839 value matching T0 is unknown right now (we have not used explicit concrete

840 values), using the name is the only option left for defining a default

841 value for the atom.

842

843 The chosen representation of values for this enumerated atom uses unsigned

844 32-bit integers, which surely wastes quite a lot of memory. Another size

845 could be selected by using the base argument (this time with a full-blown

846 storage atom)::

847

848 >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', UInt8Atom())

849

850 You can also define multidimensional arrays for data elements::

851

852 >>> my_enum_atom = EnumAtom(

853 ... ['T0', 'T1', 'T2'], 'T0', base='uint32', shape=(3,2))

854

855 for 3x2 arrays of uint32.

856

857 """

858

859 # Registering this class in the class map may be a little wrong,

860 # since the ``Atom.from_kind()`` method fails miserably with

861 # enumerations, as they don't support an ``itemsize`` argument.

862 # However, resetting ``__metaclass__`` to ``type`` doesn't seem to

863 # work and I don't feel like creating a subclass of ``MetaAtom``.

864

865 kind = 'enum'

866 type = 'enum'

867

868 @property

869 def itemsize(self):

870 """Size in bytes of a single item in the atom."""

871 return self.dtype.base.itemsize

872

873 def _checkbase(self, base):

874 """Check the `base` storage atom."""

875

876 if base.kind == 'enum':

877 raise TypeError("can not use an enumerated atom "

878 "as a storage atom: %r" % base)

879

880 # Check whether the storage atom can represent concrete values

881 # in the enumeration...

882 basedtype = base.dtype

883 pyvalues = [value for (name, value) in self.enum]

884 try:

885 npgenvalues = np.array(pyvalues)

886 except ValueError:

887 raise TypeError("concrete values are not uniformly-shaped")

888 try:

889 npvalues = np.array(npgenvalues, dtype=basedtype.base)

890 except ValueError:

891 raise TypeError("storage atom type is incompatible with "

892 "concrete values in the enumeration")

893 if npvalues.shape[1:] != basedtype.shape:

894 raise TypeError("storage atom shape does not match that of "

895 "concrete values in the enumeration")

896 if npvalues.tolist() != npgenvalues.tolist():

897 raise TypeError("storage atom type lacks precision for "

898 "concrete values in the enumeration")

899

900 # ...with some implementation limitations.

901 if npvalues.dtype.kind not in ['i', 'u']:

902 raise NotImplementedError("only integer concrete values "

903 "are supported for the moment, sorry")

904 if len(npvalues.shape) > 1:

905 raise NotImplementedError("only scalar concrete values "

906 "are supported for the moment, sorry")

907

908 def _get_init_args(self):

909 """Get a dictionary of instance constructor arguments."""

910

911 return dict(enum=self.enum, dflt=self._defname,

912 base=self.base, shape=self.shape)

913

914 def _is_equal_to_atom(self, atom):

915 """Is this object equal to the given `atom`?"""

916

917 return False

918

919 def _is_equal_to_enumatom(self, enumatom):

920 """Is this object equal to the given `enumatom`?"""

921

922 return (self.enum == enumatom.enum and self.shape == enumatom.shape

923 and np.all(self.dflt == enumatom.dflt)

924 and self.base == enumatom.base)

925

926 def __init__(self, enum, dflt, base, shape=()):

927 if not isinstance(enum, Enum):

928 enum = Enum(enum)

929 self.enum = enum

930

931 if isinstance(base, str):

932 base = Atom.from_type(base)

933 self._checkbase(base)

934 self.base = base

935

936 default = enum[dflt] # check default value

937 self._defname = dflt # kept for representation purposes

938

939 # These are kept to ease dumping this particular

940 # representation of the enumeration to storage.

941 names, values = [], []

942 for (name, value) in enum:

943 names.append(name)

944 values.append(value)

945 basedtype = self.base.dtype

946

947 self._names = names

948 self._values = np.array(values, dtype=basedtype.base)

949

950 Atom.__init__(self, basedtype, shape, default)

951

952 def __repr__(self):

953 return ('EnumAtom(enum=%r, dflt=%r, base=%r, shape=%r)'

954 % (self.enum, self._defname, self.base, self.shape))

955

# Equality is built by `_cmp_dispatcher`, which is given the name of the
# `_is_equal_to_enumatom` method.
__eq__ = _cmp_dispatcher('_is_equal_to_enumatom')

# XXX: API-incompatible change for the PyTables 3 line.
# Overriding __eq__ blocks inheritance of __hash__ in Python 3.x; the
# explicit definition below is currently disabled.
# def __hash__(self):
#     return hash((self.__class__, self.enum, self.shape, self.dflt,
#                  self.base))

963

964

class ReferenceAtom(Atom):
    """Atom of type ``object`` used to read references.

    This atom is read-only.  Its only constructor argument is `shape`.
    """

    kind = 'reference'
    type = 'object'
    _deftype = 'NoneType'
    _defvalue = None

    def __init__(self, shape=()):
        # The type and default are fixed by the class attributes above.
        Atom.__init__(self, self.type, shape, self._defvalue)

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        base_dtype = self.dtype.base
        return base_dtype.itemsize

    def __repr__(self):
        shape_text = format(self.shape)
        return 'ReferenceAtom(shape=' + shape_text + ')'

985

986# Pseudo-atom classes

987# ===================

988#

# Next come three special classes, `ObjectAtom`, `VLStringAtom` and
# `VLUnicodeAtom`, which do not actually descend from `Atom` but whose
# goal is so similar that they are described here.

992# Pseudo-atoms can only be used with `VLArray` datasets, and they do

993# not support multidimensional values, nor multiple values per row.

994#

995# They can be recognised because they also have ``kind``, ``type`` and

996# ``shape`` attributes, but no ``size``, ``itemsize`` or ``dflt``

997# ones. Instead, they have a ``base`` atom which defines the elements

998# used for storage.

999#

1000# See ``examples/vlarray1.py`` and ``examples/vlarray2.py`` for

1001# further examples on `VLArray` datasets, including object

1002# serialization and string management.

1003

1004

class PseudoAtom:
    """Base class for pseudo-atoms, usable only in ``VLArray`` nodes.

    Pseudo-atoms expose `kind`, `type` and `shape` attributes like regular
    atoms, but lack `size`, `itemsize` and `dflt`.  In their place they
    carry a `base` atom describing the elements used for storage.

    Subclasses are expected to provide `toarray` and `fromarray`; the
    versions defined here raise `NotImplementedError`.
    """

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name

    def toarray(self, object_):
        """Turn `object_` into an array of base atoms."""

        raise NotImplementedError

    def fromarray(self, array):
        """Turn an `array` of base atoms back into an object."""

        raise NotImplementedError

1026

1027

class _BufferedAtom(PseudoAtom):
    """Pseudo-atom which stores data as a buffer (flat array of uints)."""

    shape = ()

    def toarray(self, object_):
        """Wrap the buffer built by `_tobuffer` in a flat array.

        The array uses the dtype of the `base` atom and has ``len(buffer)``
        elements.
        """
        raw = self._tobuffer(object_)
        return np.ndarray(shape=len(raw), dtype=self.base.dtype, buffer=raw)

    def _tobuffer(self, object_):
        """Turn `object_` into a buffer; to be provided by subclasses."""

        raise NotImplementedError

1043

1044

class VLStringAtom(_BufferedAtom):
    """Defines an atom of type ``vlstring``.

    This class describes a *row* of the VLArray class, rather than an atom. It
    differs from the StringAtom class in that you can only add *one instance of
    it to one specific row*, i.e. the :meth:`VLArray.append` method only
    accepts one object when the base atom is of this type.

    This class stores bytestrings. It does not make assumptions on the
    encoding of the string, and raw bytes are stored as is. To store a string
    you will need to *explicitly* convert it to a bytestring before you can
    save it::

        >>> s = 'A unicode string: hbar = \u210f'
        >>> bytestring = s.encode('utf-8')
        >>> VLArray.append(bytestring)  # doctest: +SKIP

    For full Unicode support, using VLUnicodeAtom (see :ref:`VLUnicodeAtom`) is
    recommended.

    Variable-length string atoms do not accept parameters and they cause the
    reads of rows to always return Python bytestrings. You can regard vlstring
    atoms as an easy way to save generic variable length strings.

    """

    kind = 'vlstring'
    type = 'vlstring'
    base = UInt8Atom()

    def _tobuffer(self, object_):
        """Convert `object_` into a NumPy bytes scalar.

        A ``str`` is still accepted but triggers a `DeprecationWarning`;
        anything that is neither ``str`` nor ``bytes`` raises `TypeError`.
        """
        if isinstance(object_, str):
            warnings.warn("Storing non bytestrings in VLStringAtom is "
                          "deprecated.", DeprecationWarning)
        elif not isinstance(object_, bytes):
            raise TypeError(f"object is not a string: {object_!r}")
        # ``np.bytes_`` is the canonical name of the type formerly also
        # reachable as ``np.string_``; that alias was removed in NumPy 2.0.
        return np.bytes_(object_)

    def fromarray(self, array):
        """Return the raw bytes held by `array`."""
        return array.tobytes()

1085

1086

class VLUnicodeAtom(_BufferedAtom):
    """Defines an atom of type vlunicode.

    This class describes a *row* of the VLArray class, rather than an atom. It
    is very similar to VLStringAtom (see :ref:`VLStringAtom`), but it stores
    Unicode strings (using 32-bit characters a la UCS-4, so all strings of the
    same length also take up the same space).

    This class does not make assumptions on the encoding of plain input
    strings. Plain strings are supported as long as no character is out of the
    ASCII set; otherwise, you will need to *explicitly* convert them to Unicode
    before you can save them.

    Variable-length Unicode atoms do not accept parameters and they cause the
    reads of rows to always return Python Unicode strings. You can regard
    vlunicode atoms as an easy way to save variable length Unicode strings.

    """

    kind = 'vlunicode'
    type = 'vlunicode'
    base = UInt32Atom()

    # NumPy unicode scalars no longer implement the buffer interface in
    # Python 3.
    #
    # When the Python build is UCS-2, we need to promote the
    # Unicode string to UCS-4.  We *must* use a 0-d array since
    # NumPy scalars inherit the UCS-2 encoding from Python (see
    # NumPy ticket #525).  Since ``_tobuffer()`` can't return an
    # array, we must override ``toarray()`` itself.
    def toarray(self, object_):
        """Convert `object_` into a flat array of base atoms.

        ``bytes`` input is accepted with a `DeprecationWarning`; anything
        that is neither ``bytes`` nor ``str`` raises `TypeError`.  The
        result has one element per character of the converted string.
        """
        if isinstance(object_, bytes):
            warnings.warn("Storing bytestrings in VLUnicodeAtom is "
                          "deprecated.", DeprecationWarning)
        elif not isinstance(object_, str):
            raise TypeError(f"object is not a string: {object_!r}")
        # NOTE(review): for ``bytes`` input, ``str()`` yields the repr
        # (e.g. "b'abc'") rather than decoded text -- confirm whether the
        # deprecated path should decode instead.
        ustr = str(object_)
        # A 0-d ``'U'`` array provides the buffer that is reinterpreted
        # with the dtype of the base atom.
        uarr = np.array(ustr, dtype='U')
        return np.ndarray(
            buffer=uarr, dtype=self.base.dtype, shape=len(ustr))

    def _tobuffer(self, object_):
        """Convert `object_` into a NumPy unicode scalar.

        Applies the same type checks as `toarray`.
        """
        # This works (and is used) only with UCS-4 builds of Python,
        # where the width of the internal representation of a
        # character matches that of the base atoms.
        if isinstance(object_, bytes):
            warnings.warn("Storing bytestrings in VLUnicodeAtom is "
                          "deprecated.", DeprecationWarning)
        elif not isinstance(object_, str):
            raise TypeError(f"object is not a string: {object_!r}")
        # ``np.str_`` is the canonical name of the type formerly also
        # reachable as ``np.unicode_``; that alias was removed in NumPy 2.0.
        return np.str_(object_)

    def fromarray(self, array):
        """Rebuild a Python string from an `array` of base atoms."""
        length = len(array)
        if length == 0:
            return ''  # ``array.view('U0')`` raises a `TypeError`
        # View the whole array as one fixed-width unicode item.
        return array.view('U%d' % length).item()

1144

1145

class ObjectAtom(_BufferedAtom):
    """Defines an atom of type object.

    This class is meant to fit *any* kind of Python object in a row of a
    VLArray dataset by using pickle behind the scenes.  Because the length
    of the pickled output cannot be foreseen (i.e. the atom already has a
    *variable* length), only *one object per row* can be stored.  Several
    objects can still be grouped in a single tuple or list and passed to the
    :meth:`VLArray.append` method.

    Object atoms do not accept parameters and they cause the reads of rows to
    always return Python objects.  You can regard object atoms as an easy way
    to save an arbitrary number of generic Python objects in a VLArray
    dataset.

    """

    kind = 'object'
    type = 'object'
    base = UInt8Atom()

    def _tobuffer(self, object_):
        """Serialize `object_` with the highest available pickle protocol."""
        protocol = pickle.HIGHEST_PROTOCOL
        return pickle.dumps(object_, protocol)

    def fromarray(self, array):
        """Unpickle the object stored in `array` (``None`` if it is empty)."""
        # An empty array has to be special-cased because of a possible
        # bug in HDF5 which makes it claim that a dataset has one
        # record when in fact it is empty.
        if array.size != 0:
            # NOTE: unpickling can execute arbitrary code; the data read
            # here should come from a trusted source.
            payload = array.tobytes()
            return pickle.loads(payload)
        return None