Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/atom.py: 56%

416 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-10 06:15 +0000

1"""Atom classes for describing dataset contents.""" 

2 

3import re 

4import inspect 

5import warnings 

6 

7import numpy as np 

8 

9from .utils import SizeType 

10from .misc.enum import Enum 

11 

12import pickle 

13 

14from .exceptions import FlavorWarning 

15 

__docformat__ = 'reStructuredText'
"""The format of documentation strings in this module."""

# Registry of PyTables type names.  ``MetaAtom.__init__`` adds the ``type``
# declared in the body of every atom class; ``ComplexAtom`` registers its
# type names by hand.  ``Atom.from_type()`` validates its argument against it.
all_types = set()  # filled as atom classes are created
"""Set of all PyTables types."""

# Registry keyed by kind, populated by ``MetaAtom.__init__``.  The value is
# the atom class itself when its ``itemsize`` is not a number (e.g. a
# property), or a dictionary ``{itemsize_in_bytes: atom_class}`` otherwise.
atom_map = {}  # filled as atom classes are created
"""Maps atom kinds to item sizes and atom classes.

If there is a fixed set of possible item sizes for a given kind, the
kind maps to another mapping from item size in bytes to atom class.
Otherwise, the kind maps directly to the atom class.
"""

# ``MetaAtom.__init__`` stores here the ``_deftype`` of every class that
# declares both a ``kind`` and a ``_deftype``; ``Atom.from_kind()`` consults
# it when no item size is given.
deftype_from_kind = {}  # filled as atom classes are created
"""Maps atom kinds to their default atom type (if any)."""

32 

33 

# A PyTables type is a lowercase kind optionally followed by a precision
# expressed in bits (e.g. ``int32``, ``string``).
_type_re = re.compile(r'^([a-z]+)([0-9]*)$')


def split_type(type):
    """Break a PyTables type name into its kind and its item size in bytes.

    The result is the tuple ``(kind, itemsize)``.  When the type carries no
    trailing precision, ``itemsize`` is None.  The precision is given in
    bits and must be divisible by 8::

        >>> split_type('int32')
        ('int', 4)
        >>> split_type('string')
        ('string', None)
        >>> split_type('int20')
        Traceback (most recent call last):
        ...
        ValueError: precision must be a multiple of 8: 20
        >>> split_type('foo bar')
        Traceback (most recent call last):
        ...
        ValueError: malformed type: 'foo bar'

    """

    parsed = _type_re.match(type)
    if parsed is None:
        raise ValueError("malformed type: %r" % type)

    kind, digits = parsed.groups()
    if not digits:
        # No precision suffix at all: the item size is unknown.
        return (kind, None)

    bits = int(digits)
    nbytes, leftover = divmod(bits, 8)
    # Only the remainder is checked: a precision of 0 is let through
    # because 0 could be a valid item size.
    if leftover:
        raise ValueError("precision must be a multiple of 8: %d"
                         % bits)
    return (kind, nbytes)

70 

71 

72def _invalid_itemsize_error(kind, itemsize, itemsizes): 

73 isizes = sorted(itemsizes) 

74 return ValueError("invalid item size for kind ``%s``: %r; " 

75 "it must be one of ``%r``" 

76 % (kind, itemsize, isizes)) 

77 

78 

def _abstract_atom_init(deftype, defvalue):
    """Make the ``__init__`` method of an abstract, per-kind `Atom` class.

    The generated constructor looks up the concrete class registered in
    ``atom_map`` for the instance's kind and the requested `itemsize`,
    rebinds the instance to that class and then runs the concrete
    constructor.  The default item size is the one encoded in `deftype`;
    the default value is `defvalue`.
    """

    _, defitemsize = split_type(deftype)

    def __init__(self, itemsize=defitemsize, shape=(), dflt=defvalue):
        kind = self.kind
        assert kind in atom_map
        try:
            concrete = atom_map[kind][itemsize]
        except KeyError:
            raise _invalid_itemsize_error(kind, itemsize, atom_map[kind])
        # Turn the abstract instance into an instance of the concrete
        # class before initialising it.
        self.__class__ = concrete
        concrete.__init__(self, shape, dflt)

    return __init__

94 

95 

def _normalize_shape(shape):
    """Validate `shape` and return it as a tuple of ``SizeType`` values.

    A bare integer N is accepted as a shorthand for ``(N,)`` and must be
    at least 1.  Anything else must be convertible to a tuple.  Shapes
    with more than 32 dimensions are rejected.
    """

    if isinstance(shape, (int, np.integer)):
        if shape < 1:
            raise ValueError("shape value must be greater than 0: %d"
                             % shape)
        dims = (shape,)  # N is a shorthand for (N,)
    else:
        dims = shape

    try:
        dims = tuple(dims)
    except TypeError:
        raise TypeError("shape must be an integer or sequence: %r"
                        % (dims,))

    # XXX Get from HDF5 library if possible.
    # HDF5 does not support ranks greater than 32
    if len(dims) > 32:
        raise ValueError(
            f"shapes with rank > 32 are not supported: {dims!r}")

    return tuple(map(SizeType, dims))

117 

118 

119def _normalize_default(value, dtype): 

120 """Return `value` as a valid default of NumPy type `dtype`.""" 

121 

122 # Create NumPy objects as defaults 

123 # This is better in order to serialize them as attributes 

124 if value is None: 

125 value = 0 

126 basedtype = dtype.base 

127 try: 

128 default = np.array(value, dtype=basedtype) 

129 except ValueError: 

130 array = np.array(value) 

131 if array.shape != basedtype.shape: 

132 raise 

133 # Maybe nested dtype with "scalar" value. 

134 default = np.array(value, dtype=basedtype.base) 

135 # 0-dim arrays will be representented as NumPy scalars 

136 # (PyTables attribute convention) 

137 if default.shape == (): 

138 default = default[()] 

139 return default 

140 

141 

142def _cmp_dispatcher(other_method_name): 

143 """Dispatch comparisons to a method of the *other* object. 

144 

145 Returns a new *rich comparison* method which dispatches calls to 

146 the method `other_method_name` of the *other* object. If there is 

147 no such method in the object, ``False`` is returned. 

148 

149 This is part of the implementation of a double dispatch pattern. 

150 """ 

151 

152 def dispatched_cmp(self, other): 

153 try: 

154 other_method = getattr(other, other_method_name) 

155 except AttributeError: 

156 return False 

157 return other_method(self) 

158 return dispatched_cmp 

159 

160 

class MetaAtom(type):
    """Metaclass of all atom classes.

    Each time an atom class is created, the data declared in its class
    body is recorded in the module registries ``all_types``,
    ``deftype_from_kind`` and ``atom_map``.

    """

    def __init__(cls, name, bases, dict_):
        super().__init__(name, bases, dict_)

        # Only attributes declared directly in the class body are used.
        kind = dict_.get('kind')
        itemsize = dict_.get('itemsize')
        type_ = dict_.get('type')
        deftype = dict_.get('_deftype')
        is_numeric_size = hasattr(itemsize, '__int__')

        if type_:
            all_types.add(type_)

        if kind and deftype:
            deftype_from_kind[kind] = deftype

        if kind and itemsize and not is_numeric_size:
            # Atom classes with a non-fixed item size do have an
            # ``itemsize``, but it's not a number (e.g. property).
            # The kind then maps straight to the class.
            atom_map[kind] = cls
            return

        if kind:
            # First definition of the kind: start a new size -> class map.
            atom_map[kind] = {}

        if itemsize and is_numeric_size:
            # Fixed item size; the kind may come from a superclass.
            atom_map[cls.kind][int(itemsize)] = cls

195 

196 

class Atom(metaclass=MetaAtom):
    """Describes the type of the atomic cells stored in a dataset.

    *Atomic* means that the individual elements of a cell can not be
    extracted directly by indexing (i.e. __getitem__()) the dataset;
    e.g. if a dataset has shape (2, 2) and its atoms have shape (3,),
    the third element of the cell at (1, 0) is reached with
    dataset[1,0][2] rather than dataset[1,0,2].

    The Atom class declares the properties of the *base element* (also
    known as *atom*) of CArray, EArray and VLArray datasets, and it is
    also used to describe the base elements of Array datasets.  The
    length of an atom is always the same.  You can nevertheless grow
    datasets along the extensible dimension of an EArray, or put a
    variable number of atoms on a VLArray row.  Atoms are not restricted
    to scalar values: they can be *fully multidimensional objects*.

    Parameters
    ----------
    itemsize : int
        For types with a non-fixed size, this sets the size in
        bytes of individual items in the atom.
    shape : tuple
        Sets the shape of the atom. An integer shape of
        N is equivalent to the tuple (N,).
    dflt
        Sets the default value for the atom.

    The following are the public methods and attributes of the Atom class.

    Notes
    -----
    A series of descendant classes are offered in order to make the
    use of these element descriptions easier. Use a particular Atom
    descendant class whenever you know the exact type you will need
    when writing your code. Otherwise, you may use one of the
    Atom.from_*() factory methods.

    .. rubric:: Atom attributes

    .. attribute:: dflt

        The default value of the atom.

        If the user does not supply a value for an element while
        filling a dataset, this default value will be written to disk.
        If the user supplies a scalar value for a multidimensional
        atom, this value is automatically *broadcast* to all the items
        in the atom cell. If dflt is not supplied, an appropriate zero
        value (or *null* string) will be chosen by default.  Please
        note that default values are kept internally as NumPy objects.

    .. attribute:: dtype

        The NumPy dtype that most closely matches this atom.

    .. attribute:: itemsize

        Size in bytes of a single item in the atom.
        Especially useful for atoms of the string kind.

    .. attribute:: kind

        The PyTables kind of the atom (a string).

    .. attribute:: shape

        The shape of the atom (an empty tuple for scalar atoms).

    .. attribute:: type

        The PyTables type of the atom (a string).

    Atoms can be compared with atoms and other objects for
    strict (in)equality without having to compare individual
    attributes::

        >>> atom1 = StringAtom(itemsize=10)  # same as ``atom2``
        >>> atom2 = Atom.from_kind('string', 10)  # same as ``atom1``
        >>> atom3 = IntAtom()
        >>> atom1 == 'foo'
        False
        >>> atom1 == atom2
        True
        >>> atom2 != atom1
        False
        >>> atom1 == atom3
        False
        >>> atom3 != atom2
        True

    """

    @classmethod
    def prefix(cls):
        """Return the class name up to (not including) its last 'Atom'."""
        name = cls.__name__
        cut = name.rfind('Atom')
        return name[:cut]

    @classmethod
    def from_sctype(cls, sctype, shape=(), dflt=None):
        """Create an Atom from a NumPy scalar type sctype.

        `sctype` may be a NumPy scalar class or any key of
        ``np.sctypeDict``.  An optional shape and default value may be
        given as the shape and dflt arguments.  Information in the
        sctype not represented in an Atom is ignored::

            >>> import numpy as np
            >>> Atom.from_sctype(np.int16, shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_sctype('S5', dflt='hello')
            Traceback (most recent call last):
            ...
            ValueError: unknown NumPy scalar type: 'S5'
            >>> Atom.from_sctype('float64')
            Float64Atom(shape=(), dflt=0.0)

        """
        is_scalar_class = (isinstance(sctype, type)
                           and issubclass(sctype, np.generic))
        if not is_scalar_class:
            # Not a scalar class: resolve it as a NumPy type alias.
            if sctype not in np.sctypeDict:
                raise ValueError(f"unknown NumPy scalar type: {sctype!r}")
            sctype = np.sctypeDict[sctype]
        full_dtype = np.dtype((sctype, shape))
        return cls.from_dtype(full_dtype, dflt)

    @classmethod
    def from_dtype(cls, dtype, dflt=None):
        """Create an Atom from a NumPy dtype.

        An optional default value may be specified as the dflt
        argument. Information in the dtype not represented in an Atom is
        ignored::

            >>> import numpy as np
            >>> Atom.from_dtype(np.dtype((np.int16, (2, 2))))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_dtype(np.dtype('float64'))
            Float64Atom(shape=(), dflt=0.0)

        Note: for easier use in Python 3, where all strings lead to the
        Unicode dtype, this dtype will also generate a StringAtom. Since
        this is only viable for strings that are castable as ascii, a
        warning is issued.

            >>> Atom.from_dtype(np.dtype('U20'))  # doctest: +SKIP
            Atom.py:392: FlavorWarning: support for unicode type is very
                limited, and only works for strings that can be cast as ascii
            StringAtom(itemsize=20, shape=(), dflt=b'')

        """
        basedtype = dtype.base
        if basedtype.names:
            raise ValueError("compound data types are not supported: %r"
                             % dtype)
        if basedtype.shape != ():
            raise ValueError("nested data types are not supported: %r"
                             % dtype)

        np_kind = basedtype.kind
        if np_kind == 'S':
            # can not reuse something like 'string80'
            return cls.from_kind(
                'string', basedtype.itemsize, dtype.shape, dflt)

        if np_kind == 'U':
            # workaround for unicode type (standard string type in Python 3)
            warnings.warn("support for unicode type is very limited, and "
                          "only works for strings that can be cast as ascii",
                          FlavorWarning)
            # The byte size is divided by 4 to get the string item size.
            nchars = basedtype.itemsize // 4
            assert str(nchars) in basedtype.str, (
                "something went wrong in handling unicode.")
            return cls.from_kind('string', nchars, dtype.shape, dflt)

        # Most NumPy types have direct correspondence with PyTables types.
        return cls.from_type(basedtype.name, dtype.shape, dflt)

    @classmethod
    def from_type(cls, type, shape=(), dflt=None):
        """Create an Atom from a PyTables type.

        Optional shape and default value may be specified as the
        shape and dflt arguments, respectively::

            >>> Atom.from_type('bool')
            BoolAtom(shape=(), dflt=False)
            >>> Atom.from_type('int16', shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_type('string40', dflt='hello')
            Traceback (most recent call last):
            ...
            ValueError: unknown type: 'string40'
            >>> Atom.from_type('Float64')
            Traceback (most recent call last):
            ...
            ValueError: unknown type: 'Float64'

        """

        if type in all_types:
            kind, itemsize = split_type(type)
            return cls.from_kind(kind, itemsize, shape, dflt)
        raise ValueError(f"unknown type: {type!r}")

    @classmethod
    def from_kind(cls, kind, itemsize=None, shape=(), dflt=None):
        """Create an Atom from a PyTables kind.

        Optional item size, shape and default value may be
        specified as the itemsize, shape and dflt
        arguments, respectively. Bear in mind that not all atoms support
        a default item size::

            >>> Atom.from_kind('int', itemsize=2, shape=(2, 2))
            Int16Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_kind('int', shape=(2, 2))
            Int32Atom(shape=(2, 2), dflt=0)
            >>> Atom.from_kind('int', shape=1)
            Int32Atom(shape=(1,), dflt=0)
            >>> Atom.from_kind('string', dflt=b'hello')
            Traceback (most recent call last):
            ...
            ValueError: no default item size for kind ``string``
            >>> Atom.from_kind('Float')
            Traceback (most recent call last):
            ...
            ValueError: unknown kind: 'Float'

        Moreover, some kinds with atypical constructor signatures
        are not supported; you need to use the proper
        constructor::

            >>> Atom.from_kind('enum') #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            ValueError: the ``enum`` kind is not supported...

        """

        if kind not in atom_map:
            raise ValueError(f"unknown kind: {kind!r}")
        # This incompatibility detection may get out-of-date and is
        # too hard-wired, but I couldn't come up with something
        # smarter.  -- Ivan (2007-02-08)
        if kind == 'enum':
            raise ValueError("the ``%s`` kind is not supported; "
                             "please use the appropriate constructor"
                             % kind)

        # If no `itemsize` is given, try to get the default type of the
        # kind (which has a fixed item size).
        if itemsize is None:
            if kind not in deftype_from_kind:
                raise ValueError("no default item size for kind ``%s``"
                                 % kind)
            kind, itemsize = split_type(deftype_from_kind[kind])

        ctor_args = {'shape': shape}
        registered = atom_map[kind]
        # Look up the class and set a possible item size.
        if hasattr(registered, 'kind'):
            # An atom class: non-fixed item size, passed to the constructor.
            atomclass = registered
            ctor_args['itemsize'] = itemsize
        else:
            # A dictionary: fixed item size, which selects the class.
            if itemsize not in registered:
                raise _invalid_itemsize_error(kind, itemsize, registered)
            atomclass = registered[itemsize]

        # Only set a `dflt` argument if given (`None` may not be understood).
        if dflt is not None:
            ctor_args['dflt'] = dflt

        return atomclass(**ctor_args)

    @property
    def size(self):
        """Total size in bytes of the atom."""
        return self.dtype.itemsize

    @property
    def recarrtype(self):
        """String type to be used in numpy.rec.array()."""
        dtype = self.dtype
        # Shape text followed by the base type string minus its first char.
        return str(dtype.shape) + dtype.base.str[1:]

    @property
    def ndim(self):
        """The number of dimensions of the atom.

        .. versionadded:: 2.4"""
        return len(self.shape)

    def __init__(self, nptype, shape, dflt):
        if not hasattr(self, 'type'):
            raise NotImplementedError("``%s`` is an abstract class; "
                                      "please use one of its subclasses"
                                      % self.__class__.__name__)

        # The shape of the atom (an empty tuple for scalar atoms).
        normalized = _normalize_shape(shape)
        self.shape = normalized

        # Curiously enough, NumPy isn't generally able to accept NumPy
        # integers in a shape.  ;(
        npshape = tuple(int(dim) for dim in normalized)

        # The NumPy dtype that most closely matches this atom.
        full_dtype = np.dtype((nptype, npshape))
        self.dtype = full_dtype

        # The default value of the atom, kept internally as a NumPy
        # object (see the ``dflt`` attribute in the class docstring).
        self.dflt = _normalize_default(dflt, full_dtype)

    def __repr__(self):
        klass = self.__class__
        parts = [f'shape={self.shape}', f'dflt={self.dflt!r}']
        if not hasattr(klass.itemsize, '__int__'):
            # Non-fixed item size: it is a constructor argument too.
            parts.insert(0, f'itemsize={self.itemsize}')
        return f"{klass.__name__}({', '.join(parts)})"

    __eq__ = _cmp_dispatcher('_is_equal_to_atom')

    def __ne__(self, other):
        # Call ``__eq__`` explicitly (not ``==``) so that exactly this
        # object's dispatcher is used.
        is_equal = self.__eq__(other)
        return not is_equal

    # NOTE: overriding __eq__ blocks inheritance of __hash__ in Python 3.
    # A __hash__ built from (class, type, shape, itemsize, dflt) is not
    # provided here: it was left disabled as an API incompatible change
    # for the PyTables 3 line.

    def copy(self, **override):
        """Get a copy of the atom, possibly overriding some arguments.

        Constructor arguments to be overridden must be passed as
        keyword arguments::

            >>> atom1 = Int32Atom(shape=12)
            >>> atom2 = atom1.copy()
            >>> print(atom1)
            Int32Atom(shape=(12,), dflt=0)
            >>> print(atom2)
            Int32Atom(shape=(12,), dflt=0)
            >>> atom1 is atom2
            False
            >>> atom3 = atom1.copy(shape=(2, 2))
            >>> print(atom3)
            Int32Atom(shape=(2, 2), dflt=0)
            >>> atom1.copy(foobar=42) #doctest: +ELLIPSIS
            Traceback (most recent call last):
            ...
            TypeError: ...__init__() got an unexpected keyword argument 'foobar'

        """
        init_args = self._get_init_args()
        init_args.update(override)
        return self.__class__(**init_args)

    def _get_init_args(self):
        """Get a dictionary of instance constructor arguments.

        The constructor signature is inspected and, for every
        positional-or-keyword parameter, the instance attribute of the
        same name is read.  It therefore works on classes which use the
        same names for both constructor arguments and instance
        attributes.

        """
        parameters = inspect.signature(self.__init__).parameters
        names = [name for name, param in parameters.items()
                 if param.kind is param.POSITIONAL_OR_KEYWORD]
        return {name: getattr(self, name)
                for name in names if name != 'self'}

    def _is_equal_to_atom(self, atom):
        """Is this object equal to the given `atom`?"""

        # Type, shape and item size must match, and so must every
        # element of the default value.
        return (self.type == atom.type
                and self.shape == atom.shape
                and self.itemsize == atom.itemsize
                and np.all(self.dflt == atom.dflt))

572 

573 

class StringAtom(Atom):
    """Defines an atom of type string.

    The item size is the *maximum* length in characters of strings.

    """

    kind = 'string'
    type = 'string'
    _defvalue = b''

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    def __init__(self, itemsize, shape=(), dflt=_defvalue):
        # The item size must be int-convertible and must not be negative.
        is_intlike = hasattr(itemsize, '__int__')
        if not (is_intlike and int(itemsize) >= 0):
            raise ValueError("invalid item size for kind ``%s``: %r; "
                             "it must be a positive integer"
                             % ('string', itemsize))
        # The NumPy type is the byte-string type 'S<itemsize>'.
        nptype = 'S%d' % itemsize
        Atom.__init__(self, nptype, shape, dflt)

596 

597 

class BoolAtom(Atom):
    """Defines an atom of type bool (one byte per item)."""

    type = 'bool'
    kind = 'bool'
    itemsize = 1
    _defvalue = False
    _deftype = 'bool8'

    def __init__(self, shape=(), dflt=_defvalue):
        # The PyTables type name is passed to NumPy as the type.
        nptype = self.type
        Atom.__init__(self, nptype, shape, dflt)

609 

610 

class IntAtom(Atom):
    """Defines an atom of a signed integral type (int kind).

    Creating an instance yields the concrete class registered for the
    requested item size; the default type is ``int32``.
    """

    _defvalue = 0
    _deftype = 'int32'
    kind = 'int'
    signed = True
    __init__ = _abstract_atom_init(_deftype, _defvalue)

619 

620 

class UIntAtom(Atom):
    """Defines an atom of an unsigned integral type (uint kind).

    Creating an instance yields the concrete class registered for the
    requested item size; the default type is ``uint32``.
    """

    _defvalue = 0
    _deftype = 'uint32'
    kind = 'uint'
    signed = False
    __init__ = _abstract_atom_init(_deftype, _defvalue)

629 

630 

class FloatAtom(Atom):
    """Defines an atom of a floating point type (float kind).

    Creating an instance yields the concrete class registered for the
    requested item size; the default type is ``float64``.
    """

    _defvalue = 0.0
    _deftype = 'float64'
    kind = 'float'
    __init__ = _abstract_atom_init(_deftype, _defvalue)

638 

639 

def _create_numeric_class(baseclass, itemsize):
    """Create the concrete numeric atom class for `baseclass` and `itemsize`.

    The class is named after the prefix of `baseclass` followed by the
    precision in bits and 'Atom' (``itemsize * 8``); its PyTables type is
    the same prefix and precision in lower case.
    """

    prefix = '%s%d' % (baseclass.prefix(), itemsize * 8)
    type_ = prefix.lower()

    def __init__(self, shape=(), dflt=baseclass._defvalue):
        Atom.__init__(self, self.type, shape, dflt)

    namespace = {
        'itemsize': itemsize,
        'type': type_,
        '__doc__': "Defines an atom of type ``%s``." % type_,
        '__init__': __init__,
    }
    return type('%sAtom' % prefix, (baseclass,), namespace)

653 

654 

# Concrete signed and unsigned integer atoms, one per item size in bytes.
Int8Atom = _create_numeric_class(IntAtom, 1)
Int16Atom = _create_numeric_class(IntAtom, 2)
Int32Atom = _create_numeric_class(IntAtom, 4)
Int64Atom = _create_numeric_class(IntAtom, 8)
UInt8Atom = _create_numeric_class(UIntAtom, 1)
UInt16Atom = _create_numeric_class(UIntAtom, 2)
UInt32Atom = _create_numeric_class(UIntAtom, 4)
UInt64Atom = _create_numeric_class(UIntAtom, 8)

# Concrete floating point atoms.  The 16-, 96- and 128-bit classes are only
# created when NumPy exposes the matching scalar type.
if hasattr(np, 'float16'):
    Float16Atom = _create_numeric_class(FloatAtom, 2)
Float32Atom = _create_numeric_class(FloatAtom, 4)
Float64Atom = _create_numeric_class(FloatAtom, 8)
if hasattr(np, 'float96'):
    Float96Atom = _create_numeric_class(FloatAtom, 12)
if hasattr(np, 'float128'):
    Float128Atom = _create_numeric_class(FloatAtom, 16)

672 

673 

class ComplexAtom(Atom):
    """Defines an atom of kind complex.

    Allowed item sizes are 8 (single precision) and 16 (double precision),
    plus 24 and 32 when NumPy provides ``complex192`` and ``complex256``.
    This class must be used instead of more concrete ones to avoid
    confusion with the numarray-like precision specifications used in
    PyTables 1.X.

    """

    # This definition is a little more complex (no pun intended)
    # because, although the complex kind is a normal numerical one,
    # the usage of bottom-level classes is artificially forbidden.
    # Everything will be back to normality when people have stopped
    # using the old bottom-level complex classes.

    kind = 'complex'
    _deftype = 'complex128'
    _defvalue = 0j
    _isizes = [8, 16]

    @property
    def itemsize(self):
        """Size in bytes of a single item in the atom."""
        return self.dtype.base.itemsize

    # Only instances have a `type` attribute, so complex types must be
    # registered by hand.
    all_types.update(('complex64', 'complex128'))
    if hasattr(np, 'complex192'):
        all_types.add('complex192')
        _isizes.append(24)
    if hasattr(np, 'complex256'):
        all_types.add('complex256')
        _isizes.append(32)

    def __init__(self, itemsize, shape=(), dflt=_defvalue):
        valid_sizes = self._isizes
        if itemsize not in valid_sizes:
            raise _invalid_itemsize_error('complex', itemsize, valid_sizes)
        # The type name carries the precision in bits, e.g. 'complex128'.
        type_name = '%s%d' % (self.kind, itemsize * 8)
        self.type = type_name
        Atom.__init__(self, type_name, shape, dflt)

715 

716 

class _ComplexErrorAtom(ComplexAtom, metaclass=type):
    """Reminds the user to stop using the old complex atom names.

    Any attempt to instantiate it raises ``TypeError``.
    """

    def __init__(self, shape=(), dflt=ComplexAtom._defvalue):
        message = (
            "to avoid confusions with PyTables 1.X complex atom names, "
            "please use ``ComplexAtom(itemsize=N)``, "
            "where N=8 for single precision complex atoms, "
            "and N=16 for double precision complex atoms")
        raise TypeError(message)


# The old bottom-level complex names all point to the error class above.
Complex32Atom = _ComplexErrorAtom
Complex64Atom = _ComplexErrorAtom
Complex128Atom = _ComplexErrorAtom
if hasattr(np, 'complex192'):
    Complex192Atom = _ComplexErrorAtom
if hasattr(np, 'complex256'):
    Complex256Atom = _ComplexErrorAtom

733 

734 

class TimeAtom(Atom):
    """Defines an atom of time type (time kind).

    Two distinct types of time are supported: a 32 bit integer value and
    a 64 bit floating point value.  Both reflect the number of seconds
    since the Unix epoch.  This atom has the property of being stored
    using the HDF5 time datatypes.

    Creating an instance yields the concrete class registered for the
    requested item size; the default type is ``time32``.

    """

    _defvalue = 0
    _deftype = 'time32'
    kind = 'time'
    __init__ = _abstract_atom_init(_deftype, _defvalue)

749 

750 

class Time32Atom(TimeAtom):
    """Defines an atom of type time32 (NumPy type ``int32``)."""

    type = 'time32'
    itemsize = 4
    _defvalue = 0

    def __init__(self, shape=(), dflt=_defvalue):
        nptype = 'int32'
        Atom.__init__(self, nptype, shape, dflt)

760 

761 

class Time64Atom(TimeAtom):
    """Defines an atom of type time64 (NumPy type ``float64``)."""

    type = 'time64'
    itemsize = 8
    _defvalue = 0.0

    def __init__(self, shape=(), dflt=_defvalue):
        nptype = 'float64'
        Atom.__init__(self, nptype, shape, dflt)

771 

772 

773class EnumAtom(Atom): 

774 """Description of an atom of an enumerated type. 

775 

776 Instances of this class describe the atom type used to store enumerated 

777 values. Those values belong to an enumerated type, defined by the first 

778 argument (enum) in the constructor of the atom, which accepts the same 

779 kinds of arguments as the Enum class (see :ref:`EnumClassDescr`). The 

780 enumerated type is stored in the enum attribute of the atom. 

781 

782 A default value must be specified as the second argument (dflt) in the 

783 constructor; it must be the *name* (a string) of one of the enumerated 

784 values in the enumerated type. When the atom is created, the corresponding 

785 concrete value is broadcast and stored in the dflt attribute (setting 

786 different default values for items in a multidimensional atom is not 

787 supported yet). If the name does not match any value in the enumerated 

788 type, a KeyError is raised. 

789 

790 Another atom must be specified as the base argument in order to determine 

791 the base type used for storing the values of enumerated values in memory 

792 and disk. This *storage atom* is kept in the base attribute of the created 

793 atom. As a shorthand, you may specify a PyTables type instead of the 

794 storage atom, implying that this has a scalar shape. 

795 

796 The storage atom should be able to represent each and every concrete value 

797 in the enumeration. If it is not, a TypeError is raised. The default value 

798 of the storage atom is ignored. 

799 

800 The type attribute of enumerated atoms is always enum. 

801 

802 Enumerated atoms also support comparisons with other objects:: 

803 

804 >>> enum = ['T0', 'T1', 'T2'] 

805 >>> atom1 = EnumAtom(enum, 'T0', 'int8') # same as ``atom2`` 

806 >>> atom2 = EnumAtom(enum, 'T0', Int8Atom()) # same as ``atom1`` 

807 >>> atom3 = EnumAtom(enum, 'T0', 'int16') 

808 >>> atom4 = Int8Atom() 

809 >>> atom1 == enum 

810 False 

811 >>> atom1 == atom2 

812 True 

813 >>> atom2 != atom1 

814 False 

815 >>> atom1 == atom3 

816 False 

817 >>> atom1 == atom4 

818 False 

819 >>> atom4 != atom1 

820 True 

821 

822 Examples 

823 -------- 

824 

825 The next C enum construction:: 

826 

827 enum myEnum { 

828 T0, 

829 T1, 

830 T2 

831 }; 

832 

833 would correspond to the following PyTables 

834 declaration:: 

835 

836 >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', 'int32') 

837 

838 Please note the dflt argument with a value of 'T0'. Since the concrete 

839 value matching T0 is unknown right now (we have not used explicit concrete 

840 values), using the name is the only option left for defining a default 

841 value for the atom. 

842 

843 The chosen representation of values for this enumerated atom uses unsigned 

844 32-bit integers, which surely wastes quite a lot of memory. Another size 

845 could be selected by using the base argument (this time with a full-blown 

846 storage atom):: 

847 

848 >>> my_enum_atom = EnumAtom(['T0', 'T1', 'T2'], 'T0', UInt8Atom()) 

849 

850 You can also define multidimensional arrays for data elements:: 

851 

852 >>> my_enum_atom = EnumAtom( 

853 ... ['T0', 'T1', 'T2'], 'T0', base='uint32', shape=(3,2)) 

854 

855 for 3x2 arrays of uint32. 

856 

857 """ 

858 

859 # Registering this class in the class map may be a little wrong, 

860 # since the ``Atom.from_kind()`` method fails miserably with 

861 # enumerations, as they don't support an ``itemsize`` argument. 

862 # However, resetting ``__metaclass__`` to ``type`` doesn't seem to 

863 # work and I don't feel like creating a subclass of ``MetaAtom``. 

864 

865 kind = 'enum' 

866 type = 'enum' 

867 

868 @property 

869 def itemsize(self): 

870 """Size in bytes of a single item in the atom.""" 

871 return self.dtype.base.itemsize 

872 

873 def _checkbase(self, base): 

874 """Check the `base` storage atom.""" 

875 

876 if base.kind == 'enum': 

877 raise TypeError("can not use an enumerated atom " 

878 "as a storage atom: %r" % base) 

879 

880 # Check whether the storage atom can represent concrete values 

881 # in the enumeration... 

882 basedtype = base.dtype 

883 pyvalues = [value for (name, value) in self.enum] 

884 try: 

885 npgenvalues = np.array(pyvalues) 

886 except ValueError: 

887 raise TypeError("concrete values are not uniformly-shaped") 

888 try: 

889 npvalues = np.array(npgenvalues, dtype=basedtype.base) 

890 except ValueError: 

891 raise TypeError("storage atom type is incompatible with " 

892 "concrete values in the enumeration") 

893 if npvalues.shape[1:] != basedtype.shape: 

894 raise TypeError("storage atom shape does not match that of " 

895 "concrete values in the enumeration") 

896 if npvalues.tolist() != npgenvalues.tolist(): 

897 raise TypeError("storage atom type lacks precision for " 

898 "concrete values in the enumeration") 

899 

900 # ...with some implementation limitations. 

901 if npvalues.dtype.kind not in ['i', 'u']: 

902 raise NotImplementedError("only integer concrete values " 

903 "are supported for the moment, sorry") 

904 if len(npvalues.shape) > 1: 

905 raise NotImplementedError("only scalar concrete values " 

906 "are supported for the moment, sorry") 

907 

908 def _get_init_args(self): 

909 """Get a dictionary of instance constructor arguments.""" 

910 

911 return dict(enum=self.enum, dflt=self._defname, 

912 base=self.base, shape=self.shape) 

913 

914 def _is_equal_to_atom(self, atom): 

915 """Is this object equal to the given `atom`?""" 

916 

917 return False 

918 

919 def _is_equal_to_enumatom(self, enumatom): 

920 """Is this object equal to the given `enumatom`?""" 

921 

922 return (self.enum == enumatom.enum and self.shape == enumatom.shape 

923 and np.all(self.dflt == enumatom.dflt) 

924 and self.base == enumatom.base) 

925 

926 def __init__(self, enum, dflt, base, shape=()): 

927 if not isinstance(enum, Enum): 

928 enum = Enum(enum) 

929 self.enum = enum 

930 

931 if isinstance(base, str): 

932 base = Atom.from_type(base) 

933 self._checkbase(base) 

934 self.base = base 

935 

936 default = enum[dflt] # check default value 

937 self._defname = dflt # kept for representation purposes 

938 

939 # These are kept to ease dumping this particular 

940 # representation of the enumeration to storage. 

941 names, values = [], [] 

942 for (name, value) in enum: 

943 names.append(name) 

944 values.append(value) 

945 basedtype = self.base.dtype 

946 

947 self._names = names 

948 self._values = np.array(values, dtype=basedtype.base) 

949 

950 Atom.__init__(self, basedtype, shape, default) 

951 

952 def __repr__(self): 

953 return ('EnumAtom(enum=%r, dflt=%r, base=%r, shape=%r)' 

954 % (self.enum, self._defname, self.base, self.shape)) 

955 

    # ``__eq__`` is built by ``_cmp_dispatcher`` from the name of the
    # comparison method above.
    # NOTE(review): ``_cmp_dispatcher`` is not visible from here --
    # presumably defined earlier in this module; confirm how it dispatches.
    __eq__ = _cmp_dispatcher('_is_equal_to_enumatom')

    # XXX: API incompatible change for PyTables 3 line
    # Overriding __eq__ blocks inheritance of __hash__ in 3.x
    # def __hash__(self):
    #     return hash((self.__class__, self.enum, self.shape, self.dflt,
    #                  self.base))

963 

964 

class ReferenceAtom(Atom):
    """Atom of type ``object`` used for reading references.

    Atoms of this class are read-only.
    """

    kind = 'reference'
    type = 'object'
    _deftype = 'NoneType'
    _defvalue = None

    @property
    def itemsize(self):
        """Number of bytes taken by one item of the atom's base dtype."""
        storage_dtype = self.dtype.base
        return storage_dtype.itemsize

    def __init__(self, shape=()):
        # Only the shape is configurable: the type and the default value
        # come from the class attributes.
        default = self._defvalue
        Atom.__init__(self, self.type, shape, default)

    def __repr__(self):
        shape = self.shape
        return f'ReferenceAtom(shape={shape})'

985 

986# Pseudo-atom classes 

987# =================== 

988# 

# Next come three special classes, `ObjectAtom`, `VLStringAtom`
# and `VLUnicodeAtom`, which do not actually descend from `Atom` but
# whose purpose is so similar that they are described here.

992# Pseudo-atoms can only be used with `VLArray` datasets, and they do 

993# not support multidimensional values, nor multiple values per row. 

994# 

995# They can be recognised because they also have ``kind``, ``type`` and 

996# ``shape`` attributes, but no ``size``, ``itemsize`` or ``dflt`` 

997# ones. Instead, they have a ``base`` atom which defines the elements 

998# used for storage. 

999# 

1000# See ``examples/vlarray1.py`` and ``examples/vlarray2.py`` for 

1001# further examples on `VLArray` datasets, including object 

1002# serialization and string management. 

1003 

1004 

class PseudoAtom:
    """Base class of the pseudo-atoms, usable only in ``VLArray`` nodes.

    Like regular atoms they have `kind`, `type` and `shape` attributes,
    but they lack the `size`, `itemsize` and `dflt` ones.  In their place
    they have a `base` atom which defines the elements used for storage.
    """

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name

    def toarray(self, object_):
        """Turn `object_` into an array of base atoms.

        Not implemented in this class.
        """
        raise NotImplementedError

    def fromarray(self, array):
        """Turn an `array` of base atoms back into an object.

        Not implemented in this class.
        """
        raise NotImplementedError

1026 

1027 

class _BufferedAtom(PseudoAtom):
    """Pseudo-atom whose data is stored as a buffer (flat array of uints)."""

    shape = ()

    def toarray(self, object_):
        """Wrap the buffer built from `object_` in an array of base atoms.

        The array has the dtype of ``self.base`` and as many elements as
        ``len()`` reports for the buffer returned by ``_tobuffer()``.
        """
        raw = self._tobuffer(object_)
        storage_dtype = self.base.dtype
        count = len(raw)
        return np.ndarray(buffer=raw, dtype=storage_dtype, shape=count)

    def _tobuffer(self, object_):
        """Turn `object_` into a buffer.

        Not implemented in this class.
        """
        raise NotImplementedError

1043 

1044 

class VLStringAtom(_BufferedAtom):
    """Defines an atom of type ``vlstring``.

    This class describes a *row* of the VLArray class, rather than an atom. It
    differs from the StringAtom class in that you can only add *one instance of
    it to one specific row*, i.e. the :meth:`VLArray.append` method only
    accepts one object when the base atom is of this type.

    This class stores bytestrings. It does not make assumptions on the
    encoding of the string, and raw bytes are stored as is. To store a string
    you will need to *explicitly* convert it to a bytestring before you can
    save it::

        >>> s = 'A unicode string: hbar = \u210f'
        >>> bytestring = s.encode('utf-8')
        >>> VLArray.append(bytestring) # doctest: +SKIP

    For full Unicode support, using VLUnicodeAtom (see :ref:`VLUnicodeAtom`) is
    recommended.

    Variable-length string atoms do not accept parameters and they cause the
    reads of rows to always return Python bytestrings. You can regard vlstring
    atoms as an easy way to save generic variable length strings.

    """

    kind = 'vlstring'
    type = 'vlstring'
    base = UInt8Atom()

    def _tobuffer(self, object_):
        """Return `object_` as a NumPy bytes scalar.

        Passing a ``str`` is still accepted but issues a
        ``DeprecationWarning``.  Anything that is neither ``str`` nor
        ``bytes`` raises a ``TypeError``.
        """
        if isinstance(object_, str):
            warnings.warn("Storing non bytestrings in VLStringAtom is "
                          "deprecated.", DeprecationWarning)
        elif not isinstance(object_, bytes):
            raise TypeError(f"object is not a string: {object_!r}")
        # ``np.string_`` was only an alias of ``np.bytes_`` and has been
        # removed in NumPy 2.0; the canonical name works everywhere.
        return np.bytes_(object_)

    def fromarray(self, array):
        """Return the raw bytes of `array` as a Python bytestring."""
        return array.tobytes()

1085 

1086 

class VLUnicodeAtom(_BufferedAtom):
    """Defines an atom of type vlunicode.

    This class describes a *row* of the VLArray class, rather than an atom. It
    is very similar to VLStringAtom (see :ref:`VLStringAtom`), but it stores
    Unicode strings (using 32-bit characters a la UCS-4, so all strings of the
    same length also take up the same space).

    This class does not make assumptions on the encoding of plain input
    strings. Plain strings are supported as long as no character is out of the
    ASCII set; otherwise, you will need to *explicitly* convert them to Unicode
    before you can save them.

    Variable-length Unicode atoms do not accept parameters and they cause the
    reads of rows to always return Python Unicode strings. You can regard
    vlunicode atoms as an easy way to save variable length Unicode strings.

    """

    kind = 'vlunicode'
    type = 'vlunicode'
    base = UInt32Atom()

    @staticmethod
    def _as_str(object_):
        """Validate `object_` and return it as a plain Python ``str``.

        Bytestrings are still accepted, with a ``DeprecationWarning``,
        and are decoded as ASCII; a bytestring holding non-ASCII bytes
        raises ``UnicodeDecodeError``.  Anything that is neither ``str``
        nor ``bytes`` raises a ``TypeError``.
        """
        if isinstance(object_, bytes):
            warnings.warn("Storing bytestrings in VLUnicodeAtom is "
                          "deprecated.", DeprecationWarning)
            # ``str(b'abc')`` yields the ``"b'abc'"`` representation on
            # Python 3, which would store the quotes and the prefix
            # instead of the text itself.
            return object_.decode('ascii')
        if not isinstance(object_, str):
            raise TypeError(f"object is not a string: {object_!r}")
        return str(object_)

    # numpy.unicode_ no more implements the buffer interface in Python 3
    #
    # When the Python build is UCS-2, we need to promote the
    # Unicode string to UCS-4.  We *must* use a 0-d array since
    # NumPy scalars inherit the UCS-2 encoding from Python (see
    # NumPy ticket #525).  Since ``_tobuffer()`` can't return an
    # array, we must override ``toarray()`` itself.
    def toarray(self, object_):
        """Convert the string `object_` into an array of base atoms.

        The array holds one element of the base dtype per character.
        """
        ustr = self._as_str(object_)
        uarr = np.array(ustr, dtype='U')
        return np.ndarray(
            buffer=uarr, dtype=self.base.dtype, shape=len(ustr))

    def _tobuffer(self, object_):
        """Return `object_` as a NumPy Unicode scalar."""
        # This works (and is used) only with UCS-4 builds of Python,
        # where the width of the internal representation of a
        # character matches that of the base atoms.
        #
        # ``np.unicode_`` was only an alias of ``np.str_`` and has been
        # removed in NumPy 2.0; the canonical name works everywhere.
        return np.str_(self._as_str(object_))

    def fromarray(self, array):
        """Convert an `array` of base atoms back into a Python string."""
        length = len(array)
        if length == 0:
            return ''  # ``array.view('U0')`` raises a `TypeError`
        return array.view('U%d' % length).item()

1144 

1145 

class ObjectAtom(_BufferedAtom):
    """Defines an atom of type object.

    This class makes it possible to fit *any* kind of Python object in a
    row of a VLArray dataset, using pickle behind the scenes.  Since the
    length of the pickled output can not be foreseen (i.e. the atom
    already has a *variable* length), only *one object per row* can be
    stored.  Several objects can still be grouped in a single tuple or
    list which is then passed to the :meth:`VLArray.append` method.

    Object atoms do not accept parameters and they cause the reads of rows to
    always return Python objects.  You can regard object atoms as an easy way
    to save an arbitrary number of generic Python objects in a VLArray dataset.

    """

    kind = 'object'
    type = 'object'
    base = UInt8Atom()

    def _tobuffer(self, object_):
        """Pickle `object_` with the highest protocol available."""
        return pickle.dumps(object_, protocol=pickle.HIGHEST_PROTOCOL)

    def fromarray(self, array):
        """Unpickle the object stored in `array`.

        ``None`` is returned for an empty array.
        """
        # We have to check for an empty array because of a possible
        # bug in HDF5 which makes it claim that a dataset has one
        # record when in fact it is empty.
        if array.size == 0:
            return None
        # NOTE(review): unpickling can run arbitrary code, so rows of
        # this kind should only be read from trusted sources.
        serialized = array.tobytes()
        return pickle.loads(serialized)