1"""
2This module contains a set of functions for record arrays.
3"""
4import os
5import warnings
6from collections import Counter
7from contextlib import nullcontext
8
9from .._utils import set_module
10from . import numeric as sb
11from . import numerictypes as nt
12from .arrayprint import _get_legacy_print_mode
13
# All of the functions allow formats to be a dtype
__all__ = [
    'record', 'recarray', 'format_parser', 'fromarrays', 'fromrecords',
    'fromstring', 'fromfile', 'array', 'find_duplicate',
]


# Local alias for the base array class; `recarray` below subclasses it.
ndarray = sb.ndarray

# Maps the first character of a user-supplied byte-order string to the
# code that ``format_parser._createdtype`` hands to ``dtype.newbyteorder``.
# The symbolic codes ('>', '<', '=', '|') map to themselves.
# Presumably the letters stand for big / little / native / swap / ignore
# -- confirm against the ``dtype.newbyteorder`` documentation.
_byteorderconv = {'b': '>',
                  'l': '<',
                  'n': '=',
                  'B': '>',
                  'L': '<',
                  'N': '=',
                  'S': 's',
                  's': 's',
                  '>': '>',
                  '<': '<',
                  '=': '=',
                  '|': '|',
                  'I': '|',
                  'i': '|'}

# Format strings may carry a multidimensional spec, written with tuple
# syntax in front of the letter code: '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed.
# NOTE(review): this comment originally introduced a "formats regular
# expression", but no regular expression is defined in the visible part of
# this file; format parsing appears to be delegated to ``sb.dtype`` in
# ``format_parser._parseFormats`` -- confirm.

# Alias of the scalar-type dictionary from ``numerictypes``. It is not
# referenced by any other code visible in this file.
numfmt = nt.sctypeDict
44
45
46@set_module('numpy.rec')
def find_duplicate(list):
    """Return the elements that occur more than once in `list`.

    Each repeated element is reported a single time, in the order in which
    it first appears in the input.
    """
    # The parameter name shadows the builtin; it is kept because it is part
    # of the public signature.
    tally = Counter(list)
    repeated = []
    for element in tally:
        if tally[element] > 1:
            repeated.append(element)
    return repeated
54
55
@set_module('numpy.rec')
class format_parser:
    """
    Class to convert formats, names, titles description to a dtype.

    After constructing the format_parser object, the dtype attribute is
    the converted data-type:
    ``dtype = format_parser(formats, names, titles).dtype``

    Attributes
    ----------
    dtype : dtype
        The converted data-type.

    Parameters
    ----------
    formats : str or list of str
        The format description, either specified as a string with
        comma-separated format descriptions in the form ``'f8, i4, S5'``, or
        a list of format description strings in the form
        ``['f8', 'i4', 'S5']``.
    names : str or list/tuple of str
        The field names, either specified as a comma-separated string in the
        form ``'col1, col2, col3'``, or as a list or tuple of strings in the
        form ``['col1', 'col2', 'col3']``.
        An empty list can be used, in that case default field names
        ('f0', 'f1', ...) are used.
    titles : sequence
        Sequence of title strings. An empty list can be used to leave titles
        out.
    aligned : bool, optional
        If True, align the fields by padding as the C-compiler would.
        Default is False.
    byteorder : str, optional
        If specified, all the fields will be changed to the
        provided byte-order.  Otherwise, the default byte-order is
        used. For all available string specifiers, see `dtype.newbyteorder`.

    See Also
    --------
    numpy.dtype, numpy.typename

    Examples
    --------
    >>> import numpy as np
    >>> np.rec.format_parser(['<f8', '<i4'], ['col1', 'col2'],
    ...                      ['T1', 'T2']).dtype
    dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4')])

    `names` and/or `titles` can be empty lists. If `titles` is an empty list,
    titles will simply not appear. If `names` is empty, default field names
    will be used.

    >>> np.rec.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
    ...                      []).dtype
    dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
    >>> np.rec.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
    dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])

    """

    def __init__(self, formats, names, titles, aligned=False, byteorder=None):
        # The three steps depend on each other's results, so order matters:
        # formats fix the field count, which the name/title step needs, and
        # the final dtype is assembled from everything collected so far.
        self._parseFormats(formats, aligned)
        self._setfieldnames(names, titles)
        self._createdtype(byteorder)

    def _parseFormats(self, formats, aligned=False):
        """Parse the field formats.

        Stores the per-field dtypes in ``_f_formats``, the per-field byte
        offsets in ``_offsets`` and the field count in ``_nfields``.

        Raises
        ------
        ValueError
            If `formats` is None.
        """

        if formats is None:
            raise ValueError("Need formats argument")
        if isinstance(formats, list):
            # Attach placeholder names so the list can be read as a
            # structured dtype specification; the real names are assigned
            # later by `_setfieldnames`.
            dtype = sb.dtype(
                [
                    (f'f{i}', format_)
                    for i, format_ in enumerate(formats)
                ],
                aligned,
            )
        else:
            dtype = sb.dtype(formats, aligned)
        fields = dtype.fields
        if fields is None:
            # A single non-structured format: wrap it as a one-field
            # structure so the code below can treat every case alike.
            dtype = sb.dtype([('f1', dtype)], aligned)
            fields = dtype.fields
        keys = dtype.names
        self._f_formats = [fields[key][0] for key in keys]
        self._offsets = [fields[key][1] for key in keys]
        self._nfields = len(keys)

    def _setfieldnames(self, names, titles):
        """Convert the input field names and titles into lists.

        The results are stored in ``_names`` and ``_titles``, each with
        exactly ``_nfields`` entries: surplus entries are dropped, missing
        names are filled with ``'f<n>'`` defaults and missing titles with
        None.

        Raises
        ------
        NameError
            If `names` is non-empty and is neither a string nor a list or
            tuple.
        ValueError
            If the resulting field names contain duplicates.
        """

        if names:
            if isinstance(names, str):
                names = names.split(',')
            elif not isinstance(names, (list, tuple)):
                # isinstance (rather than an exact type match) so that list
                # and tuple subclasses are accepted, matching how str
                # subclasses are already handled above.
                raise NameError("illegal input names %s" % repr(names))

            self._names = [n.strip() for n in names[:self._nfields]]
        else:
            self._names = []

        # if the names are not specified, they will be assigned as
        #  "f0, f1, f2,..."
        # if not enough names are specified, they will be assigned as "f[n],
        # f[n+1],..." etc. where n is the number of specified names..."
        self._names += ['f%d' % i for i in range(len(self._names),
                                                 self._nfields)]
        # check for redundant names
        _dup = find_duplicate(self._names)
        if _dup:
            raise ValueError("Duplicate field names: %s" % _dup)

        if titles:
            self._titles = [n.strip() for n in titles[:self._nfields]]
        else:
            self._titles = []
            titles = []

        # Pad with None so every field has a title slot.
        if self._nfields > len(titles):
            self._titles += [None] * (self._nfields - len(titles))

    def _createdtype(self, byteorder):
        """Assemble the final dtype and store it in ``self.dtype``.

        If `byteorder` is given, only its first character is looked up in
        ``_byteorderconv`` and the result is applied with
        ``dtype.newbyteorder``.
        """
        dtype = sb.dtype({
            'names': self._names,
            'formats': self._f_formats,
            'offsets': self._offsets,
            'titles': self._titles,
        })
        if byteorder is not None:
            byteorder = _byteorderconv[byteorder[0]]
            dtype = dtype.newbyteorder(byteorder)

        self.dtype = dtype
194
195
class record(nt.void):
    """A data-type scalar that allows field access as attribute lookup.

    Fields of the underlying structured dtype can be read and written as
    attributes (``rec.x``) in addition to item access (``rec['x']``).
    Attributes of the base scalar type take precedence over fields of the
    same name.
    """

    # manually set name and module so that this class's type shows up
    # as numpy.record when printed
    __name__ = 'record'
    __module__ = 'numpy'

    def __repr__(self):
        # Legacy print modes up to 1.13 show the plain tuple form.
        if _get_legacy_print_mode() <= 113:
            return self.__str__()
        return super().__repr__()

    def __str__(self):
        if _get_legacy_print_mode() <= 113:
            return str(self.item())
        return super().__str__()

    def __getattribute__(self, attr):
        """Look up `attr` on the scalar first, then among the fields.

        Raises
        ------
        AttributeError
            If `attr` is neither a scalar attribute nor a field name.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            return nt.void.__getattribute__(self, attr)
        try:
            return nt.void.__getattribute__(self, attr)
        except AttributeError:
            pass
        # `fields` is None for a dtype without fields; treat that as "no
        # field matches" so the error raised below names the attribute.
        fielddict = nt.void.__getattribute__(self, 'dtype').fields or {}
        res = fielddict.get(attr, None)
        if res:
            obj = self.getfield(*res[:2])
            # if it has fields return a record,
            # otherwise return the object
            try:
                dt = obj.dtype
            except AttributeError:
                # happens if field is Object type
                return obj
            if dt.names is not None:
                return obj.view((self.__class__, obj.dtype))
            return obj
        else:
            raise AttributeError("'record' object has no "
                    "attribute '%s'" % attr)

    def __setattr__(self, attr, val):
        """Write `val` into the field `attr`, or set a scalar attribute.

        Raises
        ------
        AttributeError
            If `attr` is one of the protected names, if it does not exist,
            or if the base scalar type refuses the assignment.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            raise AttributeError("Cannot set '%s' attribute" % attr)
        fielddict = nt.void.__getattribute__(self, 'dtype').fields or {}
        res = fielddict.get(attr, None)
        if res:
            return self.setfield(val, *res[:2])

        # Test for existence, not truthiness: an attribute whose current
        # value is falsy (0, (), None, ...) still exists, and the base
        # class should decide whether it may be assigned.
        try:
            nt.void.__getattribute__(self, attr)
        except AttributeError:
            raise AttributeError("'record' object has no "
                    "attribute '%s'" % attr) from None
        return nt.void.__setattr__(self, attr, val)

    def __getitem__(self, indx):
        obj = nt.void.__getitem__(self, indx)

        # copy behavior of record.__getattribute__,
        if isinstance(obj, nt.void) and obj.dtype.names is not None:
            return obj.view((self.__class__, obj.dtype))
        else:
            # return a single element
            return obj

    def pprint(self):
        """Pretty-print all fields.

        Returns a string with one ``name: value`` line per field, the names
        right-aligned to the width of the longest one.
        """
        # pretty-print all fields
        names = self.dtype.names
        maxlen = max(len(name) for name in names)
        fmt = '%% %ds: %%s' % maxlen
        rows = [fmt % (name, getattr(self, name)) for name in names]
        return "\n".join(rows)
272
# The recarray is almost identical to a standard array (which already
# supports named fields). The biggest difference is that it can use
# attribute lookup to find the fields, and that it is constructed using
# a record.

# If byteorder is given, it forces that particular byteorder on all
# the fields (and any subfields).
280
281
@set_module("numpy.rec")
class recarray(ndarray):
    """Construct an ndarray that allows field access using attributes.

    Arrays may have a data-type containing fields, analogous
    to columns in a spread sheet.  An example is ``[(x, int), (y, float)]``,
    where each entry in the array is a pair of ``(int, float)``.  Normally,
    these attributes are accessed using dictionary lookups such as ``arr['x']``
    and ``arr['y']``.  Record arrays allow the fields to be accessed as members
    of the array, using ``arr.x`` and ``arr.y``.

    Parameters
    ----------
    shape : tuple
        Shape of output array.
    dtype : data-type, optional
        The desired data-type.  By default, the data-type is determined
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    formats : list of data-types, optional
        A list containing the data-types for the different columns, e.g.
        ``['i4', 'f8', 'i4']``.  `formats` does *not* support the new
        convention of using types directly, i.e. ``(int, float, int)``.
        Note that `formats` must be a list, not a tuple.
        Given that `formats` is somewhat limited, we recommend specifying
        `dtype` instead.
    names : tuple of str, optional
        The name of each column, e.g. ``('x', 'y', 'z')``.
    buf : buffer, optional
        By default, a new array is created of the given shape and data-type.
        If `buf` is specified and is an object exposing the buffer interface,
        the array will use the memory from the existing buffer.  In this case,
        the `offset` and `strides` keywords are available.

    Other Parameters
    ----------------
    titles : tuple of str, optional
        Aliases for column names.  For example, if `names` were
        ``('x', 'y', 'z')`` and `titles` is
        ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
        ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
    byteorder : {'<', '>', '='}, optional
        Byte-order for all fields.
    aligned : bool, optional
        Align the fields in memory as the C-compiler would.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    offset : int, optional
        Start reading buffer (`buf`) from this offset onwards.
    order : {'C', 'F'}, optional
        Row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    rec : recarray
        Empty array of the given shape and type.

    See Also
    --------
    numpy.rec.fromrecords : Construct a record array from data.
    numpy.record : fundamental data-type for `recarray`.
    numpy.rec.format_parser : determine data-type from formats, names, titles.

    Notes
    -----
    This constructor can be compared to ``empty``: it creates a new record
    array but does not fill it with data.  To create a record array from data,
    use one of the following methods:

    1. Create a standard ndarray and convert it to a record array,
       using ``arr.view(np.recarray)``
    2. Use the `buf` keyword.
    3. Use `np.rec.fromrecords`.

    Examples
    --------
    Create an array with two fields, ``x`` and ``y``:

    >>> import numpy as np
    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
    >>> x
    array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])

    >>> x['x']
    array([1., 3.])

    View the array as a record array:

    >>> x = x.view(np.recarray)

    >>> x.x
    array([1., 3.])

    >>> x.y
    array([2, 4])

    Create a new, empty record array:

    >>> np.recarray((2,),
    ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
    rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
           (3471280, 1.2134086255804012e-316, 0)],
          dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])

    """

    def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
                formats=None, names=None, titles=None,
                byteorder=None, aligned=False, order='C'):

        # An explicit dtype wins; otherwise build one from the
        # formats/names/titles description.
        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(
                formats, names, titles, aligned, byteorder
            ).dtype

        # `offset` and `strides` are only passed on when a buffer is given.
        if buf is None:
            self = ndarray.__new__(
                subtype, shape, (record, descr), order=order
            )
        else:
            self = ndarray.__new__(
                subtype, shape, (record, descr), buffer=buf,
                offset=offset, strides=strides, order=order
            )
        return self

    def __array_finalize__(self, obj):
        if self.dtype.type is not record and self.dtype.names is not None:
            # if self.dtype is not np.record, invoke __setattr__ which will
            # convert it to a record if it is a void dtype.
            self.dtype = self.dtype

    def __getattribute__(self, attr):
        # See if ndarray has this attr, and return it if so. (note that this
        # means a field with the same name as an ndarray attr cannot be
        # accessed by attribute).
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:  # attr must be a fieldname
            pass

        # look for a field with this name
        fielddict = ndarray.__getattribute__(self, 'dtype').fields
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as e:
            # TypeError covers `fields` being None (dtype without fields).
            raise AttributeError("recarray has no attribute %s" % attr) from e
        obj = self.getfield(*res)

        # At this point obj will always be a recarray, since (see
        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
        # non-structured, convert it to an ndarray. Then if obj is structured
        # with void type convert it to the same dtype.type (eg to preserve
        # numpy.record type if present), since nested structured fields do not
        # inherit type. Don't do this for non-void structures though.
        if obj.dtype.names is not None:
            if issubclass(obj.dtype.type, nt.void):
                return obj.view(dtype=(self.dtype.type, obj.dtype))
            return obj
        else:
            return obj.view(ndarray)

    # Save the dictionary.
    # If the attr is a field name and not in the saved dictionary
    # Undo any "setting" of the attribute and do a setfield
    # Thus, you can't create attributes on-the-fly that are field names.
    def __setattr__(self, attr, val):

        if attr == 'dtype':
            # Accept any dtype specifier (string, list of tuples, scalar
            # type, ...) rather than assuming `val` is already a dtype
            # instance; `sb.dtype` returns dtype instances unchanged.
            val = sb.dtype(val)
            # Automatically convert (void) structured types to records
            # (but not non-void structures, subarrays, or non-structured
            # voids)
            if issubclass(val.type, nt.void) and val.names is not None:
                val = sb.dtype((record, val))

        newattr = attr not in self.__dict__
        try:
            ret = object.__setattr__(self, attr, val)
        except Exception:
            # The plain assignment failed; that is only acceptable when
            # `attr` names a field, which is written further below.
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                raise
        else:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                return ret
            if newattr:
                # We just added this one or this setattr worked on an
                # internal attribute.
                try:
                    object.__delattr__(self, attr)
                except Exception:
                    return ret
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError) as e:
            raise AttributeError(
                "record array has no attribute %s" % attr
            ) from e
        return self.setfield(val, *res)

    def __getitem__(self, indx):
        obj = super().__getitem__(indx)

        # copy behavior of getattr, except that here
        # we might also be returning a single element
        if isinstance(obj, ndarray):
            if obj.dtype.names is not None:
                obj = obj.view(type(self))
                if issubclass(obj.dtype.type, nt.void):
                    return obj.view(dtype=(self.dtype.type, obj.dtype))
                return obj
            else:
                return obj.view(type=ndarray)
        else:
            # return a single element
            return obj

    def __repr__(self):

        repr_dtype = self.dtype
        if (
            self.dtype.type is record or
            not issubclass(self.dtype.type, nt.void)
        ):
            # If this is a full record array (has numpy.record dtype),
            # or if it has a scalar (non-void) dtype with no records,
            # represent it using the rec.array function. Since rec.array
            # converts dtype to a numpy.record for us, convert back
            # to non-record before printing
            if repr_dtype.type is record:
                repr_dtype = sb.dtype((nt.void, repr_dtype))
            prefix = "rec.array("
            fmt = 'rec.array(%s,%sdtype=%s)'
        else:
            # otherwise represent it using np.array plus a view
            # This should only happen if the user is playing
            # strange games with dtypes.
            prefix = "array("
            fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'

        # get data/shape string. logic taken from numeric.array_repr
        if self.size > 0 or self.shape == (0,):
            lst = sb.array2string(
                self, separator=', ', prefix=prefix, suffix=',')
        else:
            # show zero-length shape unless it is (0,)
            lst = "[], shape=%s" % (repr(self.shape),)

        # Continuation line, indented to line up under the opening prefix.
        lf = '\n' + ' ' * len(prefix)
        if _get_legacy_print_mode() <= 113:
            lf = ' ' + lf  # trailing space
        return fmt % (lst, lf, repr_dtype)

    def field(self, attr, val=None):
        """Get or set a single field.

        Parameters
        ----------
        attr : int or str
            Field name, or the position of the field in ``dtype.names``.
        val : optional
            If None (the default) the field is returned; otherwise `val`
            is written into the field.  Because None selects the "get"
            behaviour, None itself cannot be assigned through this method.

        Returns
        -------
        The field contents when getting (viewed as a plain ndarray unless
        the field is itself structured), or the result of ``setfield``
        when setting.
        """
        if isinstance(attr, int):
            names = ndarray.__getattribute__(self, 'dtype').names
            attr = names[attr]

        fielddict = ndarray.__getattribute__(self, 'dtype').fields

        res = fielddict[attr][:2]

        if val is None:
            obj = self.getfield(*res)
            if obj.dtype.names is not None:
                return obj
            return obj.view(ndarray)
        else:
            return self.setfield(val, *res)
557
558
def _deprecate_shape_0_as_None(shape):
    """Translate the deprecated ``shape=0`` spelling into None.

    Any other value is handed back untouched.  A FutureWarning is emitted
    whenever the translation takes place.
    """
    requests_inference = shape == 0
    if not requests_inference:
        return shape

    # stacklevel=3 attributes the warning to the caller of the public
    # function that invoked this helper.
    warnings.warn(
        "Passing `shape=0` to have the shape be inferred is deprecated, "
        "and in future will be equivalent to `shape=(0,)`. To infer "
        "the shape and suppress this warning, pass `shape=None` instead.",
        FutureWarning, stacklevel=3)
    return None
569
570
@set_module("numpy.rec")
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Build a record array out of a flat sequence of column arrays.

    Parameters
    ----------
    arrayList : list or tuple
        Array-like objects (lists, tuples, ndarrays, ...), one per field.
    dtype : data-type, optional
        Structured data-type describing all the fields.
    shape : int or tuple of ints, optional
        Shape of the result.  When omitted it is taken from
        ``arrayList[0]``.
    formats, names, titles, aligned, byteorder :
        Used only when `dtype` is None; they are forwarded to
        `numpy.rec.format_parser` to build the data-type.  See that class
        for details.  If `formats` is missing as well, the data-types of
        the input arrays are used.

    Returns
    -------
    np.recarray
        Record array whose fields hold copies of the given arrays.

    Raises
    ------
    ValueError
        If the number of fields differs from the number of arrays, or if
        an array's shape does not match the shape of the result.

    Examples
    --------
    >>> x1=np.array([1,2,3,4])
    >>> x2=np.array(['a','dd','xyz','12'])
    >>> x3=np.array([1.1,2,3,4])
    >>> r = np.rec.fromarrays([x1,x2,x3],names='a,b,c')
    >>> print(r[1])
    (2, 'dd', 2.0) # may vary
    >>> x1[1]=34
    >>> r.a
    array([1, 2, 3, 4])

    >>> x1 = np.array([1, 2, 3, 4])
    >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
    >>> x3 = np.array([1.1, 2, 3,4])
    >>> r = np.rec.fromarrays(
    ...     [x1, x2, x3],
    ...     dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
    >>> r
    rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
               (4, b'12', 4. )],
              dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
    """

    columns = [sb.asarray(item) for item in arrayList]

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape is None:
        shape = columns[0].shape
    elif isinstance(shape, int):
        shape = (shape,)

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        if formats is None:
            # Nothing describes the fields: fall back on the data-types
            # of the inputs themselves.
            formats = [column.dtype for column in columns]
        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
    field_names = descr.names

    # Determine shape from data-type.
    if len(descr) != len(columns):
        raise ValueError("mismatch between the number of fields "
                         "and the number of arrays")

    # When the first field is a sub-array, its dimensions are trailing
    # dimensions of the inputs and not part of the record array's shape.
    subarray_ndim = len(descr[0].shape)
    if subarray_ndim > 0:
        shape = shape[:-subarray_ndim]

    result = recarray(shape, descr)

    # populate the record array (makes a copy)
    for position, (name, column) in enumerate(zip(field_names, columns)):
        field_ndim = descr[position].ndim
        leading_shape = column.shape[:column.ndim - field_ndim]
        if leading_shape != shape:
            raise ValueError(
                f'array-shape mismatch in array {position} ("{name}")')

        result[name] = column

    return result
664
665
@set_module("numpy.rec")
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
                titles=None, aligned=False, byteorder=None):
    """Create a recarray from a list of records in text form.

    Parameters
    ----------
    recList : sequence
        The records.  Data in the same field may be heterogeneous; it is
        promoted to the highest data type.
    dtype : data-type, optional
        Structured data-type describing all the fields.
    shape : int or tuple of ints, optional
        Shape of the result.
    formats, names, titles, aligned, byteorder :
        Used only when `dtype` is None; they are forwarded to
        `numpy.rec.format_parser` to build the data-type.  See that class
        for details.

        When both `formats` and `dtype` are None the formats are
        auto-detected, which is slower.  Use a list of tuples rather than a
        list of lists for faster processing.

    Returns
    -------
    np.recarray
        Record array made of the rows in `recList`.

    Examples
    --------
    >>> r=np.rec.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
    ... names='col1,col2,col3')
    >>> print(r[0])
    (456, 'dbe', 1.2)
    >>> r.col1
    array([456,   2])
    >>> r.col2
    array(['dbe', 'de'], dtype='<U3')
    >>> import pickle
    >>> pickle.loads(pickle.dumps(r))
    rec.array([(456, 'dbe', 1.2), (  2, 'de', 1.3)],
              dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
    """

    if formats is None and dtype is None:  # slower
        # No type information at all: split the records into columns and
        # let `fromarrays` infer a data-type per column.
        table = sb.array(recList, dtype=object)
        ncolumns = table.shape[-1]
        columns = [sb.array(table[..., j].tolist()) for j in range(ncolumns)]
        return fromarrays(columns, formats=formats, shape=shape, names=names,
                          titles=titles, aligned=aligned, byteorder=byteorder)

    if dtype is None:
        descr = format_parser(
            formats, names, titles, aligned, byteorder
        ).dtype
    else:
        descr = sb.dtype((record, dtype))

    try:
        retval = sb.array(recList, dtype=descr)
    except (TypeError, ValueError):
        # Direct conversion failed; fill a 1-d result row by row,
        # coercing each row to a tuple.
        # NumPy 1.19.0, 2020-01-01
        shape = _deprecate_shape_0_as_None(shape)
        if shape is None:
            shape = len(recList)
        if isinstance(shape, int):
            shape = (shape,)
        if len(shape) > 1:
            raise ValueError("Can only deal with 1-d array.")
        filled = recarray(shape, descr)
        for row in range(filled.size):
            filled[row] = tuple(recList[row])
        # list of lists instead of list of tuples ?
        # 2018-02-07, 1.14.1
        warnings.warn(
            "fromrecords expected a list of tuples, may have received a list "
            "of lists instead. In the future that will raise an error",
            FutureWarning, stacklevel=2)
        return filled

    if shape is not None and retval.shape != shape:
        retval.shape = shape

    return retval.view(recarray)
753
754
@set_module("numpy.rec")
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    r"""Create a record array from binary data

    Despite its name, this function does not accept `str` instances; the
    input has to be bytes-like.

    Parameters
    ----------
    datastring : bytes-like
        Buffer of binary data
    dtype : data-type, optional
        Structured data-type describing all the fields.
    shape : int or tuple of ints, optional
        Shape of the result.  If None or -1, the number of records is
        computed from the buffer length, `offset` and the item size.
    offset : int, optional
        Position in the buffer to start reading from.
    formats, names, titles, aligned, byteorder :
        Used only when `dtype` is None; they are forwarded to
        `numpy.rec.format_parser` to build the data-type.  See that class
        for details.

    Returns
    -------
    np.recarray
        Record array view into the data in datastring. This will be readonly
        if `datastring` is readonly.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.

    See Also
    --------
    numpy.frombuffer

    Examples
    --------
    >>> a = b'\x01\x02\x03abc'
    >>> np.rec.fromstring(a, dtype='u1,u1,u1,S3')
    rec.array([(1, 2, 3, b'abc')],
            dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])

    >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
    ...                 ('GradeLevel', np.int32)]
    >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
    ...                         ('Aadi', 66.6, 6)], dtype=grades_dtype)
    >>> np.rec.fromstring(grades_array.tobytes(), dtype=grades_dtype)
    rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
            dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])

    >>> s = '\x01\x02\x03abc'
    >>> np.rec.fromstring(s, dtype='u1,u1,u1,S3')
    Traceback (most recent call last):
       ...
    TypeError: a bytes-like object is required, not 'str'
    """

    if dtype is None and formats is None:
        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")

    descr = (
        sb.dtype(dtype)
        if dtype is not None
        else format_parser(formats, names, titles, aligned, byteorder).dtype
    )
    record_size = descr.itemsize

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape in (None, -1):
        # Use as many whole records as fit after `offset`.
        usable_bytes = len(datastring) - offset
        shape = usable_bytes // record_size

    return recarray(shape, descr, buf=datastring, offset=offset)
829
def get_remaining_size(fd):
    """Return the number of bytes between the current position and the end.

    `fd` must support ``tell`` and ``seek``.  The position it had on entry
    is restored before returning, including when measuring fails.
    """
    start = fd.tell()
    try:
        fd.seek(0, os.SEEK_END)
        end = fd.tell()
    finally:
        fd.seek(start, os.SEEK_SET)
    return end - start
837
838
@set_module("numpy.rec")
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Create an array from binary file data

    Parameters
    ----------
    fd : str or file type
        If file is a string or a path-like object then that file is opened,
        else it is assumed to be a file object. The file object must
        support random access (i.e. it must have tell and seek methods).
    dtype : data-type, optional
        Structured data-type describing all the fields.
    shape : int or tuple of ints, optional
        Shape of the result.  A single -1 entry is replaced by as many
        items as the remaining data allows; omitting `shape` is the same
        as ``(-1,)``.
    offset : int, optional
        Number of bytes to skip, counted from the current position of the
        file, before reading starts.  Only positive values have an effect.
    formats, names, titles, aligned, byteorder :
        Used only when `dtype` is None; they are forwarded to
        `numpy.rec.format_parser` to build the data-type.  See that class
        for details.

    Returns
    -------
    np.recarray
        record array consisting of data enclosed in file.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.
    ValueError
        If the file does not hold enough bytes for the requested shape.
    OSError
        If fewer bytes than expected could be read.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> a = np.empty(10,dtype='f8,i4,a5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.view(a.dtype.newbyteorder('<'))
    >>> a.tofile(fd)
    >>>
    >>> _ = fd.seek(0)
    >>> r=np.rec.fromfile(fd, formats='f8,i4,a5', shape=10,
    ... byteorder='<')
    >>> print(r[5])
    (0.5, 10, b'abcde')
    >>> r.shape
    (10,)
    """

    if dtype is None and formats is None:
        raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape is None:
        shape = (-1,)
    elif isinstance(shape, int):
        shape = (shape,)

    # GH issue 2504. An object with `readinto` supports the io.RawIOBase or
    # io.BufferedIOBase interface (examples: gzip, BytesIO, BufferedReader)
    # and is used as-is without being closed here; anything else is treated
    # as a path, opened, and closed again when done.
    ctx = (
        nullcontext(fd)
        if hasattr(fd, 'readinto')
        else open(os.fspath(fd), 'rb')
    )

    with ctx as fd:
        if offset > 0:
            fd.seek(offset, os.SEEK_CUR)
        available = get_remaining_size(fd)

        if dtype is None:
            descr = format_parser(
                formats, names, titles, aligned, byteorder
            ).dtype
        else:
            descr = sb.dtype(dtype)

        itemsize = descr.itemsize

        item_count = sb.array(shape).prod(dtype=nt.intp)
        shapesize = item_count * itemsize
        if shapesize < 0:
            # A negative product means `shape` holds a -1 placeholder:
            # replace it by the number of items the remaining bytes allow.
            dims = list(shape)
            dims[dims.index(-1)] = available // -shapesize
            shape = tuple(dims)
            item_count = sb.array(shape).prod(dtype=nt.intp)

        nbytes = item_count * itemsize

        if nbytes > available:
            raise ValueError(
                "Not enough bytes left in file for specified "
                "shape and type."
            )

        # create the array
        result = recarray(shape, descr)
        nbytesread = fd.readinto(result.data)
        if nbytesread != nbytes:
            raise OSError("Didn't read as many bytes as expected")

    return result
942
943
@set_module("numpy.rec")
def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
          names=None, titles=None, aligned=False, byteorder=None, copy=True):
    """
    Construct a record array from a wide-variety of objects.

    A general-purpose record array constructor that dispatches to the
    appropriate `recarray` creation function based on the inputs (see Notes).

    Parameters
    ----------
    obj : any
        Input object. See Notes for details on how various input types are
        treated.
    dtype : data-type, optional
        Valid dtype for array.
    shape : int or tuple of ints, optional
        Shape of each array.
    offset : int, optional
        Position in the file or buffer to start reading from.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.rec.format_parser` to construct a dtype. See that class for
        detailed documentation.
    copy : bool, optional
        Whether to copy the input object (True), or to use a reference instead.
        This option only applies when the input is an ndarray or recarray.
        Defaults to True.

    Returns
    -------
    np.recarray
        Record array created from the specified object.

    Raises
    ------
    ValueError
        If `obj` is None, a `str` or an open file and neither `dtype` nor
        `formats` is given; if `obj` is None and `shape` is missing; or if
        the type of `obj` is not supported.

    Notes
    -----
    If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
    `obj` is a bytes object, then call the `fromstring` constructor (`str`
    objects are not accepted). If `obj` is a list or a tuple, then if it is
    empty or its first element is itself a list or a tuple, call
    `fromrecords`, otherwise call `fromarrays`. If `obj` is a
    `~numpy.recarray`, then make a copy of the data in the recarray
    (if ``copy=True``) and use the new formats, names, and titles. If `obj`
    is a file, then call `fromfile`. If `obj` is an `ndarray`, then
    return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
    Finally, any other object exposing ``__array_interface__`` is converted
    to an array and viewed as a recarray.

    Examples
    --------
    >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    >>> a
    array([[1, 2, 3],
           [4, 5, 6],
           [7, 8, 9]])

    >>> np.rec.array(a)
    rec.array([[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9]],
              dtype=int64)

    >>> b = [(1, 1), (2, 4), (3, 9)]
    >>> c = np.rec.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
    >>> c
    rec.array([(1, 1.), (2, 4.), (3, 9.)],
              dtype=[('x', '<i2'), ('y', '<f2')])

    >>> c.x
    array([1, 2, 3], dtype=int16)

    >>> c.y
    array([1.,  4.,  9.], dtype=float16)

    >>> r = np.rec.array(['abc','def'], names=['col1','col2'])
    >>> print(r.col1)
    abc

    >>> r.col1
    array('abc', dtype='<U3')

    >>> r.col2
    array('def', dtype='<U3')
    """

    if ((isinstance(obj, (type(None), str)) or hasattr(obj, 'readinto')) and
           formats is None and dtype is None):
        raise ValueError("Must define formats (or dtype) if object is "
                         "None, string, or an open file")

    # Resolve the dtype once here whenever enough information is available.
    # `kwds` is only populated when it is not, so that the delegate
    # constructors can still use names/titles/... during auto-detection.
    kwds = {}
    if dtype is not None:
        dtype = sb.dtype(dtype)
    elif formats is not None:
        dtype = format_parser(formats, names, titles,
                              aligned, byteorder).dtype
    else:
        kwds = {'formats': formats,
                'names': names,
                'titles': titles,
                'aligned': aligned,
                'byteorder': byteorder
                }

    if obj is None:
        if shape is None:
            raise ValueError("Must define a shape if obj is None")
        return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)

    elif isinstance(obj, bytes):
        return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)

    elif isinstance(obj, (list, tuple)):
        # An empty sequence is treated as "zero records": with a known
        # dtype `fromrecords` returns an empty record array, whereas
        # indexing obj[0] would fail before any constructor is reached.
        if not obj or isinstance(obj[0], (tuple, list)):
            return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
        else:
            return fromarrays(obj, dtype=dtype, shape=shape, **kwds)

    elif isinstance(obj, recarray):
        if dtype is not None and (obj.dtype != dtype):
            new = obj.view(dtype)
        else:
            new = obj
        if copy:
            new = new.copy()
        return new

    elif hasattr(obj, 'readinto'):
        # dtype is never None here: the check at the top rejects file
        # objects given without dtype or formats.
        return fromfile(obj, dtype=dtype, shape=shape, offset=offset)

    elif isinstance(obj, ndarray):
        if dtype is not None and (obj.dtype != dtype):
            new = obj.view(dtype)
        else:
            new = obj
        if copy:
            new = new.copy()
        return new.view(recarray)

    else:
        interface = getattr(obj, "__array_interface__", None)
        if interface is None or not isinstance(interface, dict):
            raise ValueError("Unknown input type")
        obj = sb.array(obj)
        if dtype is not None and (obj.dtype != dtype):
            obj = obj.view(dtype)
        return obj.view(recarray)