Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/numpy/_core/records.py: 14%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2This module contains a set of functions for record arrays.
3"""
4import os
5import warnings
6from collections import Counter
7from contextlib import nullcontext
9from numpy._utils import set_module
11from . import numeric as sb, numerictypes as nt
12from .arrayprint import _get_legacy_print_mode
# All of the functions allow formats to be a dtype
__all__ = [
    'record', 'recarray', 'format_parser', 'fromarrays', 'fromrecords',
    'fromstring', 'fromfile', 'array', 'find_duplicate',
]


# Module-level alias for the base array type; `recarray` subclasses it and
# the constructors below use it for isinstance checks and views.
ndarray = sb.ndarray

# Translation table keyed by the FIRST character of a user-supplied
# byte-order specifier. `format_parser._createdtype` looks the character up
# here and passes the resulting code to `dtype.newbyteorder`.
_byteorderconv = {'b': '>',
                  'l': '<',
                  'n': '=',
                  'B': '>',
                  'L': '<',
                  'N': '=',
                  'S': 's',
                  's': 's',
                  '>': '>',
                  '<': '<',
                  '=': '=',
                  '|': '|',
                  'I': '|',
                  'i': '|'}

# formats regular expression
# allows multidimensional spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
# NOTE(review): no regular expression is defined in the visible code, so the
# comment above looks historical -- confirm before relying on it.

numfmt = nt.sctypeDict
@set_module('numpy.rec')
def find_duplicate(list):
    """Return the elements of `list` that occur more than once.

    Every duplicated element is reported a single time, in the order in
    which it first appears in `list`.
    """
    # The parameter name shadows the builtin, but it is part of the public
    # signature and is therefore kept.
    tally = Counter(list)
    return [element for element in tally if tally[element] > 1]
56@set_module('numpy.rec')
57class format_parser:
58 """
59 Class to convert formats, names, titles description to a dtype.
61 After constructing the format_parser object, the dtype attribute is
62 the converted data-type:
63 ``dtype = format_parser(formats, names, titles).dtype``
65 Attributes
66 ----------
67 dtype : dtype
68 The converted data-type.
70 Parameters
71 ----------
72 formats : str or list of str
73 The format description, either specified as a string with
74 comma-separated format descriptions in the form ``'f8, i4, S5'``, or
75 a list of format description strings in the form
76 ``['f8', 'i4', 'S5']``.
77 names : str or list/tuple of str
78 The field names, either specified as a comma-separated string in the
79 form ``'col1, col2, col3'``, or as a list or tuple of strings in the
80 form ``['col1', 'col2', 'col3']``.
81 An empty list can be used, in that case default field names
82 ('f0', 'f1', ...) are used.
83 titles : sequence
84 Sequence of title strings. An empty list can be used to leave titles
85 out.
86 aligned : bool, optional
87 If True, align the fields by padding as the C-compiler would.
88 Default is False.
89 byteorder : str, optional
90 If specified, all the fields will be changed to the
91 provided byte-order. Otherwise, the default byte-order is
92 used. For all available string specifiers, see `dtype.newbyteorder`.
94 See Also
95 --------
96 numpy.dtype, numpy.typename
98 Examples
99 --------
100 >>> import numpy as np
101 >>> np.rec.format_parser(['<f8', '<i4'], ['col1', 'col2'],
102 ... ['T1', 'T2']).dtype
103 dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4')])
105 `names` and/or `titles` can be empty lists. If `titles` is an empty list,
106 titles will simply not appear. If `names` is empty, default field names
107 will be used.
109 >>> np.rec.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
110 ... []).dtype
111 dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
112 >>> np.rec.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
113 dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
115 """
117 def __init__(self, formats, names, titles, aligned=False, byteorder=None):
118 self._parseFormats(formats, aligned)
119 self._setfieldnames(names, titles)
120 self._createdtype(byteorder)
122 def _parseFormats(self, formats, aligned=False):
123 """ Parse the field formats """
125 if formats is None:
126 raise ValueError("Need formats argument")
127 if isinstance(formats, list):
128 dtype = sb.dtype(
129 [
130 (f'f{i}', format_)
131 for i, format_ in enumerate(formats)
132 ],
133 aligned,
134 )
135 else:
136 dtype = sb.dtype(formats, aligned)
137 fields = dtype.fields
138 if fields is None:
139 dtype = sb.dtype([('f1', dtype)], aligned)
140 fields = dtype.fields
141 keys = dtype.names
142 self._f_formats = [fields[key][0] for key in keys]
143 self._offsets = [fields[key][1] for key in keys]
144 self._nfields = len(keys)
146 def _setfieldnames(self, names, titles):
147 """convert input field names into a list and assign to the _names
148 attribute """
150 if names:
151 if type(names) in [list, tuple]:
152 pass
153 elif isinstance(names, str):
154 names = names.split(',')
155 else:
156 raise NameError(f"illegal input names {repr(names)}")
158 self._names = [n.strip() for n in names[:self._nfields]]
159 else:
160 self._names = []
162 # if the names are not specified, they will be assigned as
163 # "f0, f1, f2,..."
164 # if not enough names are specified, they will be assigned as "f[n],
165 # f[n+1],..." etc. where n is the number of specified names..."
166 self._names += ['f%d' % i for i in range(len(self._names),
167 self._nfields)]
168 # check for redundant names
169 _dup = find_duplicate(self._names)
170 if _dup:
171 raise ValueError(f"Duplicate field names: {_dup}")
173 if titles:
174 self._titles = [n.strip() for n in titles[:self._nfields]]
175 else:
176 self._titles = []
177 titles = []
179 if self._nfields > len(titles):
180 self._titles += [None] * (self._nfields - len(titles))
182 def _createdtype(self, byteorder):
183 dtype = sb.dtype({
184 'names': self._names,
185 'formats': self._f_formats,
186 'offsets': self._offsets,
187 'titles': self._titles,
188 })
189 if byteorder is not None:
190 byteorder = _byteorderconv[byteorder[0]]
191 dtype = dtype.newbyteorder(byteorder)
193 self.dtype = dtype
class record(nt.void):
    """Structured scalar whose fields can also be reached as attributes.
    """

    # Set name and module by hand so that the type of this class is shown
    # as numpy.record when printed.
    __name__ = 'record'
    __module__ = 'numpy'

    def __repr__(self):
        """Return the repr; in legacy print mode (<= 113) it equals str."""
        legacy = _get_legacy_print_mode() <= 113
        return self.__str__() if legacy else super().__repr__()

    def __str__(self):
        """Return the str; in legacy print mode (<= 113) print the item."""
        legacy = _get_legacy_print_mode() <= 113
        return str(self.item()) if legacy else super().__str__()

    def __getattribute__(self, attr):
        """Look up a regular attribute first, then fall back to a field."""
        base_lookup = nt.void.__getattribute__
        if attr in ('setfield', 'getfield', 'dtype'):
            return base_lookup(self, attr)
        try:
            return base_lookup(self, attr)
        except AttributeError:
            pass

        entry = base_lookup(self, 'dtype').fields.get(attr, None)
        if not entry:
            raise AttributeError(f"'record' object has no attribute '{attr}'")

        value = self.getfield(*entry[:2])
        try:
            value_dtype = value.dtype
        except AttributeError:
            # happens if the field is of Object type
            return value
        if value_dtype.names is None:
            return value
        # a field that itself has fields is handed back as a record
        return value.view((type(self), value_dtype))

    def __setattr__(self, attr, val):
        """Assign to a field, or to an existing regular attribute."""
        if attr in ('setfield', 'getfield', 'dtype'):
            raise AttributeError(f"Cannot set '{attr}' attribute")

        entry = nt.void.__getattribute__(self, 'dtype').fields.get(attr, None)
        if entry:
            return self.setfield(val, *entry[:2])
        if getattr(self, attr, None):
            return nt.void.__setattr__(self, attr, val)
        raise AttributeError(f"'record' object has no attribute '{attr}'")

    def __getitem__(self, indx):
        """Index like a void scalar, keeping the record type when nested."""
        item = nt.void.__getitem__(self, indx)

        # mirrors record.__getattribute__: only structured void results are
        # re-viewed, anything else is a single element returned unchanged
        if not isinstance(item, nt.void) or item.dtype.names is None:
            return item
        return item.view((type(self), item.dtype))

    def pprint(self):
        """Pretty-print all fields."""
        field_names = self.dtype.names
        width = max(len(name) for name in field_names)
        # one "name: value" row per field, names right-aligned to `width`
        template = f'% {width}s: %s'
        return "\n".join(
            template % (name, getattr(self, name)) for name in field_names
        )
# The recarray is almost identical to a standard array (which already
# supports named fields). The biggest difference is that it can use
# attribute lookup to find the fields, and that it is constructed using
# a record.

# If byteorder is given, it forces a particular byteorder on all
# the fields (and any subfields).
279@set_module("numpy.rec")
280class recarray(ndarray):
281 """Construct an ndarray that allows field access using attributes.
283 Arrays may have a data-types containing fields, analogous
284 to columns in a spread sheet. An example is ``[(x, int), (y, float)]``,
285 where each entry in the array is a pair of ``(int, float)``. Normally,
286 these attributes are accessed using dictionary lookups such as ``arr['x']``
287 and ``arr['y']``. Record arrays allow the fields to be accessed as members
288 of the array, using ``arr.x`` and ``arr.y``.
290 Parameters
291 ----------
292 shape : tuple
293 Shape of output array.
294 dtype : data-type, optional
295 The desired data-type. By default, the data-type is determined
296 from `formats`, `names`, `titles`, `aligned` and `byteorder`.
297 formats : list of data-types, optional
298 A list containing the data-types for the different columns, e.g.
299 ``['i4', 'f8', 'i4']``. `formats` does *not* support the new
300 convention of using types directly, i.e. ``(int, float, int)``.
301 Note that `formats` must be a list, not a tuple.
302 Given that `formats` is somewhat limited, we recommend specifying
303 `dtype` instead.
304 names : tuple of str, optional
305 The name of each column, e.g. ``('x', 'y', 'z')``.
306 buf : buffer, optional
307 By default, a new array is created of the given shape and data-type.
308 If `buf` is specified and is an object exposing the buffer interface,
309 the array will use the memory from the existing buffer. In this case,
310 the `offset` and `strides` keywords are available.
312 Other Parameters
313 ----------------
314 titles : tuple of str, optional
315 Aliases for column names. For example, if `names` were
316 ``('x', 'y', 'z')`` and `titles` is
317 ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
318 ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
319 byteorder : {'<', '>', '='}, optional
320 Byte-order for all fields.
321 aligned : bool, optional
322 Align the fields in memory as the C-compiler would.
323 strides : tuple of ints, optional
324 Buffer (`buf`) is interpreted according to these strides (strides
325 define how many bytes each array element, row, column, etc.
326 occupy in memory).
327 offset : int, optional
328 Start reading buffer (`buf`) from this offset onwards.
329 order : {'C', 'F'}, optional
330 Row-major (C-style) or column-major (Fortran-style) order.
332 Returns
333 -------
334 rec : recarray
335 Empty array of the given shape and type.
337 See Also
338 --------
339 numpy.rec.fromrecords : Construct a record array from data.
340 numpy.record : fundamental data-type for `recarray`.
341 numpy.rec.format_parser : determine data-type from formats, names, titles.
343 Notes
344 -----
345 This constructor can be compared to ``empty``: it creates a new record
346 array but does not fill it with data. To create a record array from data,
347 use one of the following methods:
349 1. Create a standard ndarray and convert it to a record array,
350 using ``arr.view(np.recarray)``
351 2. Use the `buf` keyword.
352 3. Use `np.rec.fromrecords`.
354 Examples
355 --------
356 Create an array with two fields, ``x`` and ``y``:
358 >>> import numpy as np
359 >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
360 >>> x
361 array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])
363 >>> x['x']
364 array([1., 3.])
366 View the array as a record array:
368 >>> x = x.view(np.recarray)
370 >>> x.x
371 array([1., 3.])
373 >>> x.y
374 array([2, 4])
376 Create a new, empty record array:
378 >>> np.recarray((2,),
379 ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
380 rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
381 (3471280, 1.2134086255804012e-316, 0)],
382 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])
384 """
386 def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
387 formats=None, names=None, titles=None,
388 byteorder=None, aligned=False, order='C'):
390 if dtype is not None:
391 descr = sb.dtype(dtype)
392 else:
393 descr = format_parser(
394 formats, names, titles, aligned, byteorder
395 ).dtype
397 if buf is None:
398 self = ndarray.__new__(
399 subtype, shape, (record, descr), order=order
400 )
401 else:
402 self = ndarray.__new__(
403 subtype, shape, (record, descr), buffer=buf,
404 offset=offset, strides=strides, order=order
405 )
406 return self
408 def __array_finalize__(self, obj):
409 if self.dtype.type is not record and self.dtype.names is not None:
410 # if self.dtype is not np.record, invoke __setattr__ which will
411 # convert it to a record if it is a void dtype.
412 self.dtype = self.dtype
414 def __getattribute__(self, attr):
415 # See if ndarray has this attr, and return it if so. (note that this
416 # means a field with the same name as an ndarray attr cannot be
417 # accessed by attribute).
418 try:
419 return object.__getattribute__(self, attr)
420 except AttributeError: # attr must be a fieldname
421 pass
423 # look for a field with this name
424 fielddict = ndarray.__getattribute__(self, 'dtype').fields
425 try:
426 res = fielddict[attr][:2]
427 except (TypeError, KeyError) as e:
428 raise AttributeError(f"recarray has no attribute {attr}") from e
429 obj = self.getfield(*res)
431 # At this point obj will always be a recarray, since (see
432 # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
433 # non-structured, convert it to an ndarray. Then if obj is structured
434 # with void type convert it to the same dtype.type (eg to preserve
435 # numpy.record type if present), since nested structured fields do not
436 # inherit type. Don't do this for non-void structures though.
437 if obj.dtype.names is not None:
438 if issubclass(obj.dtype.type, nt.void):
439 return obj.view(dtype=(self.dtype.type, obj.dtype))
440 return obj
441 else:
442 return obj.view(ndarray)
444 # Save the dictionary.
445 # If the attr is a field name and not in the saved dictionary
446 # Undo any "setting" of the attribute and do a setfield
447 # Thus, you can't create attributes on-the-fly that are field names.
448 def __setattr__(self, attr, val):
450 # Automatically convert (void) structured types to records
451 # (but not non-void structures, subarrays, or non-structured voids)
452 if (
453 attr == 'dtype' and
454 issubclass(val.type, nt.void) and
455 val.names is not None
456 ):
457 val = sb.dtype((record, val))
459 newattr = attr not in self.__dict__
460 try:
461 ret = object.__setattr__(self, attr, val)
462 except Exception:
463 fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
464 if attr not in fielddict:
465 raise
466 else:
467 fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
468 if attr not in fielddict:
469 return ret
470 if newattr:
471 # We just added this one or this setattr worked on an
472 # internal attribute.
473 try:
474 object.__delattr__(self, attr)
475 except Exception:
476 return ret
477 try:
478 res = fielddict[attr][:2]
479 except (TypeError, KeyError) as e:
480 raise AttributeError(
481 f"record array has no attribute {attr}"
482 ) from e
483 return self.setfield(val, *res)
485 def __getitem__(self, indx):
486 obj = super().__getitem__(indx)
488 # copy behavior of getattr, except that here
489 # we might also be returning a single element
490 if isinstance(obj, ndarray):
491 if obj.dtype.names is not None:
492 obj = obj.view(type(self))
493 if issubclass(obj.dtype.type, nt.void):
494 return obj.view(dtype=(self.dtype.type, obj.dtype))
495 return obj
496 else:
497 return obj.view(type=ndarray)
498 else:
499 # return a single element
500 return obj
502 def __repr__(self):
504 repr_dtype = self.dtype
505 if (
506 self.dtype.type is record or
507 not issubclass(self.dtype.type, nt.void)
508 ):
509 # If this is a full record array (has numpy.record dtype),
510 # or if it has a scalar (non-void) dtype with no records,
511 # represent it using the rec.array function. Since rec.array
512 # converts dtype to a numpy.record for us, convert back
513 # to non-record before printing
514 if repr_dtype.type is record:
515 repr_dtype = sb.dtype((nt.void, repr_dtype))
516 prefix = "rec.array("
517 fmt = 'rec.array(%s,%sdtype=%s)'
518 else:
519 # otherwise represent it using np.array plus a view
520 # This should only happen if the user is playing
521 # strange games with dtypes.
522 prefix = "array("
523 fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'
525 # get data/shape string. logic taken from numeric.array_repr
526 if self.size > 0 or self.shape == (0,):
527 lst = sb.array2string(
528 self, separator=', ', prefix=prefix, suffix=',')
529 else:
530 # show zero-length shape unless it is (0,)
531 lst = f"[], shape={repr(self.shape)}"
533 lf = '\n' + ' ' * len(prefix)
534 if _get_legacy_print_mode() <= 113:
535 lf = ' ' + lf # trailing space
536 return fmt % (lst, lf, repr_dtype)
538 def field(self, attr, val=None):
539 if isinstance(attr, int):
540 names = ndarray.__getattribute__(self, 'dtype').names
541 attr = names[attr]
543 fielddict = ndarray.__getattribute__(self, 'dtype').fields
545 res = fielddict[attr][:2]
547 if val is None:
548 obj = self.getfield(*res)
549 if obj.dtype.names is not None:
550 return obj
551 return obj.view(ndarray)
552 else:
553 return self.setfield(val, *res)
def _deprecate_shape_0_as_None(shape):
    """Translate the deprecated ``shape=0`` spelling into ``None``.

    Any other value is returned unchanged. When `shape` compares equal to 0
    a FutureWarning is issued and None is returned, so that callers go on to
    infer the shape.
    """
    is_legacy_zero = shape == 0
    if not is_legacy_zero:
        return shape

    warnings.warn(
        "Passing `shape=0` to have the shape be inferred is deprecated, "
        "and in future will be equivalent to `shape=(0,)`. To infer "
        "the shape and suppress this warning, pass `shape=None` instead.",
        FutureWarning, stacklevel=3)
    return None
568@set_module("numpy.rec")
569def fromarrays(arrayList, dtype=None, shape=None, formats=None,
570 names=None, titles=None, aligned=False, byteorder=None):
571 """Create a record array from a (flat) list of arrays
573 Parameters
574 ----------
575 arrayList : list or tuple
576 List of array-like objects (such as lists, tuples,
577 and ndarrays).
578 dtype : data-type, optional
579 valid dtype for all arrays
580 shape : int or tuple of ints, optional
581 Shape of the resulting array. If not provided, inferred from
582 ``arrayList[0]``.
583 formats, names, titles, aligned, byteorder :
584 If `dtype` is ``None``, these arguments are passed to
585 `numpy.rec.format_parser` to construct a dtype. See that function for
586 detailed documentation.
588 Returns
589 -------
590 np.recarray
591 Record array consisting of given arrayList columns.
593 Examples
594 --------
595 >>> x1=np.array([1,2,3,4])
596 >>> x2=np.array(['a','dd','xyz','12'])
597 >>> x3=np.array([1.1,2,3,4])
598 >>> r = np.rec.fromarrays([x1,x2,x3],names='a,b,c')
599 >>> print(r[1])
600 (2, 'dd', 2.0) # may vary
601 >>> x1[1]=34
602 >>> r.a
603 array([1, 2, 3, 4])
605 >>> x1 = np.array([1, 2, 3, 4])
606 >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
607 >>> x3 = np.array([1.1, 2, 3,4])
608 >>> r = np.rec.fromarrays(
609 ... [x1, x2, x3],
610 ... dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
611 >>> r
612 rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
613 (4, b'12', 4. )],
614 dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
615 """
617 arrayList = [sb.asarray(x) for x in arrayList]
619 # NumPy 1.19.0, 2020-01-01
620 shape = _deprecate_shape_0_as_None(shape)
622 if shape is None:
623 shape = arrayList[0].shape
624 elif isinstance(shape, int):
625 shape = (shape,)
627 if formats is None and dtype is None:
628 # go through each object in the list to see if it is an ndarray
629 # and determine the formats.
630 formats = [obj.dtype for obj in arrayList]
632 if dtype is not None:
633 descr = sb.dtype(dtype)
634 else:
635 descr = format_parser(formats, names, titles, aligned, byteorder).dtype
636 _names = descr.names
638 # Determine shape from data-type.
639 if len(descr) != len(arrayList):
640 raise ValueError("mismatch between the number of fields "
641 "and the number of arrays")
643 d0 = descr[0].shape
644 nn = len(d0)
645 if nn > 0:
646 shape = shape[:-nn]
648 _array = recarray(shape, descr)
650 # populate the record array (makes a copy)
651 for k, obj in enumerate(arrayList):
652 nn = descr[k].ndim
653 testshape = obj.shape[:obj.ndim - nn]
654 name = _names[k]
655 if testshape != shape:
656 raise ValueError(f'array-shape mismatch in array {k} ("{name}")')
658 _array[name] = obj
660 return _array
663@set_module("numpy.rec")
664def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
665 titles=None, aligned=False, byteorder=None):
666 """Create a recarray from a list of records in text form.
668 Parameters
669 ----------
670 recList : sequence
671 data in the same field may be heterogeneous - they will be promoted
672 to the highest data type.
673 dtype : data-type, optional
674 valid dtype for all arrays
675 shape : int or tuple of ints, optional
676 shape of each array.
677 formats, names, titles, aligned, byteorder :
678 If `dtype` is ``None``, these arguments are passed to
679 `numpy.format_parser` to construct a dtype. See that function for
680 detailed documentation.
682 If both `formats` and `dtype` are None, then this will auto-detect
683 formats. Use list of tuples rather than list of lists for faster
684 processing.
686 Returns
687 -------
688 np.recarray
689 record array consisting of given recList rows.
691 Examples
692 --------
693 >>> r=np.rec.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
694 ... names='col1,col2,col3')
695 >>> print(r[0])
696 (456, 'dbe', 1.2)
697 >>> r.col1
698 array([456, 2])
699 >>> r.col2
700 array(['dbe', 'de'], dtype='<U3')
701 >>> import pickle
702 >>> pickle.loads(pickle.dumps(r))
703 rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
704 dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
705 """
707 if formats is None and dtype is None: # slower
708 obj = sb.array(recList, dtype=object)
709 arrlist = [
710 sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])
711 ]
712 return fromarrays(arrlist, formats=formats, shape=shape, names=names,
713 titles=titles, aligned=aligned, byteorder=byteorder)
715 if dtype is not None:
716 descr = sb.dtype((record, dtype))
717 else:
718 descr = format_parser(
719 formats, names, titles, aligned, byteorder
720 ).dtype
722 try:
723 retval = sb.array(recList, dtype=descr)
724 except (TypeError, ValueError):
725 # NumPy 1.19.0, 2020-01-01
726 shape = _deprecate_shape_0_as_None(shape)
727 if shape is None:
728 shape = len(recList)
729 if isinstance(shape, int):
730 shape = (shape,)
731 if len(shape) > 1:
732 raise ValueError("Can only deal with 1-d array.")
733 _array = recarray(shape, descr)
734 for k in range(_array.size):
735 _array[k] = tuple(recList[k])
736 # list of lists instead of list of tuples ?
737 # 2018-02-07, 1.14.1
738 warnings.warn(
739 "fromrecords expected a list of tuples, may have received a list "
740 "of lists instead. In the future that will raise an error",
741 FutureWarning, stacklevel=2)
742 return _array
743 else:
744 if shape is not None and retval.shape != shape:
745 retval.shape = shape
747 res = retval.view(recarray)
749 return res
752@set_module("numpy.rec")
753def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
754 names=None, titles=None, aligned=False, byteorder=None):
755 r"""Create a record array from binary data
757 Note that despite the name of this function it does not accept `str`
758 instances.
760 Parameters
761 ----------
762 datastring : bytes-like
763 Buffer of binary data
764 dtype : data-type, optional
765 Valid dtype for all arrays
766 shape : int or tuple of ints, optional
767 Shape of each array.
768 offset : int, optional
769 Position in the buffer to start reading from.
770 formats, names, titles, aligned, byteorder :
771 If `dtype` is ``None``, these arguments are passed to
772 `numpy.format_parser` to construct a dtype. See that function for
773 detailed documentation.
776 Returns
777 -------
778 np.recarray
779 Record array view into the data in datastring. This will be readonly
780 if `datastring` is readonly.
782 See Also
783 --------
784 numpy.frombuffer
786 Examples
787 --------
788 >>> a = b'\x01\x02\x03abc'
789 >>> np.rec.fromstring(a, dtype='u1,u1,u1,S3')
790 rec.array([(1, 2, 3, b'abc')],
791 dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
793 >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
794 ... ('GradeLevel', np.int32)]
795 >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
796 ... ('Aadi', 66.6, 6)], dtype=grades_dtype)
797 >>> np.rec.fromstring(grades_array.tobytes(), dtype=grades_dtype)
798 rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
799 dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])
801 >>> s = '\x01\x02\x03abc'
802 >>> np.rec.fromstring(s, dtype='u1,u1,u1,S3')
803 Traceback (most recent call last):
804 ...
805 TypeError: a bytes-like object is required, not 'str'
806 """
808 if dtype is None and formats is None:
809 raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
811 if dtype is not None:
812 descr = sb.dtype(dtype)
813 else:
814 descr = format_parser(formats, names, titles, aligned, byteorder).dtype
816 itemsize = descr.itemsize
818 # NumPy 1.19.0, 2020-01-01
819 shape = _deprecate_shape_0_as_None(shape)
821 if shape in (None, -1):
822 shape = (len(datastring) - offset) // itemsize
824 _array = recarray(shape, descr, buf=datastring, offset=offset)
825 return _array
def get_remaining_size(fd):
    """Return the number of bytes between the current position and the end.

    `fd` needs ``tell`` and ``seek``. Its position is restored before
    returning, also when measuring fails.
    """
    start = fd.tell()
    try:
        fd.seek(0, os.SEEK_END)
        remaining = fd.tell() - start
    finally:
        fd.seek(start, os.SEEK_SET)
    return remaining
836@set_module("numpy.rec")
837def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
838 names=None, titles=None, aligned=False, byteorder=None):
839 """Create an array from binary file data
841 Parameters
842 ----------
843 fd : str or file type
844 If file is a string or a path-like object then that file is opened,
845 else it is assumed to be a file object. The file object must
846 support random access (i.e. it must have tell and seek methods).
847 dtype : data-type, optional
848 valid dtype for all arrays
849 shape : int or tuple of ints, optional
850 shape of each array.
851 offset : int, optional
852 Position in the file to start reading from.
853 formats, names, titles, aligned, byteorder :
854 If `dtype` is ``None``, these arguments are passed to
855 `numpy.format_parser` to construct a dtype. See that function for
856 detailed documentation
858 Returns
859 -------
860 np.recarray
861 record array consisting of data enclosed in file.
863 Examples
864 --------
865 >>> from tempfile import TemporaryFile
866 >>> a = np.empty(10,dtype='f8,i4,a5')
867 >>> a[5] = (0.5,10,'abcde')
868 >>>
869 >>> fd=TemporaryFile()
870 >>> a = a.view(a.dtype.newbyteorder('<'))
871 >>> a.tofile(fd)
872 >>>
873 >>> _ = fd.seek(0)
874 >>> r=np.rec.fromfile(fd, formats='f8,i4,a5', shape=10,
875 ... byteorder='<')
876 >>> print(r[5])
877 (0.5, 10, b'abcde')
878 >>> r.shape
879 (10,)
880 """
882 if dtype is None and formats is None:
883 raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
885 # NumPy 1.19.0, 2020-01-01
886 shape = _deprecate_shape_0_as_None(shape)
888 if shape is None:
889 shape = (-1,)
890 elif isinstance(shape, int):
891 shape = (shape,)
893 if hasattr(fd, 'readinto'):
894 # GH issue 2504. fd supports io.RawIOBase or io.BufferedIOBase
895 # interface. Example of fd: gzip, BytesIO, BufferedReader
896 # file already opened
897 ctx = nullcontext(fd)
898 else:
899 # open file
900 ctx = open(os.fspath(fd), 'rb')
902 with ctx as fd:
903 if offset > 0:
904 fd.seek(offset, 1)
905 size = get_remaining_size(fd)
907 if dtype is not None:
908 descr = sb.dtype(dtype)
909 else:
910 descr = format_parser(
911 formats, names, titles, aligned, byteorder
912 ).dtype
914 itemsize = descr.itemsize
916 shapeprod = sb.array(shape).prod(dtype=nt.intp)
917 shapesize = shapeprod * itemsize
918 if shapesize < 0:
919 shape = list(shape)
920 shape[shape.index(-1)] = size // -shapesize
921 shape = tuple(shape)
922 shapeprod = sb.array(shape).prod(dtype=nt.intp)
924 nbytes = shapeprod * itemsize
926 if nbytes > size:
927 raise ValueError(
928 "Not enough bytes left in file for specified "
929 "shape and type."
930 )
932 # create the array
933 _array = recarray(shape, descr)
934 nbytesread = fd.readinto(_array.data)
935 if nbytesread != nbytes:
936 raise OSError("Didn't read as many bytes as expected")
938 return _array
941@set_module("numpy.rec")
942def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
943 names=None, titles=None, aligned=False, byteorder=None, copy=True):
944 """
945 Construct a record array from a wide-variety of objects.
947 A general-purpose record array constructor that dispatches to the
948 appropriate `recarray` creation function based on the inputs (see Notes).
950 Parameters
951 ----------
952 obj : any
953 Input object. See Notes for details on how various input types are
954 treated.
955 dtype : data-type, optional
956 Valid dtype for array.
957 shape : int or tuple of ints, optional
958 Shape of each array.
959 offset : int, optional
960 Position in the file or buffer to start reading from.
961 strides : tuple of ints, optional
962 Buffer (`buf`) is interpreted according to these strides (strides
963 define how many bytes each array element, row, column, etc.
964 occupy in memory).
965 formats, names, titles, aligned, byteorder :
966 If `dtype` is ``None``, these arguments are passed to
967 `numpy.format_parser` to construct a dtype. See that function for
968 detailed documentation.
969 copy : bool, optional
970 Whether to copy the input object (True), or to use a reference instead.
971 This option only applies when the input is an ndarray or recarray.
972 Defaults to True.
974 Returns
975 -------
976 np.recarray
977 Record array created from the specified object.
979 Notes
980 -----
981 If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
982 `obj` is a string, then call the `fromstring` constructor. If `obj` is a
983 list or a tuple, then if the first object is an `~numpy.ndarray`, call
984 `fromarrays`, otherwise call `fromrecords`. If `obj` is a
985 `~numpy.recarray`, then make a copy of the data in the recarray
986 (if ``copy=True``) and use the new formats, names, and titles. If `obj`
987 is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then
988 return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
990 Examples
991 --------
992 >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
993 >>> a
994 array([[1, 2, 3],
995 [4, 5, 6],
996 [7, 8, 9]])
998 >>> np.rec.array(a)
999 rec.array([[1, 2, 3],
1000 [4, 5, 6],
1001 [7, 8, 9]],
1002 dtype=int64)
1004 >>> b = [(1, 1), (2, 4), (3, 9)]
1005 >>> c = np.rec.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
1006 >>> c
1007 rec.array([(1, 1.), (2, 4.), (3, 9.)],
1008 dtype=[('x', '<i2'), ('y', '<f2')])
1010 >>> c.x
1011 array([1, 2, 3], dtype=int16)
1013 >>> c.y
1014 array([1., 4., 9.], dtype=float16)
1016 >>> r = np.rec.array(['abc','def'], names=['col1','col2'])
1017 >>> print(r.col1)
1018 abc
1020 >>> r.col1
1021 array('abc', dtype='<U3')
1023 >>> r.col2
1024 array('def', dtype='<U3')
1025 """
1027 if ((isinstance(obj, (type(None), str)) or hasattr(obj, 'readinto')) and
1028 formats is None and dtype is None):
1029 raise ValueError("Must define formats (or dtype) if object is "
1030 "None, string, or an open file")
1032 kwds = {}
1033 if dtype is not None:
1034 dtype = sb.dtype(dtype)
1035 elif formats is not None:
1036 dtype = format_parser(formats, names, titles,
1037 aligned, byteorder).dtype
1038 else:
1039 kwds = {'formats': formats,
1040 'names': names,
1041 'titles': titles,
1042 'aligned': aligned,
1043 'byteorder': byteorder
1044 }
1046 if obj is None:
1047 if shape is None:
1048 raise ValueError("Must define a shape if obj is None")
1049 return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)
1051 elif isinstance(obj, bytes):
1052 return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)
1054 elif isinstance(obj, (list, tuple)):
1055 if isinstance(obj[0], (tuple, list)):
1056 return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
1057 else:
1058 return fromarrays(obj, dtype=dtype, shape=shape, **kwds)
1060 elif isinstance(obj, recarray):
1061 if dtype is not None and (obj.dtype != dtype):
1062 new = obj.view(dtype)
1063 else:
1064 new = obj
1065 if copy:
1066 new = new.copy()
1067 return new
1069 elif hasattr(obj, 'readinto'):
1070 return fromfile(obj, dtype=dtype, shape=shape, offset=offset)
1072 elif isinstance(obj, ndarray):
1073 if dtype is not None and (obj.dtype != dtype):
1074 new = obj.view(dtype)
1075 else:
1076 new = obj
1077 if copy:
1078 new = new.copy()
1079 return new.view(recarray)
1081 else:
1082 interface = getattr(obj, "__array_interface__", None)
1083 if interface is None or not isinstance(interface, dict):
1084 raise ValueError("Unknown input type")
1085 obj = sb.array(obj)
1086 if dtype is not None and (obj.dtype != dtype):
1087 obj = obj.view(dtype)
1088 return obj.view(recarray)