Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/_core/records.py: 14%
360 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
1"""
2This module contains a set of functions for record arrays.
3"""
4import os
5import warnings
6from collections import Counter
7from contextlib import nullcontext
9from .._utils import set_module
10from . import numeric as sb
11from . import numerictypes as nt
12from .arrayprint import _get_legacy_print_mode
# All of the functions allow formats to be a dtype

# Public names exported by this module.
__all__ = [
    'record', 'recarray', 'format_parser', 'fromarrays', 'fromrecords',
    'fromstring', 'fromfile', 'array', 'find_duplicate',
]


# Local alias; used below as the base class of `recarray` and in
# isinstance checks.
ndarray = sb.ndarray

# Maps the first character of a user-supplied byte-order specifier to the
# code handed to ``dtype.newbyteorder`` (see ``format_parser._createdtype``).
# Upper- and lower-case spellings map to the same code.
_byteorderconv = {'b': '>',
                  'l': '<',
                  'n': '=',
                  'B': '>',
                  'L': '<',
                  'N': '=',
                  'S': 's',
                  's': 's',
                  '>': '>',
                  '<': '<',
                  '=': '=',
                  '|': '|',
                  'I': '|',
                  'i': '|'}

# formats regular expression
# allows multidimensional spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
# NOTE(review): no such regular expression is defined in the visible source;
# the comment above looks historical -- confirm before relying on it.

# NOTE(review): `numfmt` is not referenced anywhere else in the visible
# source; presumably kept for backward compatibility -- confirm.
numfmt = nt.sctypeDict
46@set_module('numpy.rec')
47def find_duplicate(list):
48 """Find duplication in a list, return a list of duplicated elements"""
49 return [
50 item
51 for item, counts in Counter(list).items()
52 if counts > 1
53 ]
56@set_module('numpy.rec')
57class format_parser:
58 """
59 Class to convert formats, names, titles description to a dtype.
61 After constructing the format_parser object, the dtype attribute is
62 the converted data-type:
63 ``dtype = format_parser(formats, names, titles).dtype``
65 Attributes
66 ----------
67 dtype : dtype
68 The converted data-type.
70 Parameters
71 ----------
72 formats : str or list of str
73 The format description, either specified as a string with
74 comma-separated format descriptions in the form ``'f8, i4, S5'``, or
75 a list of format description strings in the form
76 ``['f8', 'i4', 'S5']``.
77 names : str or list/tuple of str
78 The field names, either specified as a comma-separated string in the
79 form ``'col1, col2, col3'``, or as a list or tuple of strings in the
80 form ``['col1', 'col2', 'col3']``.
81 An empty list can be used, in that case default field names
82 ('f0', 'f1', ...) are used.
83 titles : sequence
84 Sequence of title strings. An empty list can be used to leave titles
85 out.
86 aligned : bool, optional
87 If True, align the fields by padding as the C-compiler would.
88 Default is False.
89 byteorder : str, optional
90 If specified, all the fields will be changed to the
91 provided byte-order. Otherwise, the default byte-order is
92 used. For all available string specifiers, see `dtype.newbyteorder`.
94 See Also
95 --------
96 numpy.dtype, numpy.typename
98 Examples
99 --------
100 >>> np.rec.format_parser(['<f8', '<i4'], ['col1', 'col2'],
101 ... ['T1', 'T2']).dtype
102 dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4')])
104 `names` and/or `titles` can be empty lists. If `titles` is an empty list,
105 titles will simply not appear. If `names` is empty, default field names
106 will be used.
108 >>> np.rec.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
109 ... []).dtype
110 dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
111 >>> np.rec.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
112 dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
114 """
116 def __init__(self, formats, names, titles, aligned=False, byteorder=None):
117 self._parseFormats(formats, aligned)
118 self._setfieldnames(names, titles)
119 self._createdtype(byteorder)
121 def _parseFormats(self, formats, aligned=False):
122 """ Parse the field formats """
124 if formats is None:
125 raise ValueError("Need formats argument")
126 if isinstance(formats, list):
127 dtype = sb.dtype(
128 [
129 ('f{}'.format(i), format_)
130 for i, format_ in enumerate(formats)
131 ],
132 aligned,
133 )
134 else:
135 dtype = sb.dtype(formats, aligned)
136 fields = dtype.fields
137 if fields is None:
138 dtype = sb.dtype([('f1', dtype)], aligned)
139 fields = dtype.fields
140 keys = dtype.names
141 self._f_formats = [fields[key][0] for key in keys]
142 self._offsets = [fields[key][1] for key in keys]
143 self._nfields = len(keys)
145 def _setfieldnames(self, names, titles):
146 """convert input field names into a list and assign to the _names
147 attribute """
149 if names:
150 if type(names) in [list, tuple]:
151 pass
152 elif isinstance(names, str):
153 names = names.split(',')
154 else:
155 raise NameError("illegal input names %s" % repr(names))
157 self._names = [n.strip() for n in names[:self._nfields]]
158 else:
159 self._names = []
161 # if the names are not specified, they will be assigned as
162 # "f0, f1, f2,..."
163 # if not enough names are specified, they will be assigned as "f[n],
164 # f[n+1],..." etc. where n is the number of specified names..."
165 self._names += ['f%d' % i for i in range(len(self._names),
166 self._nfields)]
167 # check for redundant names
168 _dup = find_duplicate(self._names)
169 if _dup:
170 raise ValueError("Duplicate field names: %s" % _dup)
172 if titles:
173 self._titles = [n.strip() for n in titles[:self._nfields]]
174 else:
175 self._titles = []
176 titles = []
178 if self._nfields > len(titles):
179 self._titles += [None] * (self._nfields - len(titles))
181 def _createdtype(self, byteorder):
182 dtype = sb.dtype({
183 'names': self._names,
184 'formats': self._f_formats,
185 'offsets': self._offsets,
186 'titles': self._titles,
187 })
188 if byteorder is not None:
189 byteorder = _byteorderconv[byteorder[0]]
190 dtype = dtype.newbyteorder(byteorder)
192 self.dtype = dtype
class record(nt.void):
    """A data-type scalar that allows field access as attribute lookup.
    """

    # manually set name and module so that this class's type shows up
    # as numpy.record when printed
    __name__ = 'record'
    __module__ = 'numpy'

    def __repr__(self):
        if _get_legacy_print_mode() <= 113:
            return self.__str__()
        return super().__repr__()

    def __str__(self):
        if _get_legacy_print_mode() <= 113:
            return str(self.item())
        return super().__str__()

    def __getattribute__(self, attr):
        """Return a regular attribute if one exists, else the field `attr`.

        Raises
        ------
        AttributeError
            If `attr` is neither an attribute nor a field name.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            return nt.void.__getattribute__(self, attr)
        try:
            return nt.void.__getattribute__(self, attr)
        except AttributeError:
            pass
        # `fields` is None for a dtype without fields; fall back to an empty
        # mapping so that the lookup below ends in the intended error message.
        fielddict = nt.void.__getattribute__(self, 'dtype').fields or {}
        res = fielddict.get(attr)
        if res is None:
            raise AttributeError("'record' object has no "
                    "attribute '%s'" % attr)

        obj = self.getfield(*res[:2])
        # if it has fields return a record,
        # otherwise return the object
        try:
            dt = obj.dtype
        except AttributeError:
            # happens if field is Object type
            return obj
        if dt.names is not None:
            return obj.view((self.__class__, obj.dtype))
        return obj

    def __setattr__(self, attr, val):
        """Assign to the field `attr`, or to an existing regular attribute.

        Raises
        ------
        AttributeError
            If `attr` is one of 'setfield', 'getfield', 'dtype', or if it is
            neither a field name nor an existing attribute.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            raise AttributeError("Cannot set '%s' attribute" % attr)
        fielddict = nt.void.__getattribute__(self, 'dtype').fields or {}
        res = fielddict.get(attr)
        if res is not None:
            return self.setfield(val, *res[:2])
        # Test for existence, not truthiness: an existing attribute whose
        # value is falsy (e.g. ``ndim == 0``) must not be reported as missing.
        if hasattr(self, attr):
            return nt.void.__setattr__(self, attr, val)
        raise AttributeError("'record' object has no "
                "attribute '%s'" % attr)

    def __getitem__(self, indx):
        obj = nt.void.__getitem__(self, indx)

        # copy behavior of record.__getattribute__,
        if isinstance(obj, nt.void) and obj.dtype.names is not None:
            return obj.view((self.__class__, obj.dtype))
        else:
            # return a single element
            return obj

    def pprint(self):
        """Pretty-print all fields.

        Returns
        -------
        str
            One ``name: value`` line per field, with the names right-aligned
            to the longest field name.  Empty if there are no fields.
        """
        names = self.dtype.names
        if not names:
            return ""
        maxlen = max(len(name) for name in names)
        fmt = '%% %ds: %%s' % maxlen
        # Index by name rather than getattr: attribute lookup prefers regular
        # scalar attributes, so a field called e.g. 'size' would be shadowed.
        rows = [fmt % (name, self[name]) for name in names]
        return "\n".join(rows)
# The recarray is almost identical to a standard array (which already
# supports named fields).  The biggest difference is that it can use
# attribute lookup to find the fields, and it is constructed using
# a record.

# If byteorder is given, it forces a particular byteorder on all
# the fields (and any subfields).
281@set_module("numpy.rec")
282class recarray(ndarray):
283 """Construct an ndarray that allows field access using attributes.
285 Arrays may have a data-types containing fields, analogous
286 to columns in a spread sheet. An example is ``[(x, int), (y, float)]``,
287 where each entry in the array is a pair of ``(int, float)``. Normally,
288 these attributes are accessed using dictionary lookups such as ``arr['x']``
289 and ``arr['y']``. Record arrays allow the fields to be accessed as members
290 of the array, using ``arr.x`` and ``arr.y``.
292 Parameters
293 ----------
294 shape : tuple
295 Shape of output array.
296 dtype : data-type, optional
297 The desired data-type. By default, the data-type is determined
298 from `formats`, `names`, `titles`, `aligned` and `byteorder`.
299 formats : list of data-types, optional
300 A list containing the data-types for the different columns, e.g.
301 ``['i4', 'f8', 'i4']``. `formats` does *not* support the new
302 convention of using types directly, i.e. ``(int, float, int)``.
303 Note that `formats` must be a list, not a tuple.
304 Given that `formats` is somewhat limited, we recommend specifying
305 `dtype` instead.
306 names : tuple of str, optional
307 The name of each column, e.g. ``('x', 'y', 'z')``.
308 buf : buffer, optional
309 By default, a new array is created of the given shape and data-type.
310 If `buf` is specified and is an object exposing the buffer interface,
311 the array will use the memory from the existing buffer. In this case,
312 the `offset` and `strides` keywords are available.
314 Other Parameters
315 ----------------
316 titles : tuple of str, optional
317 Aliases for column names. For example, if `names` were
318 ``('x', 'y', 'z')`` and `titles` is
319 ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
320 ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
321 byteorder : {'<', '>', '='}, optional
322 Byte-order for all fields.
323 aligned : bool, optional
324 Align the fields in memory as the C-compiler would.
325 strides : tuple of ints, optional
326 Buffer (`buf`) is interpreted according to these strides (strides
327 define how many bytes each array element, row, column, etc.
328 occupy in memory).
329 offset : int, optional
330 Start reading buffer (`buf`) from this offset onwards.
331 order : {'C', 'F'}, optional
332 Row-major (C-style) or column-major (Fortran-style) order.
334 Returns
335 -------
336 rec : recarray
337 Empty array of the given shape and type.
339 See Also
340 --------
341 numpy.rec.fromrecords : Construct a record array from data.
342 numpy.record : fundamental data-type for `recarray`.
343 numpy.rec.format_parser : determine data-type from formats, names, titles.
345 Notes
346 -----
347 This constructor can be compared to ``empty``: it creates a new record
348 array but does not fill it with data. To create a record array from data,
349 use one of the following methods:
351 1. Create a standard ndarray and convert it to a record array,
352 using ``arr.view(np.recarray)``
353 2. Use the `buf` keyword.
354 3. Use `np.rec.fromrecords`.
356 Examples
357 --------
358 Create an array with two fields, ``x`` and ``y``:
360 >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
361 >>> x
362 array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])
364 >>> x['x']
365 array([1., 3.])
367 View the array as a record array:
369 >>> x = x.view(np.recarray)
371 >>> x.x
372 array([1., 3.])
374 >>> x.y
375 array([2, 4])
377 Create a new, empty record array:
379 >>> np.recarray((2,),
380 ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
381 rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
382 (3471280, 1.2134086255804012e-316, 0)],
383 dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])
385 """
387 def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
388 formats=None, names=None, titles=None,
389 byteorder=None, aligned=False, order='C'):
391 if dtype is not None:
392 descr = sb.dtype(dtype)
393 else:
394 descr = format_parser(
395 formats, names, titles, aligned, byteorder
396 ).dtype
398 if buf is None:
399 self = ndarray.__new__(
400 subtype, shape, (record, descr), order=order
401 )
402 else:
403 self = ndarray.__new__(
404 subtype, shape, (record, descr), buffer=buf,
405 offset=offset, strides=strides, order=order
406 )
407 return self
409 def __array_finalize__(self, obj):
410 if self.dtype.type is not record and self.dtype.names is not None:
411 # if self.dtype is not np.record, invoke __setattr__ which will
412 # convert it to a record if it is a void dtype.
413 self.dtype = self.dtype
415 def __getattribute__(self, attr):
416 # See if ndarray has this attr, and return it if so. (note that this
417 # means a field with the same name as an ndarray attr cannot be
418 # accessed by attribute).
419 try:
420 return object.__getattribute__(self, attr)
421 except AttributeError: # attr must be a fieldname
422 pass
424 # look for a field with this name
425 fielddict = ndarray.__getattribute__(self, 'dtype').fields
426 try:
427 res = fielddict[attr][:2]
428 except (TypeError, KeyError) as e:
429 raise AttributeError("recarray has no attribute %s" % attr) from e
430 obj = self.getfield(*res)
432 # At this point obj will always be a recarray, since (see
433 # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
434 # non-structured, convert it to an ndarray. Then if obj is structured
435 # with void type convert it to the same dtype.type (eg to preserve
436 # numpy.record type if present), since nested structured fields do not
437 # inherit type. Don't do this for non-void structures though.
438 if obj.dtype.names is not None:
439 if issubclass(obj.dtype.type, nt.void):
440 return obj.view(dtype=(self.dtype.type, obj.dtype))
441 return obj
442 else:
443 return obj.view(ndarray)
445 # Save the dictionary.
446 # If the attr is a field name and not in the saved dictionary
447 # Undo any "setting" of the attribute and do a setfield
448 # Thus, you can't create attributes on-the-fly that are field names.
449 def __setattr__(self, attr, val):
451 # Automatically convert (void) structured types to records
452 # (but not non-void structures, subarrays, or non-structured voids)
453 if (
454 attr == 'dtype' and
455 issubclass(val.type, nt.void) and
456 val.names is not None
457 ):
458 val = sb.dtype((record, val))
460 newattr = attr not in self.__dict__
461 try:
462 ret = object.__setattr__(self, attr, val)
463 except Exception:
464 fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
465 if attr not in fielddict:
466 raise
467 else:
468 fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
469 if attr not in fielddict:
470 return ret
471 if newattr:
472 # We just added this one or this setattr worked on an
473 # internal attribute.
474 try:
475 object.__delattr__(self, attr)
476 except Exception:
477 return ret
478 try:
479 res = fielddict[attr][:2]
480 except (TypeError, KeyError) as e:
481 raise AttributeError(
482 "record array has no attribute %s" % attr
483 ) from e
484 return self.setfield(val, *res)
486 def __getitem__(self, indx):
487 obj = super().__getitem__(indx)
489 # copy behavior of getattr, except that here
490 # we might also be returning a single element
491 if isinstance(obj, ndarray):
492 if obj.dtype.names is not None:
493 obj = obj.view(type(self))
494 if issubclass(obj.dtype.type, nt.void):
495 return obj.view(dtype=(self.dtype.type, obj.dtype))
496 return obj
497 else:
498 return obj.view(type=ndarray)
499 else:
500 # return a single element
501 return obj
503 def __repr__(self):
505 repr_dtype = self.dtype
506 if (
507 self.dtype.type is record or
508 not issubclass(self.dtype.type, nt.void)
509 ):
510 # If this is a full record array (has numpy.record dtype),
511 # or if it has a scalar (non-void) dtype with no records,
512 # represent it using the rec.array function. Since rec.array
513 # converts dtype to a numpy.record for us, convert back
514 # to non-record before printing
515 if repr_dtype.type is record:
516 repr_dtype = sb.dtype((nt.void, repr_dtype))
517 prefix = "rec.array("
518 fmt = 'rec.array(%s,%sdtype=%s)'
519 else:
520 # otherwise represent it using np.array plus a view
521 # This should only happen if the user is playing
522 # strange games with dtypes.
523 prefix = "array("
524 fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'
526 # get data/shape string. logic taken from numeric.array_repr
527 if self.size > 0 or self.shape == (0,):
528 lst = sb.array2string(
529 self, separator=', ', prefix=prefix, suffix=',')
530 else:
531 # show zero-length shape unless it is (0,)
532 lst = "[], shape=%s" % (repr(self.shape),)
534 lf = '\n'+' '*len(prefix)
535 if _get_legacy_print_mode() <= 113:
536 lf = ' ' + lf # trailing space
537 return fmt % (lst, lf, repr_dtype)
539 def field(self, attr, val=None):
540 if isinstance(attr, int):
541 names = ndarray.__getattribute__(self, 'dtype').names
542 attr = names[attr]
544 fielddict = ndarray.__getattribute__(self, 'dtype').fields
546 res = fielddict[attr][:2]
548 if val is None:
549 obj = self.getfield(*res)
550 if obj.dtype.names is not None:
551 return obj
552 return obj.view(ndarray)
553 else:
554 return self.setfield(val, *res)
557def _deprecate_shape_0_as_None(shape):
558 if shape == 0:
559 warnings.warn(
560 "Passing `shape=0` to have the shape be inferred is deprecated, "
561 "and in future will be equivalent to `shape=(0,)`. To infer "
562 "the shape and suppress this warning, pass `shape=None` instead.",
563 FutureWarning, stacklevel=3)
564 return None
565 else:
566 return shape
@set_module("numpy.rec")
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Create a record array from a (flat) list of arrays

    Parameters
    ----------
    arrayList : list or tuple
        List of array-like objects (such as lists, tuples,
        and ndarrays).
    dtype : data-type, optional
        valid dtype for all arrays
    shape : int or tuple of ints, optional
        Shape of the resulting array. If not provided, inferred from
        ``arrayList[0]``.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.rec.format_parser` to construct a dtype. See that function for
        detailed documentation.

    Returns
    -------
    np.recarray
        Record array consisting of given arrayList columns.

    Raises
    ------
    ValueError
        If the number of arrays differs from the number of fields, or if an
        array's shape does not match the shape of the result.

    Examples
    --------
    >>> x1=np.array([1,2,3,4])
    >>> x2=np.array(['a','dd','xyz','12'])
    >>> x3=np.array([1.1,2,3,4])
    >>> r = np.rec.fromarrays([x1,x2,x3],names='a,b,c')
    >>> print(r[1])
    (2, 'dd', 2.0) # may vary
    >>> x1[1]=34
    >>> r.a
    array([1, 2, 3, 4])

    >>> x1 = np.array([1, 2, 3, 4])
    >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
    >>> x3 = np.array([1.1, 2, 3,4])
    >>> r = np.rec.fromarrays(
    ...     [x1, x2, x3],
    ...     dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
    >>> r
    rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
               (4, b'12', 4. )],
              dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
    """

    columns = [sb.asarray(item) for item in arrayList]

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape is None:
        shape = columns[0].shape
    elif isinstance(shape, int):
        shape = (shape,)

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        if formats is None:
            # No explicit description at all: take each column's own dtype.
            formats = [column.dtype for column in columns]
        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
    field_names = descr.names

    # Determine shape from data-type.
    if len(descr) != len(columns):
        raise ValueError("mismatch between the number of fields "
                "and the number of arrays")

    # Trailing dimensions that belong to the first field's subarray shape are
    # not part of the record array's own shape.
    # NOTE(review): this trimming is also applied to an explicitly passed
    # `shape` -- confirm that is intended.
    subarray_rank = len(descr[0].shape)
    if subarray_rank > 0:
        shape = shape[:-subarray_rank]

    result = recarray(shape, descr)

    # populate the record array (makes a copy)
    for k, column in enumerate(columns):
        field_rank = descr[k].ndim
        outer_shape = column.shape[:column.ndim - field_rank]
        name = field_names[k]
        if outer_shape != shape:
            raise ValueError(f'array-shape mismatch in array {k} ("{name}")')

        result[name] = column

    return result
@set_module("numpy.rec")
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
                titles=None, aligned=False, byteorder=None):
    """Create a recarray from a list of records in text form.

    Parameters
    ----------
    recList : sequence
        data in the same field may be heterogeneous - they will be promoted
        to the highest data type.
    dtype : data-type, optional
        valid dtype for all arrays
    shape : int or tuple of ints, optional
        shape of each array.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.rec.format_parser` to construct a dtype. See that function for
        detailed documentation.

        If both `formats` and `dtype` are None, then this will auto-detect
        formats. Use list of tuples rather than list of lists for faster
        processing.

    Returns
    -------
    np.recarray
        record array consisting of given recList rows.

    Examples
    --------
    >>> r=np.rec.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
    ... names='col1,col2,col3')
    >>> print(r[0])
    (456, 'dbe', 1.2)
    >>> r.col1
    array([456,   2])
    >>> r.col2
    array(['dbe', 'de'], dtype='<U3')
    >>> import pickle
    >>> pickle.loads(pickle.dumps(r))
    rec.array([(456, 'dbe', 1.2), (  2, 'de', 1.3)],
              dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
    """

    if formats is None and dtype is None:  # slower
        # Auto-detect formats: split the records into columns and let
        # `fromarrays` infer one dtype per column.
        as_objects = sb.array(recList, dtype=object)
        columns = []
        for position in range(as_objects.shape[-1]):
            columns.append(sb.array(as_objects[..., position].tolist()))
        return fromarrays(columns, formats=formats, shape=shape, names=names,
                          titles=titles, aligned=aligned, byteorder=byteorder)

    if dtype is None:
        descr = format_parser(
            formats, names, titles, aligned, byteorder
        ).dtype
    else:
        descr = sb.dtype((record, dtype))

    try:
        converted = sb.array(recList, dtype=descr)
    except (TypeError, ValueError):
        # Direct conversion failed; fall back to filling a 1-d record array
        # row by row, converting each row to a tuple.
        # NumPy 1.19.0, 2020-01-01
        shape = _deprecate_shape_0_as_None(shape)
        if shape is None:
            shape = len(recList)
        if isinstance(shape, int):
            shape = (shape,)
        if len(shape) > 1:
            raise ValueError("Can only deal with 1-d array.")
        filled = recarray(shape, descr)
        for row in range(filled.size):
            filled[row] = tuple(recList[row])
        # list of lists instead of list of tuples ?
        # 2018-02-07, 1.14.1
        warnings.warn(
            "fromrecords expected a list of tuples, may have received a list "
            "of lists instead. In the future that will raise an error",
            FutureWarning, stacklevel=2)
        return filled

    if shape is not None and converted.shape != shape:
        converted.shape = shape

    return converted.view(recarray)
@set_module("numpy.rec")
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    r"""Create a record array from binary data

    Note that despite the name of this function it does not accept `str`
    instances.

    Parameters
    ----------
    datastring : bytes-like
        Buffer of binary data
    dtype : data-type, optional
        Valid dtype for all arrays
    shape : int or tuple of ints, optional
        Shape of each array.  If None or -1, as many records as fit into the
        buffer after `offset` are used.
    offset : int, optional
        Position in the buffer to start reading from.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.rec.format_parser` to construct a dtype. See that function for
        detailed documentation.


    Returns
    -------
    np.recarray
        Record array view into the data in datastring. This will be readonly
        if `datastring` is readonly.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.

    See Also
    --------
    numpy.frombuffer

    Examples
    --------
    >>> a = b'\x01\x02\x03abc'
    >>> np.rec.fromstring(a, dtype='u1,u1,u1,S3')
    rec.array([(1, 2, 3, b'abc')],
            dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])

    >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
    ...                 ('GradeLevel', np.int32)]
    >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
    ...                         ('Aadi', 66.6, 6)], dtype=grades_dtype)
    >>> np.rec.fromstring(grades_array.tobytes(), dtype=grades_dtype)
    rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
            dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])

    >>> s = '\x01\x02\x03abc'
    >>> np.rec.fromstring(s, dtype='u1,u1,u1,S3')
    Traceback (most recent call last):
       ...
    TypeError: a bytes-like object is required, not 'str'
    """

    if dtype is None and formats is None:
        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")

    if dtype is None:
        descr = format_parser(formats, names, titles, aligned, byteorder).dtype
    else:
        descr = sb.dtype(dtype)

    record_size = descr.itemsize

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape in (None, -1):
        # Use every complete record available after `offset`.
        shape = (len(datastring) - offset) // record_size

    return recarray(shape, descr, buf=datastring, offset=offset)
def get_remaining_size(fd):
    """Return the distance from the current position of `fd` to its end.

    `fd` must support ``tell`` and ``seek``.  The position of `fd` is
    restored before returning, also if measuring fails.
    """
    origin = fd.tell()
    try:
        fd.seek(0, os.SEEK_END)
        end = fd.tell()
    finally:
        fd.seek(origin, os.SEEK_SET)
    return end - origin
@set_module("numpy.rec")
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Create an array from binary file data

    Parameters
    ----------
    fd : str or file type
        If file is a string or a path-like object then that file is opened,
        else it is assumed to be a file object. The file object must
        support random access (i.e. it must have tell and seek methods).
    dtype : data-type, optional
        valid dtype for all arrays
    shape : int or tuple of ints, optional
        shape of each array.  A single ``-1`` entry is replaced by the
        largest value for which the data left in the file suffices.
    offset : int, optional
        Position in the file to start reading from.
    formats, names, titles, aligned, byteorder :
        If `dtype` is ``None``, these arguments are passed to
        `numpy.rec.format_parser` to construct a dtype. See that function for
        detailed documentation

    Returns
    -------
    np.recarray
        record array consisting of data enclosed in file.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.
    ValueError
        If the file holds fewer bytes than `shape` and the dtype require.
    OSError
        If fewer bytes than expected could be read.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> a = np.empty(10,dtype='f8,i4,S5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.view(a.dtype.newbyteorder('<'))
    >>> a.tofile(fd)
    >>>
    >>> _ = fd.seek(0)
    >>> r=np.rec.fromfile(fd, formats='f8,i4,S5', shape=10,
    ... byteorder='<')
    >>> print(r[5])
    (0.5, 10, b'abcde')
    >>> r.shape
    (10,)
    """

    if dtype is None and formats is None:
        raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")

    # NumPy 1.19.0, 2020-01-01
    shape = _deprecate_shape_0_as_None(shape)

    if shape is None:
        shape = (-1,)
    elif isinstance(shape, int):
        shape = (shape,)

    # GH issue 2504. An object with `readinto` supports the io.RawIOBase or
    # io.BufferedIOBase interface (e.g. gzip, BytesIO, BufferedReader) and is
    # used as is, without being closed here; anything else is treated as a
    # path and opened (and closed) by this function.
    if hasattr(fd, 'readinto'):
        ctx = nullcontext(fd)
    else:
        ctx = open(os.fspath(fd), 'rb')

    with ctx as stream:
        if offset > 0:
            # relative to the current position of the stream
            stream.seek(offset, 1)
        available = get_remaining_size(stream)

        if dtype is None:
            descr = format_parser(
                formats, names, titles, aligned, byteorder
            ).dtype
        else:
            descr = sb.dtype(dtype)

        record_size = descr.itemsize

        count = sb.array(shape).prod(dtype=nt.intp)
        requested = count * record_size
        if requested < 0:
            # A -1 entry makes the product negative: fill that entry in from
            # the amount of data that is available.
            resolved = list(shape)
            resolved[resolved.index(-1)] = available // -requested
            shape = tuple(resolved)
            count = sb.array(shape).prod(dtype=nt.intp)

        nbytes = count * record_size

        if nbytes > available:
            raise ValueError(
                "Not enough bytes left in file for specified "
                "shape and type."
            )

        # create the array
        result = recarray(shape, descr)
        nbytesread = stream.readinto(result.data)
        if nbytesread != nbytes:
            raise OSError("Didn't read as many bytes as expected")

    return result
942@set_module("numpy.rec")
943def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
944 names=None, titles=None, aligned=False, byteorder=None, copy=True):
945 """
946 Construct a record array from a wide-variety of objects.
948 A general-purpose record array constructor that dispatches to the
949 appropriate `recarray` creation function based on the inputs (see Notes).
951 Parameters
952 ----------
953 obj : any
954 Input object. See Notes for details on how various input types are
955 treated.
956 dtype : data-type, optional
957 Valid dtype for array.
958 shape : int or tuple of ints, optional
959 Shape of each array.
960 offset : int, optional
961 Position in the file or buffer to start reading from.
962 strides : tuple of ints, optional
963 Buffer (`buf`) is interpreted according to these strides (strides
964 define how many bytes each array element, row, column, etc.
965 occupy in memory).
966 formats, names, titles, aligned, byteorder :
967 If `dtype` is ``None``, these arguments are passed to
968 `numpy.format_parser` to construct a dtype. See that function for
969 detailed documentation.
970 copy : bool, optional
971 Whether to copy the input object (True), or to use a reference instead.
972 This option only applies when the input is an ndarray or recarray.
973 Defaults to True.
975 Returns
976 -------
977 np.recarray
978 Record array created from the specified object.
980 Notes
981 -----
982 If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
983 `obj` is a string, then call the `fromstring` constructor. If `obj` is a
984 list or a tuple, then if the first object is an `~numpy.ndarray`, call
985 `fromarrays`, otherwise call `fromrecords`. If `obj` is a
986 `~numpy.recarray`, then make a copy of the data in the recarray
987 (if ``copy=True``) and use the new formats, names, and titles. If `obj`
988 is a file, then call `fromfile`. Finally, if obj is an `ndarray`, then
989 return ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
991 Examples
992 --------
993 >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
994 >>> a
995 array([[1, 2, 3],
996 [4, 5, 6],
997 [7, 8, 9]])
999 >>> np.rec.array(a)
1000 rec.array([[1, 2, 3],
1001 [4, 5, 6],
1002 [7, 8, 9]],
1003 dtype=int64)
1005 >>> b = [(1, 1), (2, 4), (3, 9)]
1006 >>> c = np.rec.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
1007 >>> c
1008 rec.array([(1, 1.), (2, 4.), (3, 9.)],
1009 dtype=[('x', '<i2'), ('y', '<f2')])
1011 >>> c.x
1012 array([1, 2, 3], dtype=int16)
1014 >>> c.y
1015 array([1., 4., 9.], dtype=float16)
1017 >>> r = np.rec.array(['abc','def'], names=['col1','col2'])
1018 >>> print(r.col1)
1019 abc
1021 >>> r.col1
1022 array('abc', dtype='<U3')
1024 >>> r.col2
1025 array('def', dtype='<U3')
1026 """
1028 if ((isinstance(obj, (type(None), str)) or hasattr(obj, 'readinto')) and
1029 formats is None and dtype is None):
1030 raise ValueError("Must define formats (or dtype) if object is "
1031 "None, string, or an open file")
1033 kwds = {}
1034 if dtype is not None:
1035 dtype = sb.dtype(dtype)
1036 elif formats is not None:
1037 dtype = format_parser(formats, names, titles,
1038 aligned, byteorder).dtype
1039 else:
1040 kwds = {'formats': formats,
1041 'names': names,
1042 'titles': titles,
1043 'aligned': aligned,
1044 'byteorder': byteorder
1045 }
1047 if obj is None:
1048 if shape is None:
1049 raise ValueError("Must define a shape if obj is None")
1050 return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)
1052 elif isinstance(obj, bytes):
1053 return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)
1055 elif isinstance(obj, (list, tuple)):
1056 if isinstance(obj[0], (tuple, list)):
1057 return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
1058 else:
1059 return fromarrays(obj, dtype=dtype, shape=shape, **kwds)
1061 elif isinstance(obj, recarray):
1062 if dtype is not None and (obj.dtype != dtype):
1063 new = obj.view(dtype)
1064 else:
1065 new = obj
1066 if copy:
1067 new = new.copy()
1068 return new
1070 elif hasattr(obj, 'readinto'):
1071 return fromfile(obj, dtype=dtype, shape=shape, offset=offset)
1073 elif isinstance(obj, ndarray):
1074 if dtype is not None and (obj.dtype != dtype):
1075 new = obj.view(dtype)
1076 else:
1077 new = obj
1078 if copy:
1079 new = new.copy()
1080 return new.view(recarray)
1082 else:
1083 interface = getattr(obj, "__array_interface__", None)
1084 if interface is None or not isinstance(interface, dict):
1085 raise ValueError("Unknown input type")
1086 obj = sb.array(obj)
1087 if dtype is not None and (obj.dtype != dtype):
1088 obj = obj.view(dtype)
1089 return obj.view(recarray)