1"""
2IO related functions.
3"""
4import os
5import re
6import functools
7import itertools
8import warnings
9import weakref
10import contextlib
11import operator
12from operator import itemgetter, index as opindex, methodcaller
13from collections.abc import Mapping
14import pickle
16import numpy as np
17from . import format
18from ._datasource import DataSource
19from numpy._core import overrides
20from numpy._core.multiarray import packbits, unpackbits
21from numpy._core._multiarray_umath import _load_from_filelike
22from numpy._core.overrides import set_array_function_like_doc, set_module
23from ._iotools import (
24 LineSplitter, NameValidator, StringConverter, ConverterError,
25 ConverterLockError, ConversionWarning, _is_string_like,
26 has_nested_fields, flatten_dtype, easy_dtype, _decode_line
27 )
28from numpy._utils import asunicode, asbytes
31__all__ = [
32 'savetxt', 'loadtxt', 'genfromtxt', 'load', 'save', 'savez',
33 'savez_compressed', 'packbits', 'unpackbits', 'fromregex'
34 ]
37array_function_dispatch = functools.partial(
38 overrides.array_function_dispatch, module='numpy')


class BagObj:
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib._npyio_impl import BagObj as BO
    >>> class BagDemo:
    ...     def __getitem__(self, key):  # An instance of BagObj(BagDemo)
    ...                                  # will call this method when any
    ...                                  # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key) from None

    def __dir__(self):
        """
        Enables dir(bagobj) to list the files in an NpzFile.

        This also enables tab-completion in an interpreter or IPython.
        """
        return list(object.__getattribute__(self, '_obj').keys())


def zipfile_factory(file, *args, **kwargs):
    """
    Create a ZipFile.

    Allows for Zip64, and the `file` argument can accept file, str, or
    pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
    constructor.
    """
    if not hasattr(file, 'read'):
        file = os.fspath(file)
    import zipfile
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(file, *args, **kwargs)
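
# A minimal usage sketch of zipfile_factory (the path 'demo.npz' is
# hypothetical); Zip64 is always enabled so archives may exceed 4 GiB:
#
#     zf = zipfile_factory('demo.npz', mode='w')
#     zf.writestr('x.npy', b'...')   # members are ordinary zip entries
#     zf.close()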


@set_module('numpy.lib.npyio')
class NpzFile(Mapping):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute look-up can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Parameters
    ----------
    fid : file, str, or pathlib.Path
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> npz
    NpzFile 'object' with keys: x, y
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    # Make __exit__ safe if zipfile_factory raises an exception
    zip = None
    fid = None
    _MAX_REPR_ARRAY_COUNT = 5

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None, *,
                 max_header_size=format._MAX_HEADER_SIZE):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.max_header_size = max_header_size
        self.pickle_kwargs = pickle_kwargs
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs,
                                         max_header_size=self.max_header_size)
            else:
                return self.zip.read(key)
        else:
            raise KeyError(f"{key} is not a file in the archive")
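
    # A sketch of the lookup rules above, assuming an archive created with
    # ``np.savez('demo.npz', x=np.arange(3))``:
    #
    #     npz = np.load('demo.npz')
    #     npz['x']      # '.npy' is appended internally, parsed via read_array
    #     npz['x.npy']  # the raw member name works as well
    #
    # A member that lacks the ``.npy`` magic would be returned as raw bytes.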

    def __contains__(self, key):
        return (key in self._files or key in self.files)

    def __repr__(self):
        # Get filename or default to `object`
        if isinstance(self.fid, str):
            filename = self.fid
        else:
            filename = getattr(self.fid, "name", "object")

        # Get the names of the arrays
        array_names = ', '.join(self.files[:self._MAX_REPR_ARRAY_COUNT])
        if len(self.files) > self._MAX_REPR_ARRAY_COUNT:
            array_names += "..."
        return f"NpzFile {filename!r} with keys: {array_names}"

    # Work around problems with the docstrings in the Mapping methods
    # They contain a `->`, which confuses the type annotation interpretations
    # of sphinx-docs. See gh-25964

    def get(self, key, default=None, /):
        """
        D.get(k[,d]) returns D[k] if k in D, else d.  d defaults to None.
        """
        return Mapping.get(self, key, default)

    def items(self):
        """
        D.items() returns a set-like object providing a view on the items
        """
        return Mapping.items(self)

    def keys(self):
        """
        D.keys() returns a set-like object providing a view on the keys
        """
        return Mapping.keys(self)

    def values(self):
        """
        D.values() returns a set-like object providing a view on the values
        """
        return Mapping.values(self)


@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
         encoding='ASCII', *, max_header_size=format._MAX_HEADER_SIZE):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    .. warning:: Loading files that contain object arrays uses the ``pickle``
                 module, which is not secure against erroneous or maliciously
                 constructed data. Consider passing ``allow_pickle=False`` to
                 load data that is known not to contain object arrays for the
                 safer handling of untrusted sources.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods and must always
        be opened in binary mode.  Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    OSError
        If the input file does not exist or cannot be read.
    UnpicklingError
        If ``allow_pickle=True``, but the file cannot be loaded as a pickle.
    ValueError
        The file contains an object array, but ``allow_pickle=False`` given.
    EOFError
        When calling ``np.load`` multiple times on the same file handle,
        if all data has already been read

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a = np.array([[1, 2, 3], [4, 5, 6]])
    >>> b = np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy._core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'.  'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os.fspath(file), "rb"))
            own_fid = True

        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        if not magic:
            raise EOFError("No data left in file")
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs,
                          max_header_size=max_header_size)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                if allow_pickle:
                    max_header_size = 2**64
                return format.open_memmap(file, mode=mmap_mode,
                                          max_header_size=max_header_size)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs,
                                         max_header_size=max_header_size)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError("Cannot load file containing pickled data "
                                 "when allow_pickle=False")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception as e:
                raise pickle.UnpicklingError(
                    f"Failed to interpret file {file!r} as a pickle") from e


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved.  If file is a file-object,
        then the filename is unchanged.  If file is a string or Path,
        a ``.npy`` extension will be appended to the filename if it does not
        already have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between Python 2 and Python 3).
        Default: True
    fix_imports : bool, optional
        Only useful in forcing objects in object arrays on Python 3 to be
        pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
        will try to map the new Python 3 names to the old module names used in
        Python 2, so that the pickle data stream is readable with Python 2.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    [1 2] [1 3]
    """
    if hasattr(file, 'write'):
        file_ctx = contextlib.nullcontext(file)
    else:
        file = os.fspath(file)
        if not file.endswith('.npy'):
            file = file + '.npy'
        file_ctx = open(file, "wb")

    with file_ctx as fid:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr, allow_pickle=allow_pickle,
                           pickle_kwargs=dict(fix_imports=fix_imports))


def _savez_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, **kwds):
    """Save several arrays into a single file in uncompressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e., ``savez(fn,
    x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Keys passed in `kwds` are used as filenames inside the ZIP archive.
    Therefore, keys should be valid filenames; e.g., avoid keys that begin with
    ``/`` or contain ``.``.

    When naming variables with keyword arguments, it is not possible to name a
    variable ``file``, as this would cause the ``file`` argument to be defined
    twice in the call to ``savez``.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_0', 'arr_1']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> sorted(npzfile.files)
    ['x', 'y']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    _savez(file, args, kwds, False)


def _savez_compressed_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez_compressed(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e.,
    ``savez_compressed(fn, x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os.fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            format.write_array(fid, val,
                               allow_pickle=allow_pickle,
                               pickle_kwargs=pickle_kwargs)

    zipf.close()
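
# Sketch of the naming rule implemented above: positional arrays become
# 'arr_0', 'arr_1', ... while keyword arrays keep their names, so a call like
#
#     np.savez('demo.npz', a, b, x=c)          # 'demo.npz' is hypothetical
#
# writes the members 'arr_0.npy', 'arr_1.npy' and 'x.npy'; passing both a
# positional array and an explicit 'arr_0' keyword raises ValueError.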


def _ensure_ndmin_ndarray_check_param(ndmin):
    """Just checks if the param ndmin is supported on
    _ensure_ndmin_ndarray. It is intended to be used as
    verification before running anything expensive.
    e.g. loadtxt, genfromtxt
    """
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError(f"Illegal value of ndmin keyword: {ndmin}")


def _ensure_ndmin_ndarray(a, *, ndmin: int):
    """This is a helper function of loadtxt and genfromtxt to ensure
    proper minimum dimension as requested

    ndmin : int. Supported values 0, 1, 2
                 ^^ whenever this changes, keep in sync with
                    _ensure_ndmin_ndarray_check_param
    """
    # Verify that the array has at least dimensions `ndmin`.
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if a.ndim > ndmin:
        a = np.squeeze(a)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, a.squeeze().ndim=0
    if a.ndim < ndmin:
        if ndmin == 1:
            a = np.atleast_1d(a)
        elif ndmin == 2:
            a = np.atleast_2d(a).T

    return a
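
# The ndmin promotion above, spelled out on small inputs (shapes shown in
# the comments are what the branches produce):
#
#     _ensure_ndmin_ndarray(np.float64(1.0), ndmin=1)       # -> shape (1,)
#     _ensure_ndmin_ndarray(np.array([1.0, 2.0]), ndmin=2)  # -> shape (2, 1)
#
# i.e. a 1-D input promoted to 2-D is kept as a column via atleast_2d(...).T.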


# amount of lines loadtxt reads in one chunk, can be overridden for testing
_loadtxt_chunksize = 50000


def _check_nonneg_int(value, name="argument"):
    try:
        operator.index(value)
    except TypeError:
        raise TypeError(f"{name} must be an integer") from None
    if value < 0:
        raise ValueError(f"{name} must be nonnegative")


def _preprocess_comments(iterable, comments, encoding):
    """
    Generator that consumes an iterable of lines and strips out the
    multiple (or multi-character) comments from lines.
    This is a pre-processing step to achieve feature parity with loadtxt
    (we assume that this feature is a niche feature).
    """
    for line in iterable:
        if isinstance(line, bytes):
            # Need to handle conversion here, or the splitting would fail
            line = line.decode(encoding)

        for c in comments:
            line = line.split(c, 1)[0]

        yield line
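
# A small sketch of the stripping performed above: every comment marker is
# applied in turn with str.split(marker, 1):
#
#     lines = ["1 2 // trailing", "3 4 ;; other"]
#     list(_preprocess_comments(iter(lines), ("//", ";;"), None))
#     # -> ['1 2 ', '3 4 ']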


# The number of rows we read in one go if confronted with a parametric dtype
_loadtxt_chunksize = 50000


def _read(fname, *, delimiter=',', comment='#', quote='"',
          imaginary_unit='j', usecols=None, skiplines=0,
          max_rows=None, converters=None, ndmin=None, unpack=False,
          dtype=np.float64, encoding=None):
    r"""
    Read a NumPy array from a text file.
    This is a helper function for loadtxt.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        The filename or the file to be read.
    delimiter : str, optional
        Field delimiter separating the values in a line of the file.
        Default is a comma, ','.  If None, any sequence of whitespace is
        considered a delimiter.
    comment : str or sequence of str or None, optional
        Character that begins a comment.  All text from the comment
        character to the end of the line is ignored.
        Multiple comments or multiple-character comment strings are supported,
        but may be slower and `quote` must be empty if used.
        Use None to disable all use of comments.
    quote : str or None, optional
        Character that is used to quote string fields. Default is '"'
        (a double quote). Use None to disable quote support.
    imaginary_unit : str, optional
        Character that represents the imaginary unit `sqrt(-1)`.
        Default is 'j'.
    usecols : array_like, optional
        A one-dimensional array of integer column numbers.  These are the
        columns from the file to be included in the array.  If this value
        is not given, all the columns are used.
    skiplines : int, optional
        Number of lines to skip before interpreting the data in the file.
    max_rows : int, optional
        Maximum number of rows of data to read.  Default is to read the
        entire file.
    converters : dict or callable, optional
        A function to parse all column strings into the desired value, or
        a dictionary mapping column number to a parser function.
        E.g. if column 0 is a date string: ``converters = {0: datestr2num}``.
        Converters can also be used to provide a default value for missing
        data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will
        convert empty fields to 0.
        Default: None
    ndmin : int, optional
        Minimum dimension of the array returned.
        Allowed values are 0, 1 or 2.  Default is 0.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = read(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    dtype : numpy data type
        A NumPy dtype instance, can be a structured dtype to map to the
        columns of the file.
    encoding : str, optional
        Encoding used to decode the inputfile. The special value 'bytes'
        (the default) enables backwards-compatible behavior for `converters`,
        ensuring that inputs to the converter functions are encoded
        bytes objects. The special value 'bytes' has no additional effect if
        ``converters=None``. If encoding is ``'bytes'`` or ``None``, the
        default system encoding is used.

    Returns
    -------
    ndarray
        NumPy array.
    """
    # Handle special 'bytes' keyword for encoding
    byte_converters = False
    if encoding == 'bytes':
        encoding = None
        byte_converters = True

    if dtype is None:
        raise TypeError("a dtype must be provided.")
    dtype = np.dtype(dtype)

    read_dtype_via_object_chunks = None
    if dtype.kind in 'SUM' and (
            dtype == "S0" or dtype == "U0" or dtype == "M8" or dtype == 'm8'):
        # This is a legacy "flexible" dtype.  We do not truly support
        # parametric dtypes currently (no dtype discovery step in the core),
        # but have to support these for backward compatibility.
        read_dtype_via_object_chunks = dtype
        dtype = np.dtype(object)

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints, the C-code
        # handles the rest
        try:
            usecols = list(usecols)
        except TypeError:
            usecols = [usecols]

    _ensure_ndmin_ndarray_check_param(ndmin)

    if comment is None:
        comments = None
    else:
        # assume comments are a sequence of strings
        if "" in comment:
            raise ValueError(
                "comments cannot be an empty string. Use comments=None to "
                "disable comments."
            )
        comments = tuple(comment)
        comment = None
        if len(comments) == 0:
            comments = None  # No comments at all
        elif len(comments) == 1:
            # If there is only one comment, and that comment has one character,
            # the normal parsing can deal with it just fine.
            if isinstance(comments[0], str) and len(comments[0]) == 1:
                comment = comments[0]
                comments = None
        else:
            # Input validation if there are multiple comment characters
            if delimiter in comments:
                raise TypeError(
                    f"Comment characters '{comments}' cannot include the "
                    f"delimiter '{delimiter}'"
                )

    # comment is now either a 1 or 0 character string or a tuple:
    if comments is not None:
        # Note: An earlier version supported two-character comments (and could
        #       have been extended to multiple characters); we assume this is
        #       rare enough not to optimize for.
        if quote is not None:
            raise ValueError(
                "when multiple comments or a multi-character comment is "
                "given, quotes are not supported.  In this case quotechar "
                "must be set to None.")

    if len(imaginary_unit) != 1:
        raise ValueError('len(imaginary_unit) must be 1.')

    _check_nonneg_int(skiplines)
    if max_rows is not None:
        _check_nonneg_int(max_rows)
    else:
        # Passing -1 to the C code means "read the entire file".
        max_rows = -1

    fh_closing_ctx = contextlib.nullcontext()
    filelike = False
    try:
        if isinstance(fname, os.PathLike):
            fname = os.fspath(fname)
        if isinstance(fname, str):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            if encoding is None:
                encoding = getattr(fh, 'encoding', 'latin1')

            fh_closing_ctx = contextlib.closing(fh)
            data = fh
            filelike = True
        else:
            if encoding is None:
                encoding = getattr(fname, 'encoding', 'latin1')
            data = iter(fname)
    except TypeError as e:
        raise ValueError(
            f"fname must be a string, filehandle, list of strings,\n"
            f"or generator. Got {type(fname)} instead.") from e

    with fh_closing_ctx:
        if comments is not None:
            if filelike:
                data = iter(data)
                filelike = False
            data = _preprocess_comments(data, comments, encoding)

        if read_dtype_via_object_chunks is None:
            arr = _load_from_filelike(
                data, delimiter=delimiter, comment=comment, quote=quote,
                imaginary_unit=imaginary_unit,
                usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                converters=converters, dtype=dtype,
                encoding=encoding, filelike=filelike,
                byte_converters=byte_converters)

        else:
            # This branch reads the file into chunks of object arrays and then
            # casts them to the desired actual dtype.  This ensures correct
            # string-length and datetime-unit discovery (like `arr.astype()`).
            # Due to chunking, certain error reports are less clear, currently.
            if filelike:
                data = iter(data)  # cannot chunk when reading from file

            c_byte_converters = False
            if read_dtype_via_object_chunks == "S":
                c_byte_converters = True  # Use latin1 rather than ascii

            chunks = []
            while max_rows != 0:
                if max_rows < 0:
                    chunk_size = _loadtxt_chunksize
                else:
                    chunk_size = min(_loadtxt_chunksize, max_rows)

                next_arr = _load_from_filelike(
                    data, delimiter=delimiter, comment=comment, quote=quote,
                    imaginary_unit=imaginary_unit,
                    usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                    converters=converters, dtype=dtype,
                    encoding=encoding, filelike=filelike,
                    byte_converters=byte_converters,
                    c_byte_converters=c_byte_converters)
                # Cast here already.  We hope that this is better even for
                # large files because the storage is more compact.  It could
                # be adapted (in principle the concatenate could cast).
                chunks.append(next_arr.astype(read_dtype_via_object_chunks))

                skiplines = 0  # Only have to skip for first chunk
                if max_rows >= 0:
                    max_rows -= chunk_size
                if len(next_arr) < chunk_size:
                    # There was less data than requested, so we are done.
                    break

            # Need at least one chunk, but if empty, the last one may have
            # the wrong shape.
            if len(chunks) > 1 and len(chunks[-1]) == 0:
                del chunks[-1]
            if len(chunks) == 1:
                arr = chunks[0]
            else:
                arr = np.concatenate(chunks, axis=0)

    # NOTE: ndmin works as advertised for structured dtypes, but normally
    #       these would return a 1D result plus the structured dimension,
    #       so ndmin=2 adds a third dimension even when no squeezing occurs.
    #       A `squeeze=False` could be a better solution (pandas uses squeeze).
    arr = _ensure_ndmin_ndarray(arr, ndmin=ndmin)

    if arr.shape:
        if arr.shape[0] == 0:
            warnings.warn(
                f'loadtxt: input contained no data: "{fname}"',
                category=UserWarning,
                stacklevel=3
            )

    if unpack:
        # Unpack structured dtypes if requested:
        dt = arr.dtype
        if dt.names is not None:
            # For structured arrays, return an array for each field.
            return [arr[field] for field in dt.names]
        else:
            return arr.T
    else:
        return arr
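
# Sketch of the chunked path above for a parametric dtype: "U0" means
# "discover the string length", so rows are parsed into object-dtype chunks
# of at most _loadtxt_chunksize lines, cast with astype() (which finds the
# final width), and concatenated:
#
#     from io import StringIO
#     _read(StringIO("ab cd\nefg h\n"), delimiter=' ', dtype="U0", ndmin=0)
#     # -> roughly array([['ab', 'cd'], ['efg', 'h']], dtype='<U3')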


@set_array_function_like_doc
@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding=None, max_rows=None, *, quotechar=None,
            like=None):
    r"""
    Load data from a text file.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
        that generators must return bytes or strings. The strings
        in a list or produced by a generator are treated as lines.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str or None, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The character used to separate the values. For backwards compatibility,
        byte strings will be decoded as 'latin1'. The default is whitespace.

        .. versionchanged:: 1.23.0
            Only single character delimiters are supported. Newline characters
            cannot be used as the delimiter.

    converters : dict or callable, optional
        Converter functions to customize value parsing. If `converters` is
        callable, the function is applied to all columns, else it must be a
        dict that maps column number to a parser function.
        See examples for further details.
        Default: None.

        .. versionchanged:: 1.23.0
            The ability to pass a single callable to be applied to all columns
            was added.

    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read it is possible to use
            an integer instead of a tuple. E.g ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the inputfile. Does not apply to input streams.
        The special value 'bytes' enables backward compatibility workarounds
        that ensure you receive byte arrays as results if possible and pass
        'latin1' encoded strings to converters. Override this value to receive
        unicode arrays and pass strings as input to converters.  If set to None
        the system default is used. The default value is None.

        .. versionadded:: 1.14.0
        .. versionchanged:: 2.0
            Before NumPy 2, the default was ``'bytes'`` for Python 2
            compatibility. The default is now ``None``.

    max_rows : int, optional
        Read `max_rows` rows of content after `skiprows` lines. The default is
        to read all the rows. Note that empty rows containing no data such as
        empty lines and comment lines are not counted towards `max_rows`,
        while such lines are counted in `skiprows`.

        .. versionadded:: 1.16.0

        .. versionchanged:: 1.23.0
            Lines containing no data, including comment lines (e.g., lines
            starting with '#' or as specified via `comments`) are not counted
            towards `max_rows`.
    quotechar : unicode character or None, optional
        The character used to denote the start and end of a quoted item.
        Occurrences of the delimiter or comment characters are ignored within
        a quoted item. The default value is ``quotechar=None``, which means
        quoting support is disabled.

        If two consecutive instances of `quotechar` are found within a quoted
        field, the first is treated as an escape character. See examples.

        .. versionadded:: 1.23.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Each row in the input text file must have the same number of values to be
    able to read all values.  If not all rows have the same number of values, a
    subset of up to n columns (where n is the least number of values present
    in all rows) can be read by specifying the columns via `usecols`.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    The `converters` argument is used to specify functions to preprocess the
    text prior to parsing. `converters` can be a dictionary that maps
    preprocessing functions to each column:

    >>> s = StringIO("1.618, 2.296\n3.141, 4.669\n")
    >>> conv = {
    ...     0: lambda x: np.floor(float(x)),  # conversion fn for column 0
    ...     1: lambda x: np.ceil(float(x)),   # conversion fn for column 1
    ... }
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([[1., 3.],
           [3., 5.]])

    `converters` can be a callable instead of a dictionary, in which case it
    is applied to all columns:

    >>> s = StringIO("0xDE 0xAD\n0xC0 0xDE")
    >>> import functools
    >>> conv = functools.partial(int, base=16)
    >>> np.loadtxt(s, converters=conv)
    array([[222., 173.],
           [192., 222.]])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith("-") else float(fld)
    ...
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Using a callable as the converter can be particularly useful for handling
    values with different formatting, e.g. floats with underscores:

    >>> s = StringIO("1 2.7 100_000")
    >>> np.loadtxt(s, converters=float)
    array([1.e+00, 2.7e+00, 1.e+05])

    This idea can be extended to automatically handle values specified in
    many different formats, such as hex values:

    >>> def conv(val):
    ...     try:
    ...         return float(val)
    ...     except ValueError:
    ...         return float.fromhex(val)
    >>> s = StringIO("1, 2.5, 3_000, 0b4, 0x1.4000000000000p+2")
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([1.0e+00, 2.5e+00, 3.0e+03, 1.8e+02, 5.0e+00])

    Or a format where the ``-`` sign comes after the number:

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> conv = lambda x: -float(x[:-1]) if x.endswith("-") else float(x)
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Support for quoted fields is enabled with the `quotechar` parameter.
    Comment and delimiter characters are ignored when they appear within a
    quoted item delineated by `quotechar`:

    >>> s = StringIO('"alpha, #42", 10.0\n"beta, #64", 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=",", quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Quoted fields can be separated by multiple whitespace characters:

    >>> s = StringIO('"alpha, #42" 10.0\n"beta, #64" 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=None, quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Two consecutive quote characters within a quoted field are treated as a
    single escaped character:

    >>> s = StringIO('"Hello, my name is ""Monty""!"')
    >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"')
    array('Hello, my name is "Monty"!', dtype='<U26')

    Read subset of columns when all rows do not contain equal number of values:

    >>> d = StringIO("1 2\n2 4\n3 9 12\n4 16 20")
    >>> np.loadtxt(d, usecols=(0, 1))
    array([[ 1.,  2.],
           [ 2.,  4.],
           [ 3.,  9.],
           [ 4., 16.]])

    """
    if like is not None:
        return _loadtxt_with_like(
            like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
            converters=converters, skiprows=skiprows, usecols=usecols,
            unpack=unpack, ndmin=ndmin, encoding=encoding,
            max_rows=max_rows
        )

    if dtype is None:
        dtype = np.float64

    comment = comments
    # Control character type conversions for Py3 convenience
    if comment is not None:
        if isinstance(comment, (str, bytes)):
            comment = [comment]
        comment = [
            x.decode('latin1') if isinstance(x, bytes) else x for x in comment]
    if isinstance(delimiter, bytes):
        delimiter = delimiter.decode('latin1')

    arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
                converters=converters, skiplines=skiprows, usecols=usecols,
                unpack=unpack, ndmin=ndmin, encoding=encoding,
                max_rows=max_rows, quote=quotechar)

    return arr


_loadtxt_with_like = array_function_dispatch()(loadtxt)


def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
                        header=None, footer=None, comments=None,
                        encoding=None):
    return (X,)


@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename, file handle or pathlib.Path
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, ``fmt='%.4e'``, resulting in numbers formatted
          like ``' (%s+%sj)' % (fmt, fmt)``
        * a full string specifying every real and imaginary part, e.g.
          ``' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'`` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. ``['%.3e + %.3ej', '(%.15e%+.15ej)']`` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0
    encoding : {None, str}, optional
        Encoding used to encode the outputfile. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1'
        you will not be able to load the file in NumPy versions < 1.14. Default
        is 'latin1'.

        .. versionadded:: 1.14.0

    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
           Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0, 5.0, 1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x, y, z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """

    class WriteWrap:
        """Convert to bytes on bytestream inputs.

        """
        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    if isinstance(fname, os.PathLike):
        fname = os.fspath(fname)
    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = delimiter.join(fmt)
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = format % tuple(row) + newline
                except TypeError as e:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), format)) from e
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()
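
# Sketch of the complex-input path above: a single specifier is expanded to
# ' (%s+%sj)' per column and any '+-' produced by a negative imaginary part
# is rewritten to '-'.  Assuming sys is imported:
#
#     np.savetxt(sys.stdout, np.array([[1 - 2j]]), fmt='%.2e')
#     # writes roughly: (1.00e+00-2.00e+00j)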
1640@set_module('numpy')
1641def fromregex(file, regexp, dtype, encoding=None):
1642 r"""
1643 Construct an array from a text file, using regular expression parsing.
1645 The returned array is always a structured array, and is constructed from
1646 all matches of the regular expression in the file. Groups in the regular
1647 expression are converted to fields of the structured array.
1649 Parameters
1650 ----------
1651 file : file, str, or pathlib.Path
1652 Filename or file object to read.
1654 .. versionchanged:: 1.22.0
1655 Now accepts `os.PathLike` implementations.
1656 regexp : str or regexp
1657 Regular expression used to parse the file.
1658 Groups in the regular expression correspond to fields in the dtype.
1659 dtype : dtype or list of dtypes
1660 Dtype for the structured array; must be a structured datatype.
1661 encoding : str, optional
1662 Encoding used to decode the inputfile. Does not apply to input streams.
1664 .. versionadded:: 1.14.0
1666 Returns
1667 -------
1668 output : ndarray
1669 The output array, containing the part of the content of `file` that
1670 was matched by `regexp`. `output` is always a structured array.
1672 Raises
1673 ------
1674 TypeError
1675 When `dtype` is not a valid dtype for a structured array.
1677 See Also
1678 --------
1679 fromstring, loadtxt
1681 Notes
1682 -----
1683 Dtypes for structured arrays can be specified in several forms, but all
1684 forms specify at least the data type and field name. For details see
1685 `basics.rec`.
1687 Examples
1688 --------
1689 >>> from io import StringIO
1690 >>> text = StringIO("1312 foo\n1534 bar\n444 qux")
1692 >>> regexp = r"(\d+)\s+(...)" # match [digits, whitespace, anything]
1693 >>> output = np.fromregex(text, regexp,
1694 ... [('num', np.int64), ('key', 'S3')])
1695 >>> output
1696 array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
1697 dtype=[('num', '<i8'), ('key', 'S3')])
1698 >>> output['num']
1699    array([1312, 1534,  444])
1701 """
1702 own_fh = False
1703 if not hasattr(file, "read"):
1704 file = os.fspath(file)
1705 file = np.lib._datasource.open(file, 'rt', encoding=encoding)
1706 own_fh = True
1708 try:
1709 if not isinstance(dtype, np.dtype):
1710 dtype = np.dtype(dtype)
1711 if dtype.names is None:
1712 raise TypeError('dtype must be a structured datatype.')
1714 content = file.read()
1715 if isinstance(content, bytes) and isinstance(regexp, str):
1716 regexp = asbytes(regexp)
1718 if not hasattr(regexp, 'match'):
1719 regexp = re.compile(regexp)
1720 seq = regexp.findall(content)
1721 if seq and not isinstance(seq[0], tuple):
1722 # Only one group is in the regexp.
1723 # Create the new array as a single data-type and then
1724 # re-interpret as a single-field structured array.
1725 newdtype = np.dtype(dtype[dtype.names[0]])
1726 output = np.array(seq, dtype=newdtype)
1727 output.dtype = dtype
1728 else:
1729 output = np.array(seq, dtype=dtype)
1731 return output
1732 finally:
1733 if own_fh:
1734 file.close()
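# A minimal sketch (not part of the module) of the single-group path
# above: `findall` yields plain strings rather than tuples, so the array
# is first built with the field's base dtype and then reinterpreted as a
# one-field structured array.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> np.fromregex(StringIO("ab 12\ncd 34"), r"(\d+)",
# ...              [('num', np.int64)])
# array([(12,), (34,)], dtype=[('num', '<i8')])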
1737#####--------------------------------------------------------------------------
1738#---- --- ASCII functions ---
1739#####--------------------------------------------------------------------------
1742@set_array_function_like_doc
1743@set_module('numpy')
1744def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
1745 skip_header=0, skip_footer=0, converters=None,
1746 missing_values=None, filling_values=None, usecols=None,
1747 names=None, excludelist=None,
1748 deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
1749 replace_space='_', autostrip=False, case_sensitive=True,
1750 defaultfmt="f%i", unpack=None, usemask=False, loose=True,
1751 invalid_raise=True, max_rows=None, encoding=None,
1752 *, ndmin=0, like=None):
1753 """
1754 Load data from a text file, with missing values handled as specified.
1756 Each line past the first `skip_header` lines is split at the `delimiter`
1757 character, and characters following the `comments` character are discarded.
1759 Parameters
1760 ----------
1761 fname : file, str, pathlib.Path, list of str, generator
1762 File, filename, list, or generator to read. If the filename
1763 extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
1764 that generators must return bytes or strings. The strings
1765 in a list or produced by a generator are treated as lines.
1766 dtype : dtype, optional
1767 Data type of the resulting array.
1768 If None, the dtypes will be determined by the contents of each
1769 column, individually.
1770 comments : str, optional
1771 The character used to indicate the start of a comment.
1772 All the characters occurring on a line after a comment are discarded.
1773 delimiter : str, int, or sequence, optional
1774 The string used to separate values. By default, any consecutive
1775 whitespaces act as delimiter. An integer or sequence of integers
1776 can also be provided as width(s) of each field.
1777 skiprows : int, optional
1778 `skiprows` was removed in numpy 1.10. Please use `skip_header` instead.
1779 skip_header : int, optional
1780 The number of lines to skip at the beginning of the file.
1781 skip_footer : int, optional
1782 The number of lines to skip at the end of the file.
1783 converters : variable, optional
1784 The set of functions that convert the data of a column to a value.
1785 The converters can also be used to provide a default value
1786 for missing data: ``converters = {3: lambda s: float(s or 0)}``.
1787 missing : variable, optional
1788 `missing` was removed in numpy 1.10. Please use `missing_values`
1789 instead.
1790 missing_values : variable, optional
1791 The set of strings corresponding to missing data.
1792 filling_values : variable, optional
1793 The set of values to be used as default when the data are missing.
1794 usecols : sequence, optional
1795 Which columns to read, with 0 being the first. For example,
1796 ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
1797 names : {None, True, str, sequence}, optional
1798 If `names` is True, the field names are read from the first line after
1799 the first `skip_header` lines. This line can optionally be preceded
1800 by a comment delimiter. Any content before the comment delimiter is
1801        discarded. If `names` is a sequence or a single string of
1802 comma-separated names, the names will be used to define the field
1803 names in a structured dtype. If `names` is None, the names of the
1804 dtype fields will be used, if any.
1805 excludelist : sequence, optional
1806 A list of names to exclude. This list is appended to the default list
1807 ['return','file','print']. Excluded names are appended with an
1808 underscore: for example, `file` would become `file_`.
1809 deletechars : str, optional
1810 A string combining invalid characters that must be deleted from the
1811 names.
1812 defaultfmt : str, optional
1813 A format used to define default field names, such as "f%i" or "f_%02i".
1814 autostrip : bool, optional
1815 Whether to automatically strip white spaces from the variables.
1816 replace_space : char, optional
1817        Character(s) used to replace white spaces in the variable
1818        names. The default is '_'.
1819 case_sensitive : {True, False, 'upper', 'lower'}, optional
1820 If True, field names are case sensitive.
1821 If False or 'upper', field names are converted to upper case.
1822 If 'lower', field names are converted to lower case.
1823 unpack : bool, optional
1824 If True, the returned array is transposed, so that arguments may be
1825 unpacked using ``x, y, z = genfromtxt(...)``. When used with a
1826 structured data-type, arrays are returned for each field.
1827 Default is False.
1828 usemask : bool, optional
1829 If True, return a masked array.
1830 If False, return a regular array.
1831 loose : bool, optional
1832 If True, do not raise errors for invalid values.
1833 invalid_raise : bool, optional
1834 If True, an exception is raised if an inconsistency is detected in the
1835 number of columns.
1836 If False, a warning is emitted and the offending lines are skipped.
1837 max_rows : int, optional
1838        The maximum number of rows to read. Must not be used together
1839        with `skip_footer`. If given, the value must be at least 1.
1840        Default is to read the entire file.
1842 .. versionadded:: 1.10.0
1843 encoding : str, optional
1844        Encoding used to decode the input file. Does not apply when `fname`
1845 is a file object. The special value 'bytes' enables backward
1846 compatibility workarounds that ensure that you receive byte arrays
1847 when possible and passes latin1 encoded strings to converters.
1848 Override this value to receive unicode arrays and pass strings
1849        as input to converters. If set to None, the system default is used.
1850        The default value is None.
1852 .. versionadded:: 1.14.0
1853 .. versionchanged:: 2.0
1854 Before NumPy 2, the default was ``'bytes'`` for Python 2
1855 compatibility. The default is now ``None``.
1857 ndmin : int, optional
1858        Same parameter as `loadtxt`.
1860 .. versionadded:: 1.23.0
1861 ${ARRAY_FUNCTION_LIKE}
1863 .. versionadded:: 1.20.0
1865 Returns
1866 -------
1867 out : ndarray
1868 Data read from the text file. If `usemask` is True, this is a
1869 masked array.
1871 See Also
1872 --------
1873 numpy.loadtxt : equivalent function when no data is missing.
1875 Notes
1876 -----
1877 * When spaces are used as delimiters, or when no delimiter has been given
1878 as input, there should not be any missing data between two fields.
1879 * When variables are named (either by a flexible dtype or with a `names`
1880 sequence), there must not be any header in the file (else a ValueError
1881 exception is raised).
1882 * Individual values are not stripped of spaces by default.
1883      When using a custom converter, make sure the function removes spaces.
1884 * Custom converters may receive unexpected values due to dtype
1885 discovery.
1887 References
1888 ----------
1889 .. [1] NumPy User Guide, section `I/O with NumPy
1890 <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.
1892 Examples
1893 --------
1894 >>> from io import StringIO
1895 >>> import numpy as np
1897    Comma-delimited file with mixed dtype
1899 >>> s = StringIO("1,1.3,abcde")
1900 >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
1901 ... ('mystring','S5')], delimiter=",")
1902 >>> data
1903 array((1, 1.3, b'abcde'),
1904 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1906 Using dtype = None
1908 >>> _ = s.seek(0) # needed for StringIO example only
1909 >>> data = np.genfromtxt(s, dtype=None,
1910 ... names = ['myint','myfloat','mystring'], delimiter=",")
1911 >>> data
1912 array((1, 1.3, 'abcde'),
1913 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '<U5')])
1915 Specifying dtype and names
1917 >>> _ = s.seek(0)
1918 >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
1919 ... names=['myint','myfloat','mystring'], delimiter=",")
1920 >>> data
1921 array((1, 1.3, b'abcde'),
1922 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1924 An example with fixed-width columns
1926 >>> s = StringIO("11.3abcde")
1927 >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
1928 ... delimiter=[1,3,5])
1929 >>> data
1930 array((1, 1.3, 'abcde'),
1931 dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '<U5')])
1933 An example to show comments
1935 >>> f = StringIO('''
1936 ... text,# of chars
1937 ... hello world,11
1938 ... numpy,5''')
1939 >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',')
1940 array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')],
1941 dtype=[('f0', 'S12'), ('f1', 'S12')])
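
    An example with a missing value, masked when ``usemask=True`` (an
    empty field is treated as missing by default)

    >>> s = StringIO('''1,2,3
    ... 4,,6''')
    >>> data = np.genfromtxt(s, delimiter=",", usemask=True)
    >>> data.mask
    array([[False, False, False],
           [False,  True, False]])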
1943 """
1945 if like is not None:
1946 return _genfromtxt_with_like(
1947 like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
1948 skip_header=skip_header, skip_footer=skip_footer,
1949 converters=converters, missing_values=missing_values,
1950 filling_values=filling_values, usecols=usecols, names=names,
1951 excludelist=excludelist, deletechars=deletechars,
1952 replace_space=replace_space, autostrip=autostrip,
1953 case_sensitive=case_sensitive, defaultfmt=defaultfmt,
1954 unpack=unpack, usemask=usemask, loose=loose,
1955 invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding,
1956 ndmin=ndmin,
1957 )
1959 _ensure_ndmin_ndarray_check_param(ndmin)
1961 if max_rows is not None:
1962 if skip_footer:
1963 raise ValueError(
1964 "The keywords 'skip_footer' and 'max_rows' can not be "
1965 "specified at the same time.")
1966 if max_rows < 1:
1967 raise ValueError("'max_rows' must be at least 1.")
1969 if usemask:
1970 from numpy.ma import MaskedArray, make_mask_descr
1971 # Check the input dictionary of converters
1972 user_converters = converters or {}
1973 if not isinstance(user_converters, dict):
1974 raise TypeError(
1975            "The input argument 'converters' should be a valid dictionary "
1976 "(got '%s' instead)" % type(user_converters))
1978 if encoding == 'bytes':
1979 encoding = None
1980 byte_converters = True
1981 else:
1982 byte_converters = False
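    # In legacy 'bytes' mode, converters further down receive
    # latin1-encoded bytes and string columns are encoded back to bytes
    # in the output array (see the byte_converters branches below).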
1984 # Initialize the filehandle, the LineSplitter and the NameValidator
1985 if isinstance(fname, os.PathLike):
1986 fname = os.fspath(fname)
1987 if isinstance(fname, str):
1988 fid = np.lib._datasource.open(fname, 'rt', encoding=encoding)
1989 fid_ctx = contextlib.closing(fid)
1990 else:
1991 fid = fname
1992 fid_ctx = contextlib.nullcontext(fid)
1993 try:
1994 fhd = iter(fid)
1995 except TypeError as e:
1996 raise TypeError(
1997 "fname must be a string, a filehandle, a sequence of strings,\n"
1998 f"or an iterator of strings. Got {type(fname)} instead."
1999 ) from e
2000 with fid_ctx:
2001 split_line = LineSplitter(delimiter=delimiter, comments=comments,
2002 autostrip=autostrip, encoding=encoding)
2003 validate_names = NameValidator(excludelist=excludelist,
2004 deletechars=deletechars,
2005 case_sensitive=case_sensitive,
2006 replace_space=replace_space)
2008 # Skip the first `skip_header` rows
2009 try:
2010 for i in range(skip_header):
2011 next(fhd)
2013 # Keep on until we find the first valid values
2014 first_values = None
2016 while not first_values:
2017 first_line = _decode_line(next(fhd), encoding)
2018 if (names is True) and (comments is not None):
2019 if comments in first_line:
2020 first_line = (
2021 ''.join(first_line.split(comments)[1:]))
2022 first_values = split_line(first_line)
2023 except StopIteration:
2024 # return an empty array if the datafile is empty
2025 first_line = ''
2026 first_values = []
2027 warnings.warn(
2028 'genfromtxt: Empty input file: "%s"' % fname, stacklevel=2
2029 )
2031        # Should we take the first values as names?
2032 if names is True:
2033 fval = first_values[0].strip()
2034 if comments is not None:
2035 if fval in comments:
2036 del first_values[0]
2038 # Check the columns to use: make sure `usecols` is a list
2039 if usecols is not None:
2040 try:
2041 usecols = [_.strip() for _ in usecols.split(",")]
2042 except AttributeError:
2043 try:
2044 usecols = list(usecols)
2045 except TypeError:
2046 usecols = [usecols, ]
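        # e.g. usecols="a, c" -> ['a', 'c'] (names are resolved to column
        # indices further down), and a bare value such as usecols=2 -> [2]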
2047 nbcols = len(usecols or first_values)
2049 # Check the names and overwrite the dtype.names if needed
2050 if names is True:
2051 names = validate_names([str(_.strip()) for _ in first_values])
2052 first_line = ''
2053 elif _is_string_like(names):
2054 names = validate_names([_.strip() for _ in names.split(',')])
2055 elif names:
2056 names = validate_names(names)
2057 # Get the dtype
2058 if dtype is not None:
2059 dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
2060 excludelist=excludelist,
2061 deletechars=deletechars,
2062 case_sensitive=case_sensitive,
2063 replace_space=replace_space)
2064        # Make sure `names` is a list (for Python 2.5 compatibility)
2065 if names is not None:
2066 names = list(names)
2068 if usecols:
2069 for (i, current) in enumerate(usecols):
2070 # if usecols is a list of names, convert to a list of indices
2071 if _is_string_like(current):
2072 usecols[i] = names.index(current)
2073 elif current < 0:
2074 usecols[i] = current + len(first_values)
2075 # If the dtype is not None, make sure we update it
2076 if (dtype is not None) and (len(dtype) > nbcols):
2077 descr = dtype.descr
2078 dtype = np.dtype([descr[_] for _ in usecols])
2079 names = list(dtype.names)
2080 # If `names` is not None, update the names
2081 elif (names is not None) and (len(names) > nbcols):
2082 names = [names[_] for _ in usecols]
2083 elif (names is not None) and (dtype is not None):
2084 names = list(dtype.names)
2086 # Process the missing values ...............................
2087 # Rename missing_values for convenience
2088 user_missing_values = missing_values or ()
2089 if isinstance(user_missing_values, bytes):
2090 user_missing_values = user_missing_values.decode('latin1')
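        # Accepted forms, handled below:
        #   dict:     {0: 'N/A', 'name': '???', None: '-'} (None -> all columns)
        #   sequence: ('N/A', '???')  (one entry per column)
        #   string:   'N/A,???'  (comma-split, applied to every column)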
2092 # Define the list of missing_values (one column: one list)
2093 missing_values = [list(['']) for _ in range(nbcols)]
2095 # We have a dictionary: process it field by field
2096 if isinstance(user_missing_values, dict):
2097 # Loop on the items
2098 for (key, val) in user_missing_values.items():
2099 # Is the key a string ?
2100 if _is_string_like(key):
2101 try:
2102 # Transform it into an integer
2103 key = names.index(key)
2104 except ValueError:
2105 # We couldn't find it: the name must have been dropped
2106 continue
2107 # Redefine the key as needed if it's a column number
2108 if usecols:
2109 try:
2110 key = usecols.index(key)
2111 except ValueError:
2112 pass
2113                # Transform the value into a list of strings
2114 if isinstance(val, (list, tuple)):
2115 val = [str(_) for _ in val]
2116 else:
2117 val = [str(val), ]
2118 # Add the value(s) to the current list of missing
2119 if key is None:
2120 # None acts as default
2121 for miss in missing_values:
2122 miss.extend(val)
2123 else:
2124 missing_values[key].extend(val)
2125        # We have a sequence: each item matches a column
2126 elif isinstance(user_missing_values, (list, tuple)):
2127 for (value, entry) in zip(user_missing_values, missing_values):
2128 value = str(value)
2129 if value not in entry:
2130 entry.append(value)
2131        # We have a string: apply it to all entries
2132 elif isinstance(user_missing_values, str):
2133 user_value = user_missing_values.split(",")
2134 for entry in missing_values:
2135 entry.extend(user_value)
2136 # We have something else: apply it to all entries
2137 else:
2138 for entry in missing_values:
2139 entry.extend([str(user_missing_values)])
2141 # Process the filling_values ...............................
2142 # Rename the input for convenience
2143 user_filling_values = filling_values
2144 if user_filling_values is None:
2145 user_filling_values = []
2146 # Define the default
2147 filling_values = [None] * nbcols
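        # Accepted forms mirror missing_values: a dict (per column index
        # or name), a sequence (one-to-one with columns), or a scalar
        # that is used for every column.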
2148        # We have a dictionary: update each entry individually
2149 if isinstance(user_filling_values, dict):
2150 for (key, val) in user_filling_values.items():
2151 if _is_string_like(key):
2152 try:
2153 # Transform it into an integer
2154 key = names.index(key)
2155 except ValueError:
2156 # We couldn't find it: the name must have been dropped
2157 continue
2158 # Redefine the key if it's a column number
2159 # and usecols is defined
2160 if usecols:
2161 try:
2162 key = usecols.index(key)
2163 except ValueError:
2164 pass
2165 # Add the value to the list
2166 filling_values[key] = val
2167        # We have a sequence: update on a one-to-one basis
2168 elif isinstance(user_filling_values, (list, tuple)):
2169 n = len(user_filling_values)
2170 if (n <= nbcols):
2171 filling_values[:n] = user_filling_values
2172 else:
2173 filling_values = user_filling_values[:nbcols]
2174        # We have something else: use it for all entries
2175 else:
2176 filling_values = [user_filling_values] * nbcols
2178 # Initialize the converters ................................
2179 if dtype is None:
2180            # Note: we can't use [...]*nbcols, as that would give nbcols
2181            # references to one converter instead of distinct converters.
2182 converters = [
2183 StringConverter(None, missing_values=miss, default=fill)
2184 for (miss, fill) in zip(missing_values, filling_values)
2185 ]
2186 else:
2187 dtype_flat = flatten_dtype(dtype, flatten_base=True)
2188 # Initialize the converters
2189 if len(dtype_flat) > 1:
2190 # Flexible type : get a converter from each dtype
2191 zipit = zip(dtype_flat, missing_values, filling_values)
2192 converters = [StringConverter(dt,
2193 locked=True,
2194 missing_values=miss,
2195 default=fill)
2196 for (dt, miss, fill) in zipit]
2197 else:
2198 # Set to a default converter (but w/ different missing values)
2199 zipit = zip(missing_values, filling_values)
2200 converters = [StringConverter(dtype,
2201 locked=True,
2202 missing_values=miss,
2203 default=fill)
2204 for (miss, fill) in zipit]
2205 # Update the converters to use the user-defined ones
2206 uc_update = []
2207 for (j, conv) in user_converters.items():
2208 # If the converter is specified by column names,
2209 # use the index instead
2210 if _is_string_like(j):
2211 try:
2212 j = names.index(j)
2213 i = j
2214 except ValueError:
2215 continue
2216 elif usecols:
2217 try:
2218 i = usecols.index(j)
2219 except ValueError:
2220 # Unused converter specified
2221 continue
2222 else:
2223 i = j
2224 # Find the value to test - first_line is not filtered by usecols:
2225 if len(first_line):
2226 testing_value = first_values[j]
2227 else:
2228 testing_value = None
2229 if conv is bytes:
2230 user_conv = asbytes
2231 elif byte_converters:
2232                # Converters may use decode to work around numpy's old
2233 # behavior, so encode the string again before passing
2234 # to the user converter.
2235 def tobytes_first(x, conv):
2236 if type(x) is bytes:
2237 return conv(x)
2238 return conv(x.encode("latin1"))
2239 user_conv = functools.partial(tobytes_first, conv=conv)
2240 else:
2241 user_conv = conv
2242 converters[i].update(user_conv, locked=True,
2243 testing_value=testing_value,
2244 default=filling_values[i],
2245 missing_values=missing_values[i],)
2246 uc_update.append((i, user_conv))
2247 # Make sure we have the corrected keys in user_converters...
2248 user_converters.update(uc_update)
2250        # Fixme: possible error, as the following variable is never used.
2251 # miss_chars = [_.missing_values for _ in converters]
2253 # Initialize the output lists ...
2254 # ... rows
2255 rows = []
2256 append_to_rows = rows.append
2257 # ... masks
2258 if usemask:
2259 masks = []
2260 append_to_masks = masks.append
2261 # ... invalid
2262 invalid = []
2263 append_to_invalid = invalid.append
2265 # Parse each line
2266 for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
2267 values = split_line(line)
2268 nbvalues = len(values)
2269 # Skip an empty line
2270 if nbvalues == 0:
2271 continue
2272 if usecols:
2273 # Select only the columns we need
2274 try:
2275 values = [values[_] for _ in usecols]
2276 except IndexError:
2277 append_to_invalid((i + skip_header + 1, nbvalues))
2278 continue
2279 elif nbvalues != nbcols:
2280 append_to_invalid((i + skip_header + 1, nbvalues))
2281 continue
2282 # Store the values
2283 append_to_rows(tuple(values))
2284 if usemask:
2285 append_to_masks(tuple([v.strip() in m
2286 for (v, m) in zip(values,
2287 missing_values)]))
2288 if len(rows) == max_rows:
2289 break
2291 # Upgrade the converters (if needed)
2292 if dtype is None:
2293 for (i, converter) in enumerate(converters):
2294 current_column = [itemgetter(i)(_m) for _m in rows]
2295 try:
2296 converter.iterupgrade(current_column)
2297 except ConverterLockError:
2298 errmsg = "Converter #%i is locked and cannot be upgraded: " % i
2299 current_column = map(itemgetter(i), rows)
2300 for (j, value) in enumerate(current_column):
2301 try:
2302 converter.upgrade(value)
2303 except (ConverterError, ValueError):
2304 errmsg += "(occurred line #%i for value '%s')"
2305 errmsg %= (j + 1 + skip_header, value)
2306 raise ConverterError(errmsg)
2308 # Check that we don't have invalid values
2309 nbinvalid = len(invalid)
2310 if nbinvalid > 0:
2311 nbrows = len(rows) + nbinvalid - skip_footer
2312 # Construct the error message
2313 template = " Line #%%i (got %%i columns instead of %i)" % nbcols
2314 if skip_footer > 0:
2315 nbinvalid_skipped = len([_ for _ in invalid
2316 if _[0] > nbrows + skip_header])
2317 invalid = invalid[:nbinvalid - nbinvalid_skipped]
2318 skip_footer -= nbinvalid_skipped
2319#
2320# nbrows -= skip_footer
2321# errmsg = [template % (i, nb)
2322# for (i, nb) in invalid if i < nbrows]
2323# else:
2324 errmsg = [template % (i, nb)
2325 for (i, nb) in invalid]
2326 if len(errmsg):
2327                errmsg.insert(0, "Some errors were detected!")
2328 errmsg = "\n".join(errmsg)
2329                # Raise an exception?
2330 if invalid_raise:
2331 raise ValueError(errmsg)
2332                # Issue a warning?
2333 else:
2334 warnings.warn(errmsg, ConversionWarning, stacklevel=2)
2336 # Strip the last skip_footer data
2337 if skip_footer > 0:
2338 rows = rows[:-skip_footer]
2339 if usemask:
2340 masks = masks[:-skip_footer]
2342 # Convert each value according to the converter:
2343 # We want to modify the list in place to avoid creating a new one...
2344 if loose:
2345 rows = list(
2346 zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
2347 for (i, conv) in enumerate(converters)]))
2348 else:
2349 rows = list(
2350 zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
2351 for (i, conv) in enumerate(converters)]))
2353 # Reset the dtype
2354 data = rows
2355 if dtype is None:
2356 # Get the dtypes from the types of the converters
2357 column_types = [conv.type for conv in converters]
2358 # Find the columns with strings...
2359 strcolidx = [i for (i, v) in enumerate(column_types)
2360 if v == np.str_]
2362 if byte_converters and strcolidx:
2363 # convert strings back to bytes for backward compatibility
2364 warnings.warn(
2365 "Reading unicode strings without specifying the encoding "
2366 "argument is deprecated. Set the encoding, use None for the "
2367 "system default.",
2368 np.exceptions.VisibleDeprecationWarning, stacklevel=2)
2370 def encode_unicode_cols(row_tup):
2371 row = list(row_tup)
2372 for i in strcolidx:
2373 row[i] = row[i].encode('latin1')
2374 return tuple(row)
2376 try:
2377 data = [encode_unicode_cols(r) for r in data]
2378 except UnicodeEncodeError:
2379 pass
2380 else:
2381 for i in strcolidx:
2382 column_types[i] = np.bytes_
2384 # Update string types to be the right length
2385 sized_column_types = column_types[:]
2386 for i, col_type in enumerate(column_types):
2387 if np.issubdtype(col_type, np.character):
2388 n_chars = max(len(row[i]) for row in data)
2389 sized_column_types[i] = (col_type, n_chars)
2391 if names is None:
2392 # If the dtype is uniform (before sizing strings)
2393 base = {
2394 c_type
2395 for c, c_type in zip(converters, column_types)
2396 if c._checked}
2397 if len(base) == 1:
2398 uniform_type, = base
2399 (ddtype, mdtype) = (uniform_type, bool)
2400 else:
2401 ddtype = [(defaultfmt % i, dt)
2402 for (i, dt) in enumerate(sized_column_types)]
2403 if usemask:
2404 mdtype = [(defaultfmt % i, bool)
2405 for (i, dt) in enumerate(sized_column_types)]
2406 else:
2407 ddtype = list(zip(names, sized_column_types))
2408 mdtype = list(zip(names, [bool] * len(sized_column_types)))
2409 output = np.array(data, dtype=ddtype)
2410 if usemask:
2411 outputmask = np.array(masks, dtype=mdtype)
2412 else:
2413 # Overwrite the initial dtype names if needed
2414 if names and dtype.names is not None:
2415 dtype.names = names
2416 # Case 1. We have a structured type
2417 if len(dtype_flat) > 1:
2418            # Nested dtype, e.g. [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
2419 # First, create the array using a flattened dtype:
2420 # [('a', int), ('b1', int), ('b2', float)]
2421 # Then, view the array using the specified dtype.
2422 if 'O' in (_.char for _ in dtype_flat):
2423 if has_nested_fields(dtype):
2424 raise NotImplementedError(
2425 "Nested fields involving objects are not supported...")
2426 else:
2427 output = np.array(data, dtype=dtype)
2428 else:
2429 rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
2430 output = rows.view(dtype)
2431 # Now, process the rowmasks the same way
2432 if usemask:
2433 rowmasks = np.array(
2434 masks, dtype=np.dtype([('', bool) for t in dtype_flat]))
2435 # Construct the new dtype
2436 mdtype = make_mask_descr(dtype)
2437 outputmask = rowmasks.view(mdtype)
2438 # Case #2. We have a basic dtype
2439 else:
2440 # We used some user-defined converters
2441 if user_converters:
2442 ishomogeneous = True
2443 descr = []
2444 for i, ttype in enumerate([conv.type for conv in converters]):
2445 # Keep the dtype of the current converter
2446 if i in user_converters:
2447 ishomogeneous &= (ttype == dtype.type)
2448 if np.issubdtype(ttype, np.character):
2449 ttype = (ttype, max(len(row[i]) for row in data))
2450 descr.append(('', ttype))
2451 else:
2452 descr.append(('', dtype))
2453                # So we changed the dtype?
2454 if not ishomogeneous:
2455 # We have more than one field
2456 if len(descr) > 1:
2457 dtype = np.dtype(descr)
2458 # We have only one field: drop the name if not needed.
2459 else:
2460 dtype = np.dtype(ttype)
2461 #
2462 output = np.array(data, dtype)
2463 if usemask:
2464 if dtype.names is not None:
2465 mdtype = [(_, bool) for _ in dtype.names]
2466 else:
2467 mdtype = bool
2468 outputmask = np.array(masks, dtype=mdtype)
2469 # Try to take care of the missing data we missed
2470 names = output.dtype.names
2471 if usemask and names:
2472 for (name, conv) in zip(names, converters):
2473 missing_values = [conv(_) for _ in conv.missing_values
2474 if _ != '']
2475 for mval in missing_values:
2476 outputmask[name] |= (output[name] == mval)
2477 # Construct the final array
2478 if usemask:
2479 output = output.view(MaskedArray)
2480 output._mask = outputmask
2482 output = _ensure_ndmin_ndarray(output, ndmin=ndmin)
2484 if unpack:
2485 if names is None:
2486 return output.T
2487 elif len(names) == 1:
2488 # squeeze single-name dtypes too
2489 return output[names[0]]
2490 else:
2491 # For structured arrays with multiple fields,
2492 # return an array for each field.
2493 return [output[field] for field in names]
2494 return output
2497_genfromtxt_with_like = array_function_dispatch()(genfromtxt)
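# A brief sketch (not part of the module) of the converter plumbing in
# `genfromtxt` above: a converter keyed by a column name is remapped to
# a column index and applied to every value in that column.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> s = StringIO("x;y\n1;2\n3;4")
# >>> data = np.genfromtxt(s, delimiter=";", names=True,
# ...                      converters={"x": lambda v: float(v) * 10})
# >>> data.tolist()
# [(10.0, 2.0), (30.0, 4.0)]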
2500def recfromtxt(fname, **kwargs):
2501 """
2502 Load ASCII data from a file and return it in a record array.
2504    If ``usemask=False`` a standard `recarray` is returned;
2505 if ``usemask=True`` a MaskedRecords array is returned.
2507 .. deprecated:: 2.0
2508 Use `numpy.genfromtxt` instead.
2510 Parameters
2511 ----------
2512 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2514 See Also
2515 --------
2516 numpy.genfromtxt : generic function
2518 Notes
2519 -----
2520 By default, `dtype` is None, which means that the data-type of the output
2521 array will be determined from the data.
2523 """
2525 # Deprecated in NumPy 2.0, 2023-07-11
2526 warnings.warn(
2527 "`recfromtxt` is deprecated, "
2528        "use `numpy.genfromtxt` instead. "
2529 "(deprecated in NumPy 2.0)",
2530 DeprecationWarning,
2531 stacklevel=2
2532 )
2534 kwargs.setdefault("dtype", None)
2535 usemask = kwargs.get('usemask', False)
2536 output = genfromtxt(fname, **kwargs)
2537 if usemask:
2538 from numpy.ma.mrecords import MaskedRecords
2539 output = output.view(MaskedRecords)
2540 else:
2541 output = output.view(np.recarray)
2542 return output
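# Migration sketch (not part of the module): the deprecated wrapper above
# amounts to calling `genfromtxt` and viewing the result as a recarray.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> s = StringIO("1 2\n3 4")
# >>> r = np.genfromtxt(s, dtype=None, names=['a', 'b']).view(np.recarray)
# >>> r.a
# array([1, 3])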
2545def recfromcsv(fname, **kwargs):
2546 """
2547 Load ASCII data stored in a comma-separated file.
2549 The returned array is a record array (if ``usemask=False``, see
2550 `recarray`) or a masked record array (if ``usemask=True``,
2551 see `ma.mrecords.MaskedRecords`).
2553 .. deprecated:: 2.0
2554 Use `numpy.genfromtxt` with comma as `delimiter` instead.
2556 Parameters
2557 ----------
2558 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2560 See Also
2561 --------
2562 numpy.genfromtxt : generic function to load ASCII data.
2564 Notes
2565 -----
2566 By default, `dtype` is None, which means that the data-type of the output
2567 array will be determined from the data.
2569 """
2571 # Deprecated in NumPy 2.0, 2023-07-11
2572 warnings.warn(
2573 "`recfromcsv` is deprecated, "
2574 "use `numpy.genfromtxt` with comma as `delimiter` instead. "
2575 "(deprecated in NumPy 2.0)",
2576 DeprecationWarning,
2577 stacklevel=2
2578 )
2580 # Set default kwargs for genfromtxt as relevant to csv import.
2581 kwargs.setdefault("case_sensitive", "lower")
2582 kwargs.setdefault("names", True)
2583 kwargs.setdefault("delimiter", ",")
2584 kwargs.setdefault("dtype", None)
2585 output = genfromtxt(fname, **kwargs)
2587 usemask = kwargs.get("usemask", False)
2588 if usemask:
2589 from numpy.ma.mrecords import MaskedRecords
2590 output = output.view(MaskedRecords)
2591 else:
2592 output = output.view(np.recarray)
2593 return output
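# Migration sketch (not part of the module): an equivalent call without
# the deprecated wrapper, spelling out the defaults set above.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> s = StringIO("A,B\n1,2.5\n3,4.5")
# >>> data = np.genfromtxt(s, delimiter=",", names=True, dtype=None,
# ...                      case_sensitive="lower").view(np.recarray)
# >>> data.b
# array([2.5, 4.5])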