1"""
2IO related functions.
3"""
4import os
5import re
6import functools
7import itertools
8import warnings
9import weakref
10import contextlib
11import operator
12from operator import itemgetter, index as opindex, methodcaller
13from collections.abc import Mapping
14import pickle
16import numpy as np
17from . import format
18from ._datasource import DataSource
19from numpy._core import overrides
20from numpy._core.multiarray import packbits, unpackbits
21from numpy._core._multiarray_umath import _load_from_filelike
22from numpy._core.overrides import set_array_function_like_doc, set_module
23from ._iotools import (
24 LineSplitter, NameValidator, StringConverter, ConverterError,
25 ConverterLockError, ConversionWarning, _is_string_like,
26 has_nested_fields, flatten_dtype, easy_dtype, _decode_line
27 )
28from numpy._utils import asunicode, asbytes
31__all__ = [
32 'savetxt', 'loadtxt', 'genfromtxt', 'load', 'save', 'savez',
33 'savez_compressed', 'packbits', 'unpackbits', 'fromregex'
34 ]
37array_function_dispatch = functools.partial(
38 overrides.array_function_dispatch, module='numpy')


class BagObj:
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib._npyio_impl import BagObj as BO
    >>> class BagDemo:
    ...     def __getitem__(self, key):  # An instance of BagObj(BagDemo)
    ...                                  # will call this method when any
    ...                                  # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key) from None

    def __dir__(self):
        """
        Enables dir(bagobj) to list the files in an NpzFile.

        This also enables tab-completion in an interpreter or IPython.
        """
        return list(object.__getattribute__(self, '_obj').keys())


def zipfile_factory(file, *args, **kwargs):
    """
    Create a ZipFile.

    Allows for Zip64, and the `file` argument can accept file, str, or
    pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
    constructor.
    """
    if not hasattr(file, 'read'):
        file = os.fspath(file)
    import zipfile
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(file, *args, **kwargs)
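
# A minimal usage sketch of zipfile_factory (the path 'demo.npz' is
# hypothetical); Zip64 is always enabled so archives may exceed 4 GiB:
#
#     zf = zipfile_factory('demo.npz', mode='w')
#     zf.writestr('x.npy', b'...')   # members are ordinary zip entries
#     zf.close()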


@set_module('numpy.lib.npyio')
class NpzFile(Mapping):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute look-up can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Parameters
    ----------
    fid : file, str, or pathlib.Path
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> npz
    NpzFile 'object' with keys: x, y
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    # Make __exit__ safe if zipfile_factory raises an exception
    zip = None
    fid = None
    _MAX_REPR_ARRAY_COUNT = 5

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None, *,
                 max_header_size=format._MAX_HEADER_SIZE):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.max_header_size = max_header_size
        self.pickle_kwargs = pickle_kwargs
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs,
                                         max_header_size=self.max_header_size)
            else:
                return self.zip.read(key)
        else:
            raise KeyError(f"{key} is not a file in the archive")
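
    # A sketch of the lookup rules above, assuming an archive created with
    # ``np.savez('demo.npz', x=np.arange(3))``:
    #
    #     npz = np.load('demo.npz')
    #     npz['x']      # '.npy' is appended internally, parsed via read_array
    #     npz['x.npy']  # the raw member name works as well
    #
    # A member that lacks the ``.npy`` magic would be returned as raw bytes.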

    def __contains__(self, key):
        return (key in self._files or key in self.files)

    def __repr__(self):
        # Get filename or default to `object`
        if isinstance(self.fid, str):
            filename = self.fid
        else:
            filename = getattr(self.fid, "name", "object")

        # Get the names of the arrays
        array_names = ', '.join(self.files[:self._MAX_REPR_ARRAY_COUNT])
        if len(self.files) > self._MAX_REPR_ARRAY_COUNT:
            array_names += "..."
        return f"NpzFile {filename!r} with keys: {array_names}"

    # Work around problems with the docstrings in the Mapping methods
    # They contain a `->`, which confuses the type annotation interpretations
    # of sphinx-docs. See gh-25964

    def get(self, key, default=None, /):
        """
        D.get(k[,d]) returns D[k] if k in D, else d.  d defaults to None.
        """
        return Mapping.get(self, key, default)

    def items(self):
        """
        D.items() returns a set-like object providing a view on the items
        """
        return Mapping.items(self)

    def keys(self):
        """
        D.keys() returns a set-like object providing a view on the keys
        """
        return Mapping.keys(self)

    def values(self):
        """
        D.values() returns a set-like object providing a view on the values
        """
        return Mapping.values(self)


@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
         encoding='ASCII', *, max_header_size=format._MAX_HEADER_SIZE):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    .. warning:: Loading files that contain object arrays uses the ``pickle``
                 module, which is not secure against erroneous or maliciously
                 constructed data. Consider passing ``allow_pickle=False`` to
                 load data that is known not to contain object arrays for the
                 safer handling of untrusted sources.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods and must always
        be opened in binary mode.  Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    OSError
        If the input file does not exist or cannot be read.
    UnpicklingError
        If ``allow_pickle=True``, but the file cannot be loaded as a pickle.
    ValueError
        The file contains an object array, but ``allow_pickle=False`` given.
    EOFError
        When calling ``np.load`` multiple times on the same file handle,
        if all data has already been read

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a = np.array([[1, 2, 3], [4, 5, 6]])
    >>> b = np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy._core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'.  'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os.fspath(file), "rb"))
            own_fid = True

        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        if not magic:
            raise EOFError("No data left in file")
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs,
                          max_header_size=max_header_size)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                if allow_pickle:
                    max_header_size = 2**64
                return format.open_memmap(file, mode=mmap_mode,
                                          max_header_size=max_header_size)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs,
                                         max_header_size=max_header_size)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError("Cannot load file containing pickled data "
                                 "when allow_pickle=False")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception as e:
                raise pickle.UnpicklingError(
                    f"Failed to interpret file {file!r} as a pickle") from e


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved.  If file is a file-object,
        then the filename is unchanged.  If file is a string or Path,
        a ``.npy`` extension will be appended to the filename if it does not
        already have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between Python 2 and Python 3).
        Default: True
    fix_imports : bool, optional
        Only useful in forcing objects in object arrays on Python 3 to be
        pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
        will try to map the new Python 3 names to the old module names used in
        Python 2, so that the pickle data stream is readable with Python 2.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    [1 2] [1 3]
    """
    if hasattr(file, 'write'):
        file_ctx = contextlib.nullcontext(file)
    else:
        file = os.fspath(file)
        if not file.endswith('.npy'):
            file = file + '.npy'
        file_ctx = open(file, "wb")

    with file_ctx as fid:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr, allow_pickle=allow_pickle,
                           pickle_kwargs=dict(fix_imports=fix_imports))


def _savez_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, **kwds):
    """Save several arrays into a single file in uncompressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e., ``savez(fn,
    x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Keys passed in `kwds` are used as filenames inside the ZIP archive.
    Therefore, keys should be valid filenames; e.g., avoid keys that begin with
    ``/`` or contain ``.``.

    When naming variables with keyword arguments, it is not possible to name a
    variable ``file``, as this would cause the ``file`` argument to be defined
    twice in the call to ``savez``.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_0', 'arr_1']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> sorted(npzfile.files)
    ['x', 'y']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    _savez(file, args, kwds, False)


def _savez_compressed_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez_compressed(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e.,
    ``savez_compressed(fn, x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os.fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            format.write_array(fid, val,
                               allow_pickle=allow_pickle,
                               pickle_kwargs=pickle_kwargs)

    zipf.close()
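
# Sketch of the naming rule implemented above: positional arrays become
# 'arr_0', 'arr_1', ... while keyword arrays keep their names, so a call like
#
#     np.savez('demo.npz', a, b, x=c)          # 'demo.npz' is hypothetical
#
# writes the members 'arr_0.npy', 'arr_1.npy' and 'x.npy'; passing both a
# positional array and an explicit 'arr_0' keyword raises ValueError.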


def _ensure_ndmin_ndarray_check_param(ndmin):
    """Just checks if the param ndmin is supported on
    _ensure_ndmin_ndarray. It is intended to be used as
    verification before running anything expensive.
    e.g. loadtxt, genfromtxt
    """
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError(f"Illegal value of ndmin keyword: {ndmin}")


def _ensure_ndmin_ndarray(a, *, ndmin: int):
    """This is a helper function of loadtxt and genfromtxt to ensure
    proper minimum dimension as requested

    ndmin : int. Supported values 0, 1, 2
                 ^^ whenever this changes, keep in sync with
                    _ensure_ndmin_ndarray_check_param
    """
    # Verify that the array has at least dimensions `ndmin`.
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if a.ndim > ndmin:
        a = np.squeeze(a)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, a.squeeze().ndim=0
    if a.ndim < ndmin:
        if ndmin == 1:
            a = np.atleast_1d(a)
        elif ndmin == 2:
            a = np.atleast_2d(a).T

    return a
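
# The ndmin promotion above, spelled out on small inputs (shapes shown in
# the comments are what the branches produce):
#
#     _ensure_ndmin_ndarray(np.float64(1.0), ndmin=1)       # -> shape (1,)
#     _ensure_ndmin_ndarray(np.array([1.0, 2.0]), ndmin=2)  # -> shape (2, 1)
#
# i.e. a 1-D input promoted to 2-D is kept as a column via atleast_2d(...).T.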


# amount of lines loadtxt reads in one chunk, can be overridden for testing
_loadtxt_chunksize = 50000


def _check_nonneg_int(value, name="argument"):
    try:
        operator.index(value)
    except TypeError:
        raise TypeError(f"{name} must be an integer") from None
    if value < 0:
        raise ValueError(f"{name} must be nonnegative")


def _preprocess_comments(iterable, comments, encoding):
    """
    Generator that consumes an iterable of lines and strips out the
    multiple (or multi-character) comments from lines.
    This is a pre-processing step to achieve feature parity with loadtxt
    (we assume that this feature is a niche feature).
    """
    for line in iterable:
        if isinstance(line, bytes):
            # Need to handle conversion here, or the splitting would fail
            line = line.decode(encoding)

        for c in comments:
            line = line.split(c, 1)[0]

        yield line
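
# A small sketch of the stripping performed above: every comment marker is
# applied in turn with str.split(marker, 1):
#
#     lines = ["1 2 // trailing", "3 4 ;; other"]
#     list(_preprocess_comments(iter(lines), ("//", ";;"), None))
#     # -> ['1 2 ', '3 4 ']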


# The number of rows we read in one go if confronted with a parametric dtype
_loadtxt_chunksize = 50000


def _read(fname, *, delimiter=',', comment='#', quote='"',
          imaginary_unit='j', usecols=None, skiplines=0,
          max_rows=None, converters=None, ndmin=None, unpack=False,
          dtype=np.float64, encoding=None):
    r"""
    Read a NumPy array from a text file.
    This is a helper function for loadtxt.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        The filename or the file to be read.
    delimiter : str, optional
        Field delimiter separating the values in a line of the file.
        Default is a comma, ','.  If None, any sequence of whitespace is
        considered a delimiter.
    comment : str or sequence of str or None, optional
        Character that begins a comment.  All text from the comment
        character to the end of the line is ignored.
        Multiple comments or multiple-character comment strings are supported,
        but may be slower and `quote` must be empty if used.
        Use None to disable all use of comments.
    quote : str or None, optional
        Character that is used to quote string fields. Default is '"'
        (a double quote). Use None to disable quote support.
    imaginary_unit : str, optional
        Character that represents the imaginary unit `sqrt(-1)`.
        Default is 'j'.
    usecols : array_like, optional
        A one-dimensional array of integer column numbers.  These are the
        columns from the file to be included in the array.  If this value
        is not given, all the columns are used.
    skiplines : int, optional
        Number of lines to skip before interpreting the data in the file.
    max_rows : int, optional
        Maximum number of rows of data to read.  Default is to read the
        entire file.
    converters : dict or callable, optional
        A function to parse all column strings into the desired value, or
        a dictionary mapping column number to a parser function.
        E.g. if column 0 is a date string: ``converters = {0: datestr2num}``.
        Converters can also be used to provide a default value for missing
        data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will
        convert empty fields to 0.
        Default: None
    ndmin : int, optional
        Minimum dimension of the array returned.
        Allowed values are 0, 1 or 2.  Default is 0.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = read(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    dtype : numpy data type
        A NumPy dtype instance, can be a structured dtype to map to the
        columns of the file.
    encoding : str, optional
        Encoding used to decode the inputfile. The special value 'bytes'
        (the default) enables backwards-compatible behavior for `converters`,
        ensuring that inputs to the converter functions are encoded
        bytes objects. The special value 'bytes' has no additional effect if
        ``converters=None``. If encoding is ``'bytes'`` or ``None``, the
        default system encoding is used.

    Returns
    -------
    ndarray
        NumPy array.
    """
    # Handle special 'bytes' keyword for encoding
    byte_converters = False
    if encoding == 'bytes':
        encoding = None
        byte_converters = True

    if dtype is None:
        raise TypeError("a dtype must be provided.")
    dtype = np.dtype(dtype)

    read_dtype_via_object_chunks = None
    if dtype.kind in 'SUM' and (
            dtype == "S0" or dtype == "U0" or dtype == "M8" or dtype == 'm8'):
        # This is a legacy "flexible" dtype.  We do not truly support
        # parametric dtypes currently (no dtype discovery step in the core),
        # but have to support these for backward compatibility.
        read_dtype_via_object_chunks = dtype
        dtype = np.dtype(object)

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints, the C-code
        # handles the rest
        try:
            usecols = list(usecols)
        except TypeError:
            usecols = [usecols]

    _ensure_ndmin_ndarray_check_param(ndmin)

    if comment is None:
        comments = None
    else:
        # assume comments are a sequence of strings
        if "" in comment:
            raise ValueError(
                "comments cannot be an empty string. Use comments=None to "
                "disable comments."
            )
        comments = tuple(comment)
        comment = None
        if len(comments) == 0:
            comments = None  # No comments at all
        elif len(comments) == 1:
            # If there is only one comment, and that comment has one character,
            # the normal parsing can deal with it just fine.
            if isinstance(comments[0], str) and len(comments[0]) == 1:
                comment = comments[0]
                comments = None
        else:
            # Input validation if there are multiple comment characters
            if delimiter in comments:
                raise TypeError(
                    f"Comment characters '{comments}' cannot include the "
                    f"delimiter '{delimiter}'"
                )

    # comment is now either a 1 or 0 character string or a tuple:
    if comments is not None:
        # Note: An earlier version supported two-character comments (and could
        #       have been extended to multiple characters); we assume this is
        #       rare enough not to optimize for.
        if quote is not None:
            raise ValueError(
                "when multiple comments or a multi-character comment is "
                "given, quotes are not supported.  In this case quotechar "
                "must be set to None.")

    if len(imaginary_unit) != 1:
        raise ValueError('len(imaginary_unit) must be 1.')

    _check_nonneg_int(skiplines)
    if max_rows is not None:
        _check_nonneg_int(max_rows)
    else:
        # Passing -1 to the C code means "read the entire file".
        max_rows = -1

    fh_closing_ctx = contextlib.nullcontext()
    filelike = False
    try:
        if isinstance(fname, os.PathLike):
            fname = os.fspath(fname)
        if isinstance(fname, str):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            if encoding is None:
                encoding = getattr(fh, 'encoding', 'latin1')

            fh_closing_ctx = contextlib.closing(fh)
            data = fh
            filelike = True
        else:
            if encoding is None:
                encoding = getattr(fname, 'encoding', 'latin1')
            data = iter(fname)
    except TypeError as e:
        raise ValueError(
            f"fname must be a string, filehandle, list of strings,\n"
            f"or generator. Got {type(fname)} instead.") from e

    with fh_closing_ctx:
        if comments is not None:
            if filelike:
                data = iter(data)
                filelike = False
            data = _preprocess_comments(data, comments, encoding)

        if read_dtype_via_object_chunks is None:
            arr = _load_from_filelike(
                data, delimiter=delimiter, comment=comment, quote=quote,
                imaginary_unit=imaginary_unit,
                usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                converters=converters, dtype=dtype,
                encoding=encoding, filelike=filelike,
                byte_converters=byte_converters)

        else:
            # This branch reads the file into chunks of object arrays and then
            # casts them to the desired actual dtype.  This ensures correct
            # string-length and datetime-unit discovery (like `arr.astype()`).
            # Due to chunking, certain error reports are less clear, currently.
            if filelike:
                data = iter(data)  # cannot chunk when reading from file

            c_byte_converters = False
            if read_dtype_via_object_chunks == "S":
                c_byte_converters = True  # Use latin1 rather than ascii

            chunks = []
            while max_rows != 0:
                if max_rows < 0:
                    chunk_size = _loadtxt_chunksize
                else:
                    chunk_size = min(_loadtxt_chunksize, max_rows)

                next_arr = _load_from_filelike(
                    data, delimiter=delimiter, comment=comment, quote=quote,
                    imaginary_unit=imaginary_unit,
                    usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                    converters=converters, dtype=dtype,
                    encoding=encoding, filelike=filelike,
                    byte_converters=byte_converters,
                    c_byte_converters=c_byte_converters)
                # Cast here already.  We hope that this is better even for
                # large files because the storage is more compact.  It could
                # be adapted (in principle the concatenate could cast).
                chunks.append(next_arr.astype(read_dtype_via_object_chunks))

                skiplines = 0  # Only have to skip for first chunk
                if max_rows >= 0:
                    max_rows -= chunk_size
                if len(next_arr) < chunk_size:
                    # There was less data than requested, so we are done.
                    break

            # Need at least one chunk, but if empty, the last one may have
            # the wrong shape.
            if len(chunks) > 1 and len(chunks[-1]) == 0:
                del chunks[-1]
            if len(chunks) == 1:
                arr = chunks[0]
            else:
                arr = np.concatenate(chunks, axis=0)

    # NOTE: ndmin works as advertised for structured dtypes, but normally
    #       these would return a 1D result plus the structured dimension,
    #       so ndmin=2 adds a third dimension even when no squeezing occurs.
    #       A `squeeze=False` could be a better solution (pandas uses squeeze).
    arr = _ensure_ndmin_ndarray(arr, ndmin=ndmin)

    if arr.shape:
        if arr.shape[0] == 0:
            warnings.warn(
                f'loadtxt: input contained no data: "{fname}"',
                category=UserWarning,
                stacklevel=3
            )

    if unpack:
        # Unpack structured dtypes if requested:
        dt = arr.dtype
        if dt.names is not None:
            # For structured arrays, return an array for each field.
            return [arr[field] for field in dt.names]
        else:
            return arr.T
    else:
        return arr
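
# Sketch of the chunked path above for a parametric dtype: "U0" means
# "discover the string length", so rows are parsed into object-dtype chunks
# of at most _loadtxt_chunksize lines, cast with astype() (which finds the
# final width), and concatenated:
#
#     from io import StringIO
#     _read(StringIO("ab cd\nefg h\n"), delimiter=' ', dtype="U0", ndmin=0)
#     # -> roughly array([['ab', 'cd'], ['efg', 'h']], dtype='<U3')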


@set_array_function_like_doc
@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding=None, max_rows=None, *, quotechar=None,
            like=None):
    r"""
    Load data from a text file.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
        that generators must return bytes or strings. The strings
        in a list or produced by a generator are treated as lines.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str or None, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The character used to separate the values. For backwards compatibility,
        byte strings will be decoded as 'latin1'. The default is whitespace.

        .. versionchanged:: 1.23.0
            Only single character delimiters are supported. Newline characters
            cannot be used as the delimiter.

    converters : dict or callable, optional
        Converter functions to customize value parsing. If `converters` is
        callable, the function is applied to all columns, else it must be a
        dict that maps column number to a parser function.
        See examples for further details.
        Default: None.

        .. versionchanged:: 1.23.0
            The ability to pass a single callable to be applied to all columns
            was added.

    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read it is possible to use
            an integer instead of a tuple. E.g ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the inputfile. Does not apply to input streams.
        The special value 'bytes' enables backward compatibility workarounds
        that ensure you receive byte arrays as results if possible and pass
        'latin1' encoded strings to converters. Override this value to receive
        unicode arrays and pass strings as input to converters.  If set to None
        the system default is used. The default value is None.

        .. versionadded:: 1.14.0
        .. versionchanged:: 2.0
            Before NumPy 2, the default was ``'bytes'`` for Python 2
            compatibility. The default is now ``None``.

    max_rows : int, optional
        Read `max_rows` rows of content after `skiprows` lines. The default is
        to read all the rows. Note that empty rows containing no data such as
        empty lines and comment lines are not counted towards `max_rows`,
        while such lines are counted in `skiprows`.

        .. versionadded:: 1.16.0

        .. versionchanged:: 1.23.0
            Lines containing no data, including comment lines (e.g., lines
            starting with '#' or as specified via `comments`) are not counted
            towards `max_rows`.
    quotechar : unicode character or None, optional
        The character used to denote the start and end of a quoted item.
        Occurrences of the delimiter or comment characters are ignored within
        a quoted item. The default value is ``quotechar=None``, which means
        quoting support is disabled.

        If two consecutive instances of `quotechar` are found within a quoted
        field, the first is treated as an escape character. See examples.

        .. versionadded:: 1.23.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Each row in the input text file must have the same number of values to be
    able to read all values.  If not all rows have the same number of values, a
    subset of up to n columns (where n is the least number of values present
    in all rows) can be read by specifying the columns via `usecols`.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    The `converters` argument is used to specify functions to preprocess the
    text prior to parsing. `converters` can be a dictionary that maps
    preprocessing functions to each column:

    >>> s = StringIO("1.618, 2.296\n3.141, 4.669\n")
    >>> conv = {
    ...     0: lambda x: np.floor(float(x)),  # conversion fn for column 0
    ...     1: lambda x: np.ceil(float(x)),   # conversion fn for column 1
    ... }
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([[1., 3.],
           [3., 5.]])

    `converters` can be a callable instead of a dictionary, in which case it
    is applied to all columns:

    >>> s = StringIO("0xDE 0xAD\n0xC0 0xDE")
    >>> import functools
    >>> conv = functools.partial(int, base=16)
    >>> np.loadtxt(s, converters=conv)
    array([[222., 173.],
           [192., 222.]])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith("-") else float(fld)
    ...
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Using a callable as the converter can be particularly useful for handling
    values with different formatting, e.g. floats with underscores:

    >>> s = StringIO("1 2.7 100_000")
    >>> np.loadtxt(s, converters=float)
    array([1.e+00, 2.7e+00, 1.e+05])

    This idea can be extended to automatically handle values specified in
    many different formats, such as hex values:

    >>> def conv(val):
    ...     try:
    ...         return float(val)
    ...     except ValueError:
    ...         return float.fromhex(val)
    >>> s = StringIO("1, 2.5, 3_000, 0b4, 0x1.4000000000000p+2")
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([1.0e+00, 2.5e+00, 3.0e+03, 1.8e+02, 5.0e+00])

    Or a format where the ``-`` sign comes after the number:

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> conv = lambda x: -float(x[:-1]) if x.endswith("-") else float(x)
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Support for quoted fields is enabled with the `quotechar` parameter.
    Comment and delimiter characters are ignored when they appear within a
    quoted item delineated by `quotechar`:

    >>> s = StringIO('"alpha, #42", 10.0\n"beta, #64", 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=",", quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Quoted fields can be separated by multiple whitespace characters:

    >>> s = StringIO('"alpha, #42" 10.0\n"beta, #64" 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=None, quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Two consecutive quote characters within a quoted field are treated as a
    single escaped character:

    >>> s = StringIO('"Hello, my name is ""Monty""!"')
    >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"')
    array('Hello, my name is "Monty"!', dtype='<U26')

    Read subset of columns when all rows do not contain equal number of values:

    >>> d = StringIO("1 2\n2 4\n3 9 12\n4 16 20")
    >>> np.loadtxt(d, usecols=(0, 1))
    array([[ 1.,  2.],
           [ 2.,  4.],
           [ 3.,  9.],
           [ 4., 16.]])

    """
    if like is not None:
        return _loadtxt_with_like(
            like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
            converters=converters, skiprows=skiprows, usecols=usecols,
            unpack=unpack, ndmin=ndmin, encoding=encoding,
            max_rows=max_rows
        )

    if dtype is None:
        dtype = np.float64

    comment = comments
    # Control character type conversions for Py3 convenience
    if comment is not None:
        if isinstance(comment, (str, bytes)):
            comment = [comment]
        comment = [
            x.decode('latin1') if isinstance(x, bytes) else x for x in comment]
    if isinstance(delimiter, bytes):
        delimiter = delimiter.decode('latin1')

    arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
                converters=converters, skiplines=skiprows, usecols=usecols,
                unpack=unpack, ndmin=ndmin, encoding=encoding,
                max_rows=max_rows, quote=quotechar)

    return arr


_loadtxt_with_like = array_function_dispatch()(loadtxt)


def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
                        header=None, footer=None, comments=None,
                        encoding=None):
    return (X,)


@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename, file handle or pathlib.Path
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, ``fmt='%.4e'``, resulting in numbers formatted
          like ``' (%s+%sj)' % (fmt, fmt)``
        * a full string specifying every real and imaginary part, e.g.
          ``' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'`` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. ``['%.3e + %.3ej', '(%.15e%+.15ej)']`` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0
    encoding : {None, str}, optional
        Encoding used to encode the outputfile. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1'
        you will not be able to load the file in NumPy versions < 1.14. Default
        is 'latin1'.

        .. versionadded:: 1.14.0

    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
           Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0, 5.0, 1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x, y, z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """

    class WriteWrap:
        """Convert to bytes on bytestream inputs.

        """
        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    if isinstance(fname, os.PathLike):
        fname = os.fspath(fname)
    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape.  %s' % str(fmt))
            format = delimiter.join(fmt)
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats:  %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = format % tuple(row) + newline
                except TypeError as e:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), format)) from e
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()
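
# Sketch of the complex-input path above: a single specifier is expanded to
# ' (%s+%sj)' per column and any '+-' produced by a negative imaginary part
# is rewritten to '-'.  Assuming sys is imported:
#
#     np.savetxt(sys.stdout, np.array([[1 - 2j]]), fmt='%.2e')
#     # writes roughly: (1.00e+00-2.00e+00j)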
1640@set_module('numpy')
1641def fromregex(file, regexp, dtype, encoding=None):
1642 r"""
1643 Construct an array from a text file, using regular expression parsing.
1645 The returned array is always a structured array, and is constructed from
1646 all matches of the regular expression in the file. Groups in the regular
1647 expression are converted to fields of the structured array.
1649 Parameters
1650 ----------
1651 file : file, str, or pathlib.Path
1652 Filename or file object to read.
1654 .. versionchanged:: 1.22.0
1655 Now accepts `os.PathLike` implementations.
1656 regexp : str or regexp
1657 Regular expression used to parse the file.
1658 Groups in the regular expression correspond to fields in the dtype.
1659 dtype : dtype or list of dtypes
1660 Dtype for the structured array; must be a structured datatype.
1661 encoding : str, optional
1662 Encoding used to decode the inputfile. Does not apply to input streams.
1664 .. versionadded:: 1.14.0
1666 Returns
1667 -------
1668 output : ndarray
1669 The output array, containing the part of the content of `file` that
1670 was matched by `regexp`. `output` is always a structured array.
1672 Raises
1673 ------
1674 TypeError
1675 When `dtype` is not a valid dtype for a structured array.
1677 See Also
1678 --------
1679 fromstring, loadtxt
1681 Notes
1682 -----
1683 Dtypes for structured arrays can be specified in several forms, but all
1684 forms specify at least the data type and field name. For details see
1685 `basics.rec`.
1687 Examples
1688 --------
1689 >>> from io import StringIO
1690 >>> text = StringIO("1312 foo\n1534 bar\n444 qux")
1692 >>> regexp = r"(\d+)\s+(...)" # match [digits, whitespace, anything]
1693 >>> output = np.fromregex(text, regexp,
1694 ... [('num', np.int64), ('key', 'S3')])
1695 >>> output
1696 array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
1697 dtype=[('num', '<i8'), ('key', 'S3')])
1698 >>> output['num']
1699    array([1312, 1534,  444])
1701 """
1702 own_fh = False
1703 if not hasattr(file, "read"):
1704 file = os.fspath(file)
1705 file = np.lib._datasource.open(file, 'rt', encoding=encoding)
1706 own_fh = True
1708 try:
1709 if not isinstance(dtype, np.dtype):
1710 dtype = np.dtype(dtype)
1711 if dtype.names is None:
1712 raise TypeError('dtype must be a structured datatype.')
1714 content = file.read()
1715 if isinstance(content, bytes) and isinstance(regexp, str):
1716 regexp = asbytes(regexp)
1718 if not hasattr(regexp, 'match'):
1719 regexp = re.compile(regexp)
1720 seq = regexp.findall(content)
1721 if seq and not isinstance(seq[0], tuple):
1722 # Only one group is in the regexp.
1723 # Create the new array as a single data-type and then
1724 # re-interpret as a single-field structured array.
1725 newdtype = np.dtype(dtype[dtype.names[0]])
1726 output = np.array(seq, dtype=newdtype)
1727 output.dtype = dtype
1728 else:
1729 output = np.array(seq, dtype=dtype)
1731 return output
1732 finally:
1733 if own_fh:
1734 file.close()
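# A minimal sketch (not part of the module) of the single-group path
# above: `findall` yields plain strings rather than tuples, so the array
# is first built with the field's base dtype and then reinterpreted as a
# one-field structured array.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> np.fromregex(StringIO("ab 12\ncd 34"), r"(\d+)",
# ...              [('num', np.int64)])
# array([(12,), (34,)], dtype=[('num', '<i8')])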
1737#####--------------------------------------------------------------------------
1738#---- --- ASCII functions ---
1739#####--------------------------------------------------------------------------
1742@set_array_function_like_doc
1743@set_module('numpy')
1744def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
1745 skip_header=0, skip_footer=0, converters=None,
1746 missing_values=None, filling_values=None, usecols=None,
1747 names=None, excludelist=None,
1748 deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
1749 replace_space='_', autostrip=False, case_sensitive=True,
1750 defaultfmt="f%i", unpack=None, usemask=False, loose=True,
1751 invalid_raise=True, max_rows=None, encoding=None,
1752 *, ndmin=0, like=None):
1753 """
1754 Load data from a text file, with missing values handled as specified.
1756 Each line past the first `skip_header` lines is split at the `delimiter`
1757 character, and characters following the `comments` character are discarded.
1759 Parameters
1760 ----------
1761 fname : file, str, pathlib.Path, list of str, generator
1762 File, filename, list, or generator to read. If the filename
1763 extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
1764 that generators must return bytes or strings. The strings
1765 in a list or produced by a generator are treated as lines.
1766 dtype : dtype, optional
1767 Data type of the resulting array.
1768 If None, the dtypes will be determined by the contents of each
1769 column, individually.
1770 comments : str, optional
1771 The character used to indicate the start of a comment.
1772 All the characters occurring on a line after a comment are discarded.
1773 delimiter : str, int, or sequence, optional
1774 The string used to separate values. By default, any consecutive
1775 whitespaces act as delimiter. An integer or sequence of integers
1776 can also be provided as width(s) of each field.
1777 skiprows : int, optional
1778 `skiprows` was removed in numpy 1.10. Please use `skip_header` instead.
1779 skip_header : int, optional
1780 The number of lines to skip at the beginning of the file.
1781 skip_footer : int, optional
1782 The number of lines to skip at the end of the file.
1783 converters : variable, optional
1784 The set of functions that convert the data of a column to a value.
1785 The converters can also be used to provide a default value
1786 for missing data: ``converters = {3: lambda s: float(s or 0)}``.
1787 missing : variable, optional
1788 `missing` was removed in numpy 1.10. Please use `missing_values`
1789 instead.
1790 missing_values : variable, optional
1791 The set of strings corresponding to missing data.
1792 filling_values : variable, optional
1793 The set of values to be used as default when the data are missing.
1794 usecols : sequence, optional
1795 Which columns to read, with 0 being the first. For example,
1796 ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
1797 names : {None, True, str, sequence}, optional
1798 If `names` is True, the field names are read from the first line after
1799 the first `skip_header` lines. This line can optionally be preceded
1800 by a comment delimiter. Any content before the comment delimiter is
1801        discarded. If `names` is a sequence or a single string of
1802 comma-separated names, the names will be used to define the field
1803 names in a structured dtype. If `names` is None, the names of the
1804 dtype fields will be used, if any.
1805 excludelist : sequence, optional
1806 A list of names to exclude. This list is appended to the default list
1807 ['return','file','print']. Excluded names are appended with an
1808 underscore: for example, `file` would become `file_`.
1809 deletechars : str, optional
1810 A string combining invalid characters that must be deleted from the
1811 names.
1812 defaultfmt : str, optional
1813 A format used to define default field names, such as "f%i" or "f_%02i".
1814 autostrip : bool, optional
1815 Whether to automatically strip white spaces from the variables.
1816 replace_space : char, optional
1817        Character(s) used to replace white spaces in the variable
1818        names. The default is '_'.
1819 case_sensitive : {True, False, 'upper', 'lower'}, optional
1820 If True, field names are case sensitive.
1821 If False or 'upper', field names are converted to upper case.
1822 If 'lower', field names are converted to lower case.
1823 unpack : bool, optional
1824 If True, the returned array is transposed, so that arguments may be
1825 unpacked using ``x, y, z = genfromtxt(...)``. When used with a
1826 structured data-type, arrays are returned for each field.
1827 Default is False.
1828 usemask : bool, optional
1829 If True, return a masked array.
1830 If False, return a regular array.
1831 loose : bool, optional
1832 If True, do not raise errors for invalid values.
1833 invalid_raise : bool, optional
1834 If True, an exception is raised if an inconsistency is detected in the
1835 number of columns.
1836 If False, a warning is emitted and the offending lines are skipped.
1837 max_rows : int, optional
1838        The maximum number of rows to read. Must not be used together
1839        with `skip_footer`. If given, the value must be at least 1.
1840        Default is to read the entire file.
1842 .. versionadded:: 1.10.0
1843 encoding : str, optional
1844        Encoding used to decode the input file. Does not apply when `fname`
1845 is a file object. The special value 'bytes' enables backward
1846 compatibility workarounds that ensure that you receive byte arrays
1847 when possible and passes latin1 encoded strings to converters.
1848 Override this value to receive unicode arrays and pass strings
1849        as input to converters. If set to None, the system default is used.
1850        The default value is None.
1852 .. versionadded:: 1.14.0
1853 .. versionchanged:: 2.0
1854 Before NumPy 2, the default was ``'bytes'`` for Python 2
1855 compatibility. The default is now ``None``.
1857 ndmin : int, optional
1858        Same parameter as `loadtxt`.
1860 .. versionadded:: 1.23.0
1861 ${ARRAY_FUNCTION_LIKE}
1863 .. versionadded:: 1.20.0
1865 Returns
1866 -------
1867 out : ndarray
1868 Data read from the text file. If `usemask` is True, this is a
1869 masked array.
1871 See Also
1872 --------
1873 numpy.loadtxt : equivalent function when no data is missing.
1875 Notes
1876 -----
1877 * When spaces are used as delimiters, or when no delimiter has been given
1878 as input, there should not be any missing data between two fields.
1879 * When variables are named (either by a flexible dtype or with a `names`
1880 sequence), there must not be any header in the file (else a ValueError
1881 exception is raised).
1882 * Individual values are not stripped of spaces by default.
1883      When using a custom converter, make sure the function removes spaces.
1884 * Custom converters may receive unexpected values due to dtype
1885 discovery.
1887 References
1888 ----------
1889 .. [1] NumPy User Guide, section `I/O with NumPy
1890 <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.
1892 Examples
1893 --------
1894 >>> from io import StringIO
1895 >>> import numpy as np
1897    Comma-delimited file with mixed dtype
1899 >>> s = StringIO("1,1.3,abcde")
1900 >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
1901 ... ('mystring','S5')], delimiter=",")
1902 >>> data
1903 array((1, 1.3, b'abcde'),
1904 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1906 Using dtype = None
1908 >>> _ = s.seek(0) # needed for StringIO example only
1909 >>> data = np.genfromtxt(s, dtype=None,
1910 ... names = ['myint','myfloat','mystring'], delimiter=",")
1911 >>> data
1912 array((1, 1.3, 'abcde'),
1913 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '<U5')])
1915 Specifying dtype and names
1917 >>> _ = s.seek(0)
1918 >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
1919 ... names=['myint','myfloat','mystring'], delimiter=",")
1920 >>> data
1921 array((1, 1.3, b'abcde'),
1922 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1924 An example with fixed-width columns
1926 >>> s = StringIO("11.3abcde")
1927 >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
1928 ... delimiter=[1,3,5])
1929 >>> data
1930 array((1, 1.3, 'abcde'),
1931 dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '<U5')])
1933 An example to show comments
1935 >>> f = StringIO('''
1936 ... text,# of chars
1937 ... hello world,11
1938 ... numpy,5''')
1939 >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',')
1940 array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')],
1941 dtype=[('f0', 'S12'), ('f1', 'S12')])
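
    An example with a missing value, masked when ``usemask=True`` (an
    empty field is treated as missing by default)

    >>> s = StringIO('''1,2,3
    ... 4,,6''')
    >>> data = np.genfromtxt(s, delimiter=",", usemask=True)
    >>> data.mask
    array([[False, False, False],
           [False,  True, False]])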
1943 """
1945 if like is not None:
1946 return _genfromtxt_with_like(
1947 like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
1948 skip_header=skip_header, skip_footer=skip_footer,
1949 converters=converters, missing_values=missing_values,
1950 filling_values=filling_values, usecols=usecols, names=names,
1951 excludelist=excludelist, deletechars=deletechars,
1952 replace_space=replace_space, autostrip=autostrip,
1953 case_sensitive=case_sensitive, defaultfmt=defaultfmt,
1954 unpack=unpack, usemask=usemask, loose=loose,
1955 invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding,
1956 ndmin=ndmin,
1957 )
1959 _ensure_ndmin_ndarray_check_param(ndmin)
1961 if max_rows is not None:
1962 if skip_footer:
1963 raise ValueError(
1964 "The keywords 'skip_footer' and 'max_rows' can not be "
1965 "specified at the same time.")
1966 if max_rows < 1:
1967 raise ValueError("'max_rows' must be at least 1.")
1969 if usemask:
1970 from numpy.ma import MaskedArray, make_mask_descr
1971 # Check the input dictionary of converters
1972 user_converters = converters or {}
1973 if not isinstance(user_converters, dict):
1974 raise TypeError(
1975            "The input argument 'converters' should be a valid dictionary "
1976 "(got '%s' instead)" % type(user_converters))
1978 if encoding == 'bytes':
1979 encoding = None
1980 byte_converters = True
1981 else:
1982 byte_converters = False
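    # In legacy 'bytes' mode, converters further down receive
    # latin1-encoded bytes and string columns are encoded back to bytes
    # in the output array (see the byte_converters branches below).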
1984 # Initialize the filehandle, the LineSplitter and the NameValidator
1985 if isinstance(fname, os.PathLike):
1986 fname = os.fspath(fname)
1987 if isinstance(fname, str):
1988 fid = np.lib._datasource.open(fname, 'rt', encoding=encoding)
1989 fid_ctx = contextlib.closing(fid)
1990 else:
1991 fid = fname
1992 fid_ctx = contextlib.nullcontext(fid)
1993 try:
1994 fhd = iter(fid)
1995 except TypeError as e:
1996 raise TypeError(
1997 "fname must be a string, a filehandle, a sequence of strings,\n"
1998 f"or an iterator of strings. Got {type(fname)} instead."
1999 ) from e
2000 with fid_ctx:
2001 split_line = LineSplitter(delimiter=delimiter, comments=comments,
2002 autostrip=autostrip, encoding=encoding)
2003 validate_names = NameValidator(excludelist=excludelist,
2004 deletechars=deletechars,
2005 case_sensitive=case_sensitive,
2006 replace_space=replace_space)
2008 # Skip the first `skip_header` rows
2009 try:
2010 for i in range(skip_header):
2011 next(fhd)
2013 # Keep on until we find the first valid values
2014 first_values = None
2016 while not first_values:
2017 first_line = _decode_line(next(fhd), encoding)
2018 if (names is True) and (comments is not None):
2019 if comments in first_line:
2020 first_line = (
2021 ''.join(first_line.split(comments)[1:]))
2022 first_values = split_line(first_line)
2023 except StopIteration:
2024 # return an empty array if the datafile is empty
2025 first_line = ''
2026 first_values = []
2027 warnings.warn(
2028 'genfromtxt: Empty input file: "%s"' % fname, stacklevel=2
2029 )
2031        # Should we take the first values as names?
2032 if names is True:
2033 fval = first_values[0].strip()
2034 if comments is not None:
2035 if fval in comments:
2036 del first_values[0]
2038 # Check the columns to use: make sure `usecols` is a list
2039 if usecols is not None:
2040 try:
2041 usecols = [_.strip() for _ in usecols.split(",")]
2042 except AttributeError:
2043 try:
2044 usecols = list(usecols)
2045 except TypeError:
2046 usecols = [usecols, ]
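        # e.g. usecols="a, c" -> ['a', 'c'] (names are resolved to column
        # indices further down), and a bare value such as usecols=2 -> [2]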
2047 nbcols = len(usecols or first_values)
2049 # Check the names and overwrite the dtype.names if needed
2050 if names is True:
2051 names = validate_names([str(_.strip()) for _ in first_values])
2052 first_line = ''
2053 elif _is_string_like(names):
2054 names = validate_names([_.strip() for _ in names.split(',')])
2055 elif names:
2056 names = validate_names(names)
2057 # Get the dtype
2058 if dtype is not None:
2059 dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
2060 excludelist=excludelist,
2061 deletechars=deletechars,
2062 case_sensitive=case_sensitive,
2063 replace_space=replace_space)
2064        # Make sure `names` is a list (for Python 2.5 compatibility)
2065 if names is not None:
2066 names = list(names)
2068 if usecols:
2069 for (i, current) in enumerate(usecols):
2070 # if usecols is a list of names, convert to a list of indices
2071 if _is_string_like(current):
2072 usecols[i] = names.index(current)
2073 elif current < 0:
2074 usecols[i] = current + len(first_values)
2075 # If the dtype is not None, make sure we update it
2076 if (dtype is not None) and (len(dtype) > nbcols):
2077 descr = dtype.descr
2078 dtype = np.dtype([descr[_] for _ in usecols])
2079 names = list(dtype.names)
2080 # If `names` is not None, update the names
2081 elif (names is not None) and (len(names) > nbcols):
2082 names = [names[_] for _ in usecols]
2083 elif (names is not None) and (dtype is not None):
2084 names = list(dtype.names)
2086 # Process the missing values ...............................
2087 # Rename missing_values for convenience
2088 user_missing_values = missing_values or ()
2089 if isinstance(user_missing_values, bytes):
2090 user_missing_values = user_missing_values.decode('latin1')
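        # Accepted forms, handled below:
        #   dict:     {0: 'N/A', 'name': '???', None: '-'} (None -> all columns)
        #   sequence: ('N/A', '???')  (one entry per column)
        #   string:   'N/A,???'  (comma-split, applied to every column)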
2092 # Define the list of missing_values (one column: one list)
2093 missing_values = [list(['']) for _ in range(nbcols)]
2095 # We have a dictionary: process it field by field
2096 if isinstance(user_missing_values, dict):
2097 # Loop on the items
2098 for (key, val) in user_missing_values.items():
2099 # Is the key a string ?
2100 if _is_string_like(key):
2101 try:
2102 # Transform it into an integer
2103 key = names.index(key)
2104 except ValueError:
2105 # We couldn't find it: the name must have been dropped
2106 continue
2107 # Redefine the key as needed if it's a column number
2108 if usecols:
2109 try:
2110 key = usecols.index(key)
2111 except ValueError:
2112 pass
2113                # Transform the value into a list of strings
2114 if isinstance(val, (list, tuple)):
2115 val = [str(_) for _ in val]
2116 else:
2117 val = [str(val), ]
2118 # Add the value(s) to the current list of missing
2119 if key is None:
2120 # None acts as default
2121 for miss in missing_values:
2122 miss.extend(val)
2123 else:
2124 missing_values[key].extend(val)
2125        # We have a sequence: each item matches a column
2126 elif isinstance(user_missing_values, (list, tuple)):
2127 for (value, entry) in zip(user_missing_values, missing_values):
2128 value = str(value)
2129 if value not in entry:
2130 entry.append(value)
2131        # We have a string: apply it to all entries
2132 elif isinstance(user_missing_values, str):
2133 user_value = user_missing_values.split(",")
2134 for entry in missing_values:
2135 entry.extend(user_value)
2136 # We have something else: apply it to all entries
2137 else:
2138 for entry in missing_values:
2139 entry.extend([str(user_missing_values)])
2141 # Process the filling_values ...............................
2142 # Rename the input for convenience
2143 user_filling_values = filling_values
2144 if user_filling_values is None:
2145 user_filling_values = []
2146 # Define the default
2147 filling_values = [None] * nbcols
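        # Accepted forms mirror missing_values: a dict (per column index
        # or name), a sequence (one-to-one with columns), or a scalar
        # that is used for every column.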
2148        # We have a dictionary: update each entry individually
2149 if isinstance(user_filling_values, dict):
2150 for (key, val) in user_filling_values.items():
2151 if _is_string_like(key):
2152 try:
2153 # Transform it into an integer
2154 key = names.index(key)
2155 except ValueError:
2156 # We couldn't find it: the name must have been dropped
2157 continue
2158 # Redefine the key if it's a column number
2159 # and usecols is defined
2160 if usecols:
2161 try:
2162 key = usecols.index(key)
2163 except ValueError:
2164 pass
2165 # Add the value to the list
2166 filling_values[key] = val
2167        # We have a sequence: update on a one-to-one basis
2168 elif isinstance(user_filling_values, (list, tuple)):
2169 n = len(user_filling_values)
2170 if (n <= nbcols):
2171 filling_values[:n] = user_filling_values
2172 else:
2173 filling_values = user_filling_values[:nbcols]
2174        # We have something else: use it for all entries
2175 else:
2176 filling_values = [user_filling_values] * nbcols
2178 # Initialize the converters ................................
2179 if dtype is None:
2180            # Note: we can't use [...]*nbcols, as that would give nbcols
2181            # references to one converter instead of distinct converters.
2182 converters = [
2183 StringConverter(None, missing_values=miss, default=fill)
2184 for (miss, fill) in zip(missing_values, filling_values)
2185 ]
2186 else:
2187 dtype_flat = flatten_dtype(dtype, flatten_base=True)
2188 # Initialize the converters
2189 if len(dtype_flat) > 1:
2190 # Flexible type : get a converter from each dtype
2191 zipit = zip(dtype_flat, missing_values, filling_values)
2192 converters = [StringConverter(dt,
2193 locked=True,
2194 missing_values=miss,
2195 default=fill)
2196 for (dt, miss, fill) in zipit]
2197 else:
2198 # Set to a default converter (but w/ different missing values)
2199 zipit = zip(missing_values, filling_values)
2200 converters = [StringConverter(dtype,
2201 locked=True,
2202 missing_values=miss,
2203 default=fill)
2204 for (miss, fill) in zipit]
2205 # Update the converters to use the user-defined ones
2206 uc_update = []
2207 for (j, conv) in user_converters.items():
2208 # If the converter is specified by column names,
2209 # use the index instead
2210 if _is_string_like(j):
2211 try:
2212 j = names.index(j)
2213 i = j
2214 except ValueError:
2215 continue
2216 elif usecols:
2217 try:
2218 i = usecols.index(j)
2219 except ValueError:
2220 # Unused converter specified
2221 continue
2222 else:
2223 i = j
2224 # Find the value to test - first_line is not filtered by usecols:
2225 if len(first_line):
2226 testing_value = first_values[j]
2227 else:
2228 testing_value = None
2229 if conv is bytes:
2230 user_conv = asbytes
2231 elif byte_converters:
2232                # Converters may use decode to work around numpy's old
2233 # behavior, so encode the string again before passing
2234 # to the user converter.
2235 def tobytes_first(x, conv):
2236 if type(x) is bytes:
2237 return conv(x)
2238 return conv(x.encode("latin1"))
2239 user_conv = functools.partial(tobytes_first, conv=conv)
2240 else:
2241 user_conv = conv
2242 converters[i].update(user_conv, locked=True,
2243 testing_value=testing_value,
2244 default=filling_values[i],
2245 missing_values=missing_values[i],)
2246 uc_update.append((i, user_conv))
2247 # Make sure we have the corrected keys in user_converters...
2248 user_converters.update(uc_update)
2250        # Fixme: possible error, as the following variable is never used.
2251 # miss_chars = [_.missing_values for _ in converters]
2253 # Initialize the output lists ...
2254 # ... rows
2255 rows = []
2256 append_to_rows = rows.append
2257 # ... masks
2258 if usemask:
2259 masks = []
2260 append_to_masks = masks.append
2261 # ... invalid
2262 invalid = []
2263 append_to_invalid = invalid.append
2265 # Parse each line
2266 for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
2267 values = split_line(line)
2268 nbvalues = len(values)
2269 # Skip an empty line
2270 if nbvalues == 0:
2271 continue
2272 if usecols:
2273 # Select only the columns we need
2274 try:
2275 values = [values[_] for _ in usecols]
2276 except IndexError:
2277 append_to_invalid((i + skip_header + 1, nbvalues))
2278 continue
2279 elif nbvalues != nbcols:
2280 append_to_invalid((i + skip_header + 1, nbvalues))
2281 continue
2282 # Store the values
2283 append_to_rows(tuple(values))
2284 if usemask:
2285 append_to_masks(tuple([v.strip() in m
2286 for (v, m) in zip(values,
2287 missing_values)]))
2288 if len(rows) == max_rows:
2289 break
2291 # Upgrade the converters (if needed)
2292 if dtype is None:
2293 for (i, converter) in enumerate(converters):
2294 current_column = [itemgetter(i)(_m) for _m in rows]
2295 try:
2296 converter.iterupgrade(current_column)
2297 except ConverterLockError:
2298 errmsg = "Converter #%i is locked and cannot be upgraded: " % i
2299 current_column = map(itemgetter(i), rows)
2300 for (j, value) in enumerate(current_column):
2301 try:
2302 converter.upgrade(value)
2303 except (ConverterError, ValueError):
2304 errmsg += "(occurred line #%i for value '%s')"
2305 errmsg %= (j + 1 + skip_header, value)
2306 raise ConverterError(errmsg)
2308 # Check that we don't have invalid values
2309 nbinvalid = len(invalid)
2310 if nbinvalid > 0:
2311 nbrows = len(rows) + nbinvalid - skip_footer
2312 # Construct the error message
2313 template = " Line #%%i (got %%i columns instead of %i)" % nbcols
2314 if skip_footer > 0:
2315 nbinvalid_skipped = len([_ for _ in invalid
2316 if _[0] > nbrows + skip_header])
2317 invalid = invalid[:nbinvalid - nbinvalid_skipped]
2318 skip_footer -= nbinvalid_skipped
2319#
2320# nbrows -= skip_footer
2321# errmsg = [template % (i, nb)
2322# for (i, nb) in invalid if i < nbrows]
2323# else:
2324 errmsg = [template % (i, nb)
2325 for (i, nb) in invalid]
2326 if len(errmsg):
2327                errmsg.insert(0, "Some errors were detected!")
2328 errmsg = "\n".join(errmsg)
2329                # Raise an exception?
2330 if invalid_raise:
2331 raise ValueError(errmsg)
2332                # Issue a warning?
2333 else:
2334 warnings.warn(errmsg, ConversionWarning, stacklevel=2)
2336 # Strip the last skip_footer data
2337 if skip_footer > 0:
2338 rows = rows[:-skip_footer]
2339 if usemask:
2340 masks = masks[:-skip_footer]
2342 # Convert each value according to the converter:
2343 # We want to modify the list in place to avoid creating a new one...
2344 if loose:
2345 rows = list(
2346 zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
2347 for (i, conv) in enumerate(converters)]))
2348 else:
2349 rows = list(
2350 zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
2351 for (i, conv) in enumerate(converters)]))
2353 # Reset the dtype
2354 data = rows
2355 if dtype is None:
2356 # Get the dtypes from the types of the converters
2357 column_types = [conv.type for conv in converters]
2358 # Find the columns with strings...
2359 strcolidx = [i for (i, v) in enumerate(column_types)
2360 if v == np.str_]
2362 if byte_converters and strcolidx:
2363 # convert strings back to bytes for backward compatibility
2364 warnings.warn(
2365 "Reading unicode strings without specifying the encoding "
2366 "argument is deprecated. Set the encoding, use None for the "
2367 "system default.",
2368 np.exceptions.VisibleDeprecationWarning, stacklevel=2)
2370 def encode_unicode_cols(row_tup):
2371 row = list(row_tup)
2372 for i in strcolidx:
2373 row[i] = row[i].encode('latin1')
2374 return tuple(row)
2376 try:
2377 data = [encode_unicode_cols(r) for r in data]
2378 except UnicodeEncodeError:
2379 pass
2380 else:
2381 for i in strcolidx:
2382 column_types[i] = np.bytes_
2384 # Update string types to be the right length
2385 sized_column_types = column_types[:]
2386 for i, col_type in enumerate(column_types):
2387 if np.issubdtype(col_type, np.character):
2388 n_chars = max(len(row[i]) for row in data)
2389 sized_column_types[i] = (col_type, n_chars)
2391 if names is None:
2392 # If the dtype is uniform (before sizing strings)
2393 base = {
2394 c_type
2395 for c, c_type in zip(converters, column_types)
2396 if c._checked}
2397 if len(base) == 1:
2398 uniform_type, = base
2399 (ddtype, mdtype) = (uniform_type, bool)
2400 else:
2401 ddtype = [(defaultfmt % i, dt)
2402 for (i, dt) in enumerate(sized_column_types)]
2403 if usemask:
2404 mdtype = [(defaultfmt % i, bool)
2405 for (i, dt) in enumerate(sized_column_types)]
2406 else:
2407 ddtype = list(zip(names, sized_column_types))
2408 mdtype = list(zip(names, [bool] * len(sized_column_types)))
2409 output = np.array(data, dtype=ddtype)
2410 if usemask:
2411 outputmask = np.array(masks, dtype=mdtype)
2412 else:
2413 # Overwrite the initial dtype names if needed
2414 if names and dtype.names is not None:
2415 dtype.names = names
2416 # Case 1. We have a structured type
2417 if len(dtype_flat) > 1:
2418            # Nested dtype, e.g. [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
2419 # First, create the array using a flattened dtype:
2420 # [('a', int), ('b1', int), ('b2', float)]
2421 # Then, view the array using the specified dtype.
2422 if 'O' in (_.char for _ in dtype_flat):
2423 if has_nested_fields(dtype):
2424 raise NotImplementedError(
2425 "Nested fields involving objects are not supported...")
2426 else:
2427 output = np.array(data, dtype=dtype)
2428 else:
2429 rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
2430 output = rows.view(dtype)
2431 # Now, process the rowmasks the same way
2432 if usemask:
2433 rowmasks = np.array(
2434 masks, dtype=np.dtype([('', bool) for t in dtype_flat]))
2435 # Construct the new dtype
2436 mdtype = make_mask_descr(dtype)
2437 outputmask = rowmasks.view(mdtype)
2438 # Case #2. We have a basic dtype
2439 else:
2440 # We used some user-defined converters
2441 if user_converters:
2442 ishomogeneous = True
2443 descr = []
2444 for i, ttype in enumerate([conv.type for conv in converters]):
2445 # Keep the dtype of the current converter
2446 if i in user_converters:
2447 ishomogeneous &= (ttype == dtype.type)
2448 if np.issubdtype(ttype, np.character):
2449 ttype = (ttype, max(len(row[i]) for row in data))
2450 descr.append(('', ttype))
2451 else:
2452 descr.append(('', dtype))
2453                # So we changed the dtype?
2454 if not ishomogeneous:
2455 # We have more than one field
2456 if len(descr) > 1:
2457 dtype = np.dtype(descr)
2458 # We have only one field: drop the name if not needed.
2459 else:
2460 dtype = np.dtype(ttype)
2461 #
2462 output = np.array(data, dtype)
2463 if usemask:
2464 if dtype.names is not None:
2465 mdtype = [(_, bool) for _ in dtype.names]
2466 else:
2467 mdtype = bool
2468 outputmask = np.array(masks, dtype=mdtype)
2469 # Try to take care of the missing data we missed
2470 names = output.dtype.names
2471 if usemask and names:
2472 for (name, conv) in zip(names, converters):
2473 missing_values = [conv(_) for _ in conv.missing_values
2474 if _ != '']
2475 for mval in missing_values:
2476 outputmask[name] |= (output[name] == mval)
2477 # Construct the final array
2478 if usemask:
2479 output = output.view(MaskedArray)
2480 output._mask = outputmask
2482 output = _ensure_ndmin_ndarray(output, ndmin=ndmin)
2484 if unpack:
2485 if names is None:
2486 return output.T
2487 elif len(names) == 1:
2488 # squeeze single-name dtypes too
2489 return output[names[0]]
2490 else:
2491 # For structured arrays with multiple fields,
2492 # return an array for each field.
2493 return [output[field] for field in names]
2494 return output
2497_genfromtxt_with_like = array_function_dispatch()(genfromtxt)
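# A brief sketch (not part of the module) of the converter plumbing in
# `genfromtxt` above: a converter keyed by a column name is remapped to
# a column index and applied to every value in that column.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> s = StringIO("x;y\n1;2\n3;4")
# >>> data = np.genfromtxt(s, delimiter=";", names=True,
# ...                      converters={"x": lambda v: float(v) * 10})
# >>> data.tolist()
# [(10.0, 2.0), (30.0, 4.0)]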
2500def recfromtxt(fname, **kwargs):
2501 """
2502 Load ASCII data from a file and return it in a record array.
2504    If ``usemask=False`` a standard `recarray` is returned;
2505 if ``usemask=True`` a MaskedRecords array is returned.
2507 .. deprecated:: 2.0
2508 Use `numpy.genfromtxt` instead.
2510 Parameters
2511 ----------
2512 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2514 See Also
2515 --------
2516 numpy.genfromtxt : generic function
2518 Notes
2519 -----
2520 By default, `dtype` is None, which means that the data-type of the output
2521 array will be determined from the data.
2523 """
2525 # Deprecated in NumPy 2.0, 2023-07-11
2526 warnings.warn(
2527 "`recfromtxt` is deprecated, "
2528        "use `numpy.genfromtxt` instead. "
2529 "(deprecated in NumPy 2.0)",
2530 DeprecationWarning,
2531 stacklevel=2
2532 )
2534 kwargs.setdefault("dtype", None)
2535 usemask = kwargs.get('usemask', False)
2536 output = genfromtxt(fname, **kwargs)
2537 if usemask:
2538 from numpy.ma.mrecords import MaskedRecords
2539 output = output.view(MaskedRecords)
2540 else:
2541 output = output.view(np.recarray)
2542 return output
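# Migration sketch (not part of the module): the deprecated wrapper above
# amounts to calling `genfromtxt` and viewing the result as a recarray.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> s = StringIO("1 2\n3 4")
# >>> r = np.genfromtxt(s, dtype=None, names=['a', 'b']).view(np.recarray)
# >>> r.a
# array([1, 3])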
2545def recfromcsv(fname, **kwargs):
2546 """
2547 Load ASCII data stored in a comma-separated file.
2549 The returned array is a record array (if ``usemask=False``, see
2550 `recarray`) or a masked record array (if ``usemask=True``,
2551 see `ma.mrecords.MaskedRecords`).
2553 .. deprecated:: 2.0
2554 Use `numpy.genfromtxt` with comma as `delimiter` instead.
2556 Parameters
2557 ----------
2558 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2560 See Also
2561 --------
2562 numpy.genfromtxt : generic function to load ASCII data.
2564 Notes
2565 -----
2566 By default, `dtype` is None, which means that the data-type of the output
2567 array will be determined from the data.
2569 """
2571 # Deprecated in NumPy 2.0, 2023-07-11
2572 warnings.warn(
2573 "`recfromcsv` is deprecated, "
2574 "use `numpy.genfromtxt` with comma as `delimiter` instead. "
2575 "(deprecated in NumPy 2.0)",
2576 DeprecationWarning,
2577 stacklevel=2
2578 )
2580 # Set default kwargs for genfromtxt as relevant to csv import.
2581 kwargs.setdefault("case_sensitive", "lower")
2582 kwargs.setdefault("names", True)
2583 kwargs.setdefault("delimiter", ",")
2584 kwargs.setdefault("dtype", None)
2585 output = genfromtxt(fname, **kwargs)
2587 usemask = kwargs.get("usemask", False)
2588 if usemask:
2589 from numpy.ma.mrecords import MaskedRecords
2590 output = output.view(MaskedRecords)
2591 else:
2592 output = output.view(np.recarray)
2593 return output
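# Migration sketch (not part of the module): an equivalent call without
# the deprecated wrapper, spelling out the defaults set above.
#
# >>> from io import StringIO
# >>> import numpy as np
# >>> s = StringIO("A,B\n1,2.5\n3,4.5")
# >>> data = np.genfromtxt(s, delimiter=",", names=True, dtype=None,
# ...                      case_sensitive="lower").view(np.recarray)
# >>> data.b
# array([2.5, 4.5])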