1"""
2IO related functions.
3"""
4import os
5import re
6import functools
7import itertools
8import warnings
9import weakref
10import contextlib
11import operator
12from operator import itemgetter
13from collections.abc import Mapping
14import pickle
15
16import numpy as np
17from . import format
18from ._datasource import DataSource
19from numpy._core import overrides
20from numpy._core.multiarray import packbits, unpackbits
21from numpy._core._multiarray_umath import _load_from_filelike
22from numpy._core.overrides import finalize_array_function_like, set_module
23from ._iotools import (
24 LineSplitter, NameValidator, StringConverter, ConverterError,
25 ConverterLockError, ConversionWarning, _is_string_like,
26 has_nested_fields, flatten_dtype, easy_dtype, _decode_line
27 )
28from numpy._utils import asunicode, asbytes
29
30
31__all__ = [
32 'savetxt', 'loadtxt', 'genfromtxt', 'load', 'save', 'savez',
33 'savez_compressed', 'packbits', 'unpackbits', 'fromregex'
34 ]
35
36
37array_function_dispatch = functools.partial(
38 overrides.array_function_dispatch, module='numpy')
39
40
41class BagObj:
42 """
43 BagObj(obj)
44
45 Convert attribute look-ups to getitems on the object passed in.
46
47 Parameters
48 ----------
49 obj : class instance
50 Object on which attribute look-up is performed.
51
52 Examples
53 --------
54 >>> import numpy as np
55 >>> from numpy.lib._npyio_impl import BagObj as BO
56 >>> class BagDemo:
57 ... def __getitem__(self, key): # An instance of BagObj(BagDemo)
58 ... # will call this method when any
59 ... # attribute look-up is required
60 ... result = "Doesn't matter what you want, "
61 ... return result + "you're gonna get this"
62 ...
63 >>> demo_obj = BagDemo()
64 >>> bagobj = BO(demo_obj)
65 >>> bagobj.hello_there
66 "Doesn't matter what you want, you're gonna get this"
67 >>> bagobj.I_can_be_anything
68 "Doesn't matter what you want, you're gonna get this"
69
70 """
71
72 def __init__(self, obj):
73 # Use weakref to make NpzFile objects collectable by refcount
74 self._obj = weakref.proxy(obj)
75
76 def __getattribute__(self, key):
77 try:
78 return object.__getattribute__(self, '_obj')[key]
79 except KeyError:
80 raise AttributeError(key) from None
81
82 def __dir__(self):
83 """
84 Enables dir(bagobj) to list the files in an NpzFile.
85
86 This also enables tab-completion in an interpreter or IPython.
87 """
88 return list(object.__getattribute__(self, '_obj').keys())
89
90
91def zipfile_factory(file, *args, **kwargs):
92 """
93 Create a ZipFile.
94
95 Allows for Zip64, and the `file` argument can accept file, str, or
96 pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
97 constructor.
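
    Examples
    --------
    A minimal sketch of direct use (this helper is module-private; the
    temporary path below is only for illustration):

    >>> import tempfile, os
    >>> path = os.path.join(tempfile.mkdtemp(), "demo.zip")
    >>> zf = zipfile_factory(path, mode="w")
    >>> zf.writestr("x.txt", b"payload")
    >>> zf.close()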
98 """
99 if not hasattr(file, 'read'):
100 file = os.fspath(file)
101 import zipfile
102 kwargs['allowZip64'] = True
103 return zipfile.ZipFile(file, *args, **kwargs)
104
105
106@set_module('numpy.lib.npyio')
107class NpzFile(Mapping):
108 """
109 NpzFile(fid)
110
111 A dictionary-like object with lazy-loading of files in the zipped
112 archive provided on construction.
113
    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute access can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False
    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Parameters
    ----------
    fid : file, str, or pathlib.Path
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> import numpy as np
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> npz
    NpzFile 'object' with keys: x, y
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Make __exit__ safe if zipfile_factory raises an exception
    zip = None
    fid = None
    _MAX_REPR_ARRAY_COUNT = 5

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None, *,
                 max_header_size=format._MAX_HEADER_SIZE):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.max_header_size = max_header_size
        self.pickle_kwargs = pickle_kwargs
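        # Strip the ``.npy`` suffix from member names below so keys match
        # the array names passed to ``savez``; members without the suffix
        # keep their full name.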
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
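        # A key may name an archive member directly (including its ``.npy``
        # suffix) or an array stored by ``savez`` (without the suffix); in
        # the latter case the suffix is re-added to locate the member.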
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs,
                                         max_header_size=self.max_header_size)
            else:
                return self.zip.read(key)
        else:
            raise KeyError(f"{key} is not a file in the archive")

    def __contains__(self, key):
        return (key in self._files or key in self.files)

    def __repr__(self):
        # Get filename or default to `object`
        if isinstance(self.fid, str):
            filename = self.fid
        else:
            filename = getattr(self.fid, "name", "object")

        # Get the name of arrays
        array_names = ', '.join(self.files[:self._MAX_REPR_ARRAY_COUNT])
        if len(self.files) > self._MAX_REPR_ARRAY_COUNT:
            array_names += "..."
        return f"NpzFile {filename!r} with keys: {array_names}"

    # Work around problems with the docstrings in the Mapping methods
    # They contain a `->`, which confuses the type annotation interpretations
    # of sphinx-docs. See gh-25964

    def get(self, key, default=None, /):
        """
        D.get(k[,d]) returns D[k] if k in D, else d.  d defaults to None.
286 """
287 return Mapping.get(self, key, default)
288
289 def items(self):
290 """
291 D.items() returns a set-like object providing a view on the items
292 """
293 return Mapping.items(self)
294
295 def keys(self):
296 """
297 D.keys() returns a set-like object providing a view on the keys
298 """
299 return Mapping.keys(self)
300
301 def values(self):
302 """
303 D.values() returns a set-like object providing a view on the values
304 """
305 return Mapping.values(self)
306
307
308@set_module('numpy')
309def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
310 encoding='ASCII', *, max_header_size=format._MAX_HEADER_SIZE):
311 """
312 Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.
313
314 .. warning:: Loading files that contain object arrays uses the ``pickle``
315 module, which is not secure against erroneous or maliciously
316 constructed data. Consider passing ``allow_pickle=False`` to
317 load data that is known not to contain object arrays for the
318 safer handling of untrusted sources.
319
320 Parameters
321 ----------
322 file : file-like object, string, or pathlib.Path
323 The file to read. File-like objects must support the
324 ``seek()`` and ``read()`` methods and must always
325 be opened in binary mode. Pickled files require that the
326 file-like object support the ``readline()`` method as well.
327 mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
328 If not None, then memory-map the file, using the given mode (see
329 `numpy.memmap` for a detailed description of the modes). A
330 memory-mapped array is kept on disk. However, it can be accessed
331 and sliced like any ndarray. Memory mapping is especially useful
332 for accessing small fragments of large files without reading the
333 entire file into memory.
334 allow_pickle : bool, optional
335 Allow loading pickled object arrays stored in npy files. Reasons for
336 disallowing pickles include security, as loading pickled data can
337 execute arbitrary code. If pickles are disallowed, loading object
338 arrays will fail. Default: False
339 fix_imports : bool, optional
340 Only useful when loading Python 2 generated pickled files on Python 3,
341 which includes npy/npz files containing object arrays. If `fix_imports`
342 is True, pickle will try to map the old Python 2 names to the new names
343 used in Python 3.
344 encoding : str, optional
345 What encoding to use when reading Python 2 strings. Only useful when
346 loading Python 2 generated pickled files in Python 3, which includes
347 npy/npz files containing object arrays. Values other than 'latin1',
348 'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
349 data. Default: 'ASCII'
350 max_header_size : int, optional
351 Maximum allowed size of the header. Large headers may not be safe
352 to load securely and thus require explicitly passing a larger value.
353 See :py:func:`ast.literal_eval()` for details.
354 This option is ignored when `allow_pickle` is passed. In that case
355 the file is by definition trusted and the limit is unnecessary.
356
357 Returns
358 -------
359 result : array, tuple, dict, etc.
360 Data stored in the file. For ``.npz`` files, the returned instance
361 of NpzFile class must be closed to avoid leaking file descriptors.
362
363 Raises
364 ------
365 OSError
366 If the input file does not exist or cannot be read.
367 UnpicklingError
368 If ``allow_pickle=True``, but the file cannot be loaded as a pickle.
369 ValueError
370 The file contains an object array, but ``allow_pickle=False`` given.
    EOFError
        When calling ``np.load`` multiple times on the same file handle,
        if all data has already been read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    >>> import numpy as np

    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a = np.array([[1, 2, 3], [4, 5, 6]])
    >>> b = np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])
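
    Loading an object array requires opting in to pickle; only do this for
    files you trust. A minimal sketch (the ``/tmp`` path is illustrative):

    >>> obj = np.array([{'a': 1}], dtype=object)
    >>> np.save('/tmp/obj.npy', obj, allow_pickle=True)
    >>> np.load('/tmp/obj.npy', allow_pickle=True)
    array([{'a': 1}], dtype=object)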

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy._core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os.fspath(file), "rb"))
            own_fid = True

        # Code to distinguish between NumPy binary files and pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        if not magic:
            raise EOFError("No data left in file")
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith((_ZIP_PREFIX, _ZIP_SUFFIX)):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs,
                          max_header_size=max_header_size)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                if allow_pickle:
                    max_header_size = 2**64
                return format.open_memmap(file, mode=mmap_mode,
                                          max_header_size=max_header_size)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs,
                                         max_header_size=max_header_size)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError(
                    "This file contains pickled (object) data. If you trust "
                    "the file you can load it unsafely using the "
                    "`allow_pickle=` keyword argument or `pickle.load()`.")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception as e:
                raise pickle.UnpicklingError(
                    f"Failed to interpret file {file!r} as a pickle") from e


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=np._NoValue):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved.  If file is a file-object,
        then the filename is unchanged.  If file is a string or Path,
        a ``.npy`` extension will be appended to the filename if it does not
        already have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between different versions of Python).
        Default: True
    fix_imports : bool, optional
        The `fix_imports` flag is deprecated and has no effect.

        .. deprecated:: 2.1
            This flag is ignored since NumPy 1.17 and was only needed to
            support loading some files in Python 2 written in Python 3.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> import numpy as np

    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    # [1 2] [1 3]
    """
    if fix_imports is not np._NoValue:
        # Deprecated 2024-05-16, NumPy 2.1
        warnings.warn(
            "The 'fix_imports' flag is deprecated and has no effect. "
            "(Deprecated in NumPy 2.1)",
            DeprecationWarning, stacklevel=2)
    if hasattr(file, 'write'):
        file_ctx = contextlib.nullcontext(file)
    else:
        file = os.fspath(file)
        if not file.endswith('.npy'):
            file = file + '.npy'
        file_ctx = open(file, "wb")

    with file_ctx as fid:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr, allow_pickle=allow_pickle,
                           pickle_kwargs=dict(fix_imports=fix_imports))


def _savez_dispatcher(file, *args, allow_pickle=True, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, allow_pickle=True, **kwds):
    """Save several arrays into a single file in uncompressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e., ``savez(fn,
    x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between different versions of Python).
        Default: True
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Keys passed in `kwds` are used as filenames inside the ZIP archive.
    Therefore, keys should be valid filenames; e.g., avoid keys that begin with
    ``/`` or contain ``.``.

    When naming variables with keyword arguments, it is not possible to name a
    variable ``file``, as this would cause the ``file`` argument to be defined
    twice in the call to ``savez``.

    Examples
    --------
    >>> import numpy as np
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_0', 'arr_1']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> sorted(npzfile.files)
    ['x', 'y']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    _savez(file, args, kwds, False, allow_pickle=allow_pickle)


def _savez_compressed_dispatcher(file, *args, allow_pickle=True, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, allow_pickle=True, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez_compressed(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e.,
    ``savez_compressed(fn, x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between different versions of Python).
        Default: True
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> import numpy as np
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True, allow_pickle=allow_pickle)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os.fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)
    try:
        for key, val in namedict.items():
            fname = key + '.npy'
            val = np.asanyarray(val)
            # always force zip64, gh-10776
            with zipf.open(fname, 'w', force_zip64=True) as fid:
                format.write_array(fid, val,
                                   allow_pickle=allow_pickle,
                                   pickle_kwargs=pickle_kwargs)
    finally:
        zipf.close()


def _ensure_ndmin_ndarray_check_param(ndmin):
    """Check that the `ndmin` parameter is supported by
    _ensure_ndmin_ndarray. Intended as cheap verification before
    running anything expensive, e.g. loadtxt or genfromtxt.
    """
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError(f"Illegal value of ndmin keyword: {ndmin}")


def _ensure_ndmin_ndarray(a, *, ndmin: int):
    """This is a helper function of loadtxt and genfromtxt to ensure
    proper minimum dimension as requested

    ndmin : int. Supported values: 0, 1, 2
    ^^ whenever this changes, keep in sync with
       _ensure_ndmin_ndarray_check_param
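
    A quick sketch of the intended behavior (names here are this module's
    private helpers):

    >>> _ensure_ndmin_ndarray(np.array(1.0), ndmin=1).shape
    (1,)
    >>> _ensure_ndmin_ndarray(np.array([1.0, 2.0]), ndmin=2).shape
    (2, 1)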
820 """
821 # Verify that the array has at least dimensions `ndmin`.
822 # Tweak the size and shape of the arrays - remove extraneous dimensions
823 if a.ndim > ndmin:
824 a = np.squeeze(a)
825 # and ensure we have the minimum number of dimensions asked for
826 # - has to be in this order for the odd case ndmin=1, a.squeeze().ndim=0
827 if a.ndim < ndmin:
828 if ndmin == 1:
829 a = np.atleast_1d(a)
830 elif ndmin == 2:
831 a = np.atleast_2d(a).T
832
833 return a
834
835
836# amount of lines loadtxt reads in one chunk, can be overridden for testing
837_loadtxt_chunksize = 50000
838
839
840def _check_nonneg_int(value, name="argument"):
841 try:
842 operator.index(value)
843 except TypeError:
844 raise TypeError(f"{name} must be an integer") from None
845 if value < 0:
846 raise ValueError(f"{name} must be nonnegative")
847
848
def _preprocess_comments(iterable, comments, encoding):
    """
    Generator that consumes a line-iterable and strips out the
    multiple (or multi-character) comments from lines.
    This is a pre-processing step to achieve feature parity with loadtxt
    (we assume that this is a niche feature).
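
    A small sketch of the behavior (with str input, so `encoding` is
    unused):

    >>> list(_preprocess_comments(["1 2 # x", "3 // y 4"], ["#", "//"], None))
    ['1 2 ', '3 ']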
855 """
856 for line in iterable:
857 if isinstance(line, bytes):
858 # Need to handle conversion here, or the splitting would fail
859 line = line.decode(encoding)
860
861 for c in comments:
862 line = line.split(c, 1)[0]
863
864 yield line
865
866
867# The number of rows we read in one go if confronted with a parametric dtype
868_loadtxt_chunksize = 50000
869
870
871def _read(fname, *, delimiter=',', comment='#', quote='"',
872 imaginary_unit='j', usecols=None, skiplines=0,
873 max_rows=None, converters=None, ndmin=None, unpack=False,
874 dtype=np.float64, encoding=None):
875 r"""
876 Read a NumPy array from a text file.
877 This is a helper function for loadtxt.
878
879 Parameters
880 ----------
881 fname : file, str, or pathlib.Path
882 The filename or the file to be read.
883 delimiter : str, optional
884 Field delimiter of the fields in line of the file.
885 Default is a comma, ','. If None any sequence of whitespace is
886 considered a delimiter.
887 comment : str or sequence of str or None, optional
888 Character that begins a comment. All text from the comment
889 character to the end of the line is ignored.
890 Multiple comments or multiple-character comment strings are supported,
891 but may be slower and `quote` must be empty if used.
892 Use None to disable all use of comments.
893 quote : str or None, optional
894 Character that is used to quote string fields. Default is '"'
895 (a double quote). Use None to disable quote support.
    imaginary_unit : str, optional
        Character that represents the imaginary unit `sqrt(-1)`.
        Default is 'j'.
    usecols : array_like, optional
        A one-dimensional array of integer column numbers.  These are the
        columns from the file to be included in the array.  If this value
        is not given, all the columns are used.
    skiplines : int, optional
        Number of lines to skip before interpreting the data in the file.
    max_rows : int, optional
        Maximum number of rows of data to read.  Default is to read the
        entire file.
    converters : dict or callable, optional
        A function to parse all column strings into the desired value, or
        a dictionary mapping column number to a parser function.
        E.g. if column 0 is a date string: ``converters = {0: datestr2num}``.
        Converters can also be used to provide a default value for missing
        data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will
        convert empty fields to 0.
        Default: None
    ndmin : int, optional
        Minimum dimension of the array returned.
        Allowed values are 0, 1 or 2.  Default is 0.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = read(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    dtype : numpy data type
        A NumPy dtype instance, can be a structured dtype to map to the
        columns of the file.
    encoding : str, optional
        Encoding used to decode the input file. The special value 'bytes'
        (the default) enables backwards-compatible behavior for `converters`,
        ensuring that inputs to the converter functions are encoded
        bytes objects. The special value 'bytes' has no additional effect if
        ``converters=None``. If encoding is ``'bytes'`` or ``None``, the
        default system encoding is used.

    Returns
    -------
    ndarray
        NumPy array.
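
    Examples
    --------
    A minimal sketch of direct use (normally this is reached via `loadtxt`);
    note that `comment` must be given as a sequence of strings here, and
    `ndmin` must be passed explicitly:

    >>> _read(["1,2", "3,4"], dtype=np.float64, ndmin=0, comment=["#"])
    array([[1., 2.],
           [3., 4.]])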
938 """
939 # Handle special 'bytes' keyword for encoding
940 byte_converters = False
941 if encoding == 'bytes':
942 encoding = None
943 byte_converters = True
944
945 if dtype is None:
946 raise TypeError("a dtype must be provided.")
947 dtype = np.dtype(dtype)
948
949 read_dtype_via_object_chunks = None
950 if dtype.kind in 'SUM' and (
951 dtype == "S0" or dtype == "U0" or dtype == "M8" or dtype == 'm8'):
952 # This is a legacy "flexible" dtype. We do not truly support
953 # parametric dtypes currently (no dtype discovery step in the core),
954 # but have to support these for backward compatibility.
955 read_dtype_via_object_chunks = dtype
956 dtype = np.dtype(object)
957
958 if usecols is not None:
959 # Allow usecols to be a single int or a sequence of ints, the C-code
960 # handles the rest
961 try:
962 usecols = list(usecols)
963 except TypeError:
964 usecols = [usecols]
965
966 _ensure_ndmin_ndarray_check_param(ndmin)
967
968 if comment is None:
969 comments = None
970 else:
971 # assume comments are a sequence of strings
972 if "" in comment:
973 raise ValueError(
974 "comments cannot be an empty string. Use comments=None to "
975 "disable comments."
976 )
977 comments = tuple(comment)
978 comment = None
979 if len(comments) == 0:
980 comments = None # No comments at all
981 elif len(comments) == 1:
982 # If there is only one comment, and that comment has one character,
983 # the normal parsing can deal with it just fine.
984 if isinstance(comments[0], str) and len(comments[0]) == 1:
985 comment = comments[0]
986 comments = None
987 else:
988 # Input validation if there are multiple comment characters
989 if delimiter in comments:
990 raise TypeError(
991 f"Comment characters '{comments}' cannot include the "
992 f"delimiter '{delimiter}'"
993 )
994
995 # comment is now either a 1 or 0 character string or a tuple:
996 if comments is not None:
        # Note: An earlier version supported two-character comments (and
        # could have been extended to multiple characters); we assume this
        # is rare enough not to optimize for.
        if quote is not None:
            raise ValueError(
                "when multiple comments or a multi-character comment is "
                "given, quotes are not supported.  In this case quotechar "
                "must be set to None.")

    if len(imaginary_unit) != 1:
        raise ValueError('len(imaginary_unit) must be 1.')

    _check_nonneg_int(skiplines)
    if max_rows is not None:
        _check_nonneg_int(max_rows)
    else:
        # Passing -1 to the C code means "read the entire file".
        max_rows = -1

    fh_closing_ctx = contextlib.nullcontext()
    filelike = False
    try:
        if isinstance(fname, os.PathLike):
            fname = os.fspath(fname)
        if isinstance(fname, str):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            if encoding is None:
                encoding = getattr(fh, 'encoding', 'latin1')

            fh_closing_ctx = contextlib.closing(fh)
            data = fh
            filelike = True
        else:
            if encoding is None:
                encoding = getattr(fname, 'encoding', 'latin1')
            data = iter(fname)
    except TypeError as e:
        raise ValueError(
            f"fname must be a string, filehandle, list of strings,\n"
            f"or generator. Got {type(fname)} instead.") from e

    with fh_closing_ctx:
        if comments is not None:
            if filelike:
                data = iter(data)
                filelike = False
            data = _preprocess_comments(data, comments, encoding)

        if read_dtype_via_object_chunks is None:
            arr = _load_from_filelike(
                data, delimiter=delimiter, comment=comment, quote=quote,
                imaginary_unit=imaginary_unit,
                usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                converters=converters, dtype=dtype,
                encoding=encoding, filelike=filelike,
                byte_converters=byte_converters)

        else:
            # This branch reads the file into chunks of object arrays and then
            # casts them to the desired actual dtype.  This ensures correct
            # string-length and datetime-unit discovery (like `arr.astype()`).
            # Due to chunking, certain error reports are less clear, currently.
            if filelike:
                data = iter(data)  # cannot chunk when reading from file
                filelike = False

            c_byte_converters = False
            if read_dtype_via_object_chunks == "S":
                c_byte_converters = True  # Use latin1 rather than ascii

            chunks = []
            while max_rows != 0:
                if max_rows < 0:
                    chunk_size = _loadtxt_chunksize
                else:
                    chunk_size = min(_loadtxt_chunksize, max_rows)

                next_arr = _load_from_filelike(
                    data, delimiter=delimiter, comment=comment, quote=quote,
                    imaginary_unit=imaginary_unit,
                    usecols=usecols, skiplines=skiplines, max_rows=chunk_size,
                    converters=converters, dtype=dtype,
                    encoding=encoding, filelike=filelike,
                    byte_converters=byte_converters,
                    c_byte_converters=c_byte_converters)
                # Cast here already.  We hope that this is better even for
                # large files because the storage is more compact.  It could
                # be adapted (in principle the concatenate could cast).
                chunks.append(next_arr.astype(read_dtype_via_object_chunks))

                skiplines = 0  # Only have to skip for the first chunk
                if max_rows >= 0:
                    max_rows -= chunk_size
                if len(next_arr) < chunk_size:
                    # There was less data than requested, so we are done.
                    break

            # Need at least one chunk, but if empty, the last one may have
            # the wrong shape.
            if len(chunks) > 1 and len(chunks[-1]) == 0:
                del chunks[-1]
            if len(chunks) == 1:
                arr = chunks[0]
            else:
                arr = np.concatenate(chunks, axis=0)

    # NOTE: ndmin works as advertised for structured dtypes, but normally
    #       these would return a 1D result plus the structured dimension,
    #       so ndmin=2 adds a third dimension even when no squeezing occurs.
    #       A `squeeze=False` could be a better solution (pandas uses squeeze).
    arr = _ensure_ndmin_ndarray(arr, ndmin=ndmin)

    if arr.shape:
        if arr.shape[0] == 0:
            warnings.warn(
                f'loadtxt: input contained no data: "{fname}"',
                category=UserWarning,
                stacklevel=3
            )

    if unpack:
        # Unpack structured dtypes if requested:
        dt = arr.dtype
        if dt.names is not None:
            # For structured arrays, return an array for each field.
            return [arr[field] for field in dt.names]
        else:
            return arr.T
    else:
        return arr


@finalize_array_function_like
@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding=None, max_rows=None, *, quotechar=None,
            like=None):
    r"""
    Load data from a text file.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
        that generators must return bytes or strings. The strings
        in a list or produced by a generator are treated as lines.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str or None, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The character used to separate the values. For backwards compatibility,
        byte strings will be decoded as 'latin1'. The default is whitespace.

        .. versionchanged:: 1.23.0
            Only single character delimiters are supported. Newline characters
            cannot be used as the delimiter.

    converters : dict or callable, optional
        Converter functions to customize value parsing. If `converters` is
        callable, the function is applied to all columns, else it must be a
        dict that maps column number to a parser function.
        See examples for further details.
        Default: None.

        .. versionchanged:: 1.23.0
            The ability to pass a single callable to be applied to all columns
            was added.

    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input
        streams. The special value 'bytes' enables backward compatibility
        workarounds that ensure you receive byte arrays as results if possible
        and passes 'latin1' encoded strings to converters. Override this value
        to receive unicode arrays and pass strings as input to converters. If
        set to None the system default is used. The default value is None.

        .. versionchanged:: 2.0
            Before NumPy 2, the default was ``'bytes'`` for Python 2
            compatibility. The default is now ``None``.

    max_rows : int, optional
        Read `max_rows` rows of content after `skiprows` lines. The default is
        to read all the rows. Note that empty rows containing no data such as
        empty lines and comment lines are not counted towards `max_rows`,
        while such lines are counted in `skiprows`.

        .. versionchanged:: 1.23.0
            Lines containing no data, including comment lines (e.g., lines
            starting with '#' or as specified via `comments`) are not counted
            towards `max_rows`.
    quotechar : unicode character or None, optional
        The character used to denote the start and end of a quoted item.
        Occurrences of the delimiter or comment characters are ignored within
        a quoted item. The default value is ``quotechar=None``, which means
        quoting support is disabled.

        If two consecutive instances of `quotechar` are found within a quoted
        field, the first is treated as an escape character. See examples.

        .. versionadded:: 1.23.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Each row in the input text file must have the same number of values to be
    able to read all values.  If all rows do not have same number of values, a
    subset of up to n columns (where n is the least number of values present
    in all rows) can be read by specifying the columns via `usecols`.

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> import numpy as np
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    The `converters` argument is used to specify functions to preprocess the
    text prior to parsing. `converters` can be a dictionary that maps
    preprocessing functions to each column:

    >>> s = StringIO("1.618, 2.296\n3.141, 4.669\n")
    >>> conv = {
    ...     0: lambda x: np.floor(float(x)),  # conversion fn for column 0
    ...     1: lambda x: np.ceil(float(x)),   # conversion fn for column 1
    ... }
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([[1., 3.],
           [3., 5.]])

    `converters` can be a callable instead of a dictionary, in which case it
    is applied to all columns:

    >>> s = StringIO("0xDE 0xAD\n0xC0 0xDE")
    >>> import functools
    >>> conv = functools.partial(int, base=16)
    >>> np.loadtxt(s, converters=conv)
    array([[222., 173.],
           [192., 222.]])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith("-") else float(fld)
    ...
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Using a callable as the converter can be particularly useful for handling
    values with different formatting, e.g. floats with underscores:

    >>> s = StringIO("1 2.7 100_000")
    >>> np.loadtxt(s, converters=float)
    array([1.e+00, 2.7e+00, 1.e+05])

    This idea can be extended to automatically handle values specified in
    many different formats, such as hex values:

    >>> def conv(val):
    ...     try:
    ...         return float(val)
    ...     except ValueError:
    ...         return float.fromhex(val)
    >>> s = StringIO("1, 2.5, 3_000, 0b4, 0x1.4000000000000p+2")
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([1.0e+00, 2.5e+00, 3.0e+03, 1.8e+02, 5.0e+00])

    Or a format where the ``-`` sign comes after the number:

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> conv = lambda x: -float(x[:-1]) if x.endswith("-") else float(x)
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Support for quoted fields is enabled with the `quotechar` parameter.
    Comment and delimiter characters are ignored when they appear within a
    quoted item delineated by `quotechar`:

    >>> s = StringIO('"alpha, #42", 10.0\n"beta, #64", 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=",", quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Quoted fields can be separated by multiple whitespace characters:

    >>> s = StringIO('"alpha, #42"       10.0\n"beta, #64" 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=None, quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Two consecutive quote characters within a quoted field are treated as a
    single escaped character:

    >>> s = StringIO('"Hello, my name is ""Monty""!"')
    >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"')
    array('Hello, my name is "Monty"!', dtype='<U26')

    Read subset of columns when all rows do not contain equal number of values:

    >>> d = StringIO("1 2\n2 4\n3 9 12\n4 16 20")
    >>> np.loadtxt(d, usecols=(0, 1))
    array([[ 1.,  2.],
           [ 2.,  4.],
           [ 3.,  9.],
           [ 4., 16.]])

    """

    if like is not None:
        return _loadtxt_with_like(
            like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
            converters=converters, skiprows=skiprows, usecols=usecols,
            unpack=unpack, ndmin=ndmin, encoding=encoding,
            max_rows=max_rows
        )

    if isinstance(delimiter, bytes):
        delimiter.decode("latin1")

    if dtype is None:
        dtype = np.float64

    comment = comments
    # Control character type conversions for Py3 convenience
    if comment is not None:
        if isinstance(comment, (str, bytes)):
            comment = [comment]
        comment = [
            x.decode('latin1') if isinstance(x, bytes) else x for x in comment]
    if isinstance(delimiter, bytes):
        delimiter = delimiter.decode('latin1')

    arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
                converters=converters, skiplines=skiprows, usecols=usecols,
                unpack=unpack, ndmin=ndmin, encoding=encoding,
                max_rows=max_rows, quote=quotechar)

    return arr


_loadtxt_with_like = array_function_dispatch()(loadtxt)


def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
                        header=None, footer=None, comments=None,
                        encoding=None):
    return (X,)


@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename, file handle or pathlib.Path
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format.  `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, ``fmt='%.4e'``, resulting in numbers formatted
          like ``' (%s+%sj)' % (fmt, fmt)``
        * a full string specifying every real and imaginary part, e.g.
          ``' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'`` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. ``['%.3e + %.3ej', '(%.15e%+.15ej)']`` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.
    header : str, optional
        String that will be written at the beginning of the file.
    footer : str, optional
        String that will be written at the end of the file.
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.
    encoding : {None, str}, optional
        Encoding used to encode the output file. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1'
        you will not be able to load the file in NumPy versions < 1.14.
        Default is 'latin1'.

    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
           Python Documentation.

    Examples
    --------
    >>> import numpy as np
    >>> x = y = z = np.arange(0.0, 5.0, 1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x, y, z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation
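
    A sketch of writing complex data with a single format specifier; each
    value is written as ``(real+imagj)``:

    >>> z = np.array([1 + 2j, 3 - 4j])
    >>> np.savetxt('test.out', z, fmt='%.2e')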

    """

    class WriteWrap:
        """Convert to bytes on bytestream inputs.

        """
        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
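            # Sniff the stream type on the first write: try text first and
            # fall back to bytes, then cache the working writer for later
            # calls.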
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    if isinstance(fname, os.PathLike):
        fname = os.fspath(fname)
    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
1594 if type(fmt) in (list, tuple):
1595 if len(fmt) != ncol:
1596 raise AttributeError('fmt has wrong shape. %s' % str(fmt))
1597 format = delimiter.join(fmt)
1598 elif isinstance(fmt, str):
1599 n_fmt_chars = fmt.count('%')
1600 error = ValueError('fmt has wrong number of %% formats: %s' % fmt)
1601 if n_fmt_chars == 1:
1602 if iscomplex_X:
1603 fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
1604 else:
1605 fmt = [fmt, ] * ncol
1606 format = delimiter.join(fmt)
1607 elif iscomplex_X and n_fmt_chars != (2 * ncol):
1608 raise error
1609 elif ((not iscomplex_X) and n_fmt_chars != ncol):
1610 raise error
1611 else:
1612 format = fmt
1613 else:
1614 raise ValueError('invalid fmt: %r' % (fmt,))
1615
        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = format % tuple(row) + newline
                except TypeError as e:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), format)) from e
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()


@set_module('numpy')
def fromregex(file, regexp, dtype, encoding=None):
    r"""
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Filename or file object to read.

        .. versionchanged:: 1.22.0
            Now accepts `os.PathLike` implementations.

    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array; must be a structured datatype.
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input
        streams.

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `basics.rec`.

    Examples
    --------
    >>> import numpy as np
    >>> from io import StringIO
    >>> text = StringIO("1312 foo\n1534 bar\n444 qux")

    >>> regexp = r"(\d+)\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex(text, regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
          dtype=[('num', '<i8'), ('key', 'S3')])
    >>> output['num']
    array([1312, 1534,  444])
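
    A pattern with a single group yields a one-field structured array
    (an illustrative addition; it reuses ``text`` from above):

    >>> _ = text.seek(0)
    >>> out = np.fromregex(text, r"(\d+)\s+...", [('num', np.int64)])
    >>> out['num']
    array([1312, 1534,  444])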

    """
    own_fh = False
    if not hasattr(file, "read"):
        file = os.fspath(file)
        file = np.lib._datasource.open(file, 'rt', encoding=encoding)
        own_fh = True

    try:
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)
        if dtype.names is None:
            raise TypeError('dtype must be a structured datatype.')

        content = file.read()
        if isinstance(content, bytes) and isinstance(regexp, str):
            regexp = asbytes(regexp)

        if not hasattr(regexp, 'match'):
            regexp = re.compile(regexp)
        seq = regexp.findall(content)
        if seq and not isinstance(seq[0], tuple):
            # Only one group is in the regexp.
            # Create the new array as a single data-type and then
            # re-interpret as a single-field structured array.
            newdtype = np.dtype(dtype[dtype.names[0]])
            output = np.array(seq, dtype=newdtype)
            output.dtype = dtype
        else:
            output = np.array(seq, dtype=dtype)

        return output
    finally:
        if own_fh:
            file.close()


#####--------------------------------------------------------------------------
#---- --- ASCII functions ---
#####--------------------------------------------------------------------------


@finalize_array_function_like
@set_module('numpy')
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
               skip_header=0, skip_footer=0, converters=None,
               missing_values=None, filling_values=None, usecols=None,
               names=None, excludelist=None,
               deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
               replace_space='_', autostrip=False, case_sensitive=True,
               defaultfmt="f%i", unpack=None, usemask=False, loose=True,
               invalid_raise=True, max_rows=None, encoding=None,
               *, ndmin=0, like=None):
    """
    Load data from a text file, with missing values handled as specified.

    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read. If the filename
        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
        that generators must return bytes or strings. The strings
        in a list or produced by a generator are treated as lines.
    dtype : dtype, optional
        Data type of the resulting array.
        If None, the dtypes will be determined by the contents of each
        column, individually.
    comments : str, optional
        The character used to indicate the start of a comment.
        All the characters occurring on a line after a comment are discarded.
    delimiter : str, int, or sequence, optional
        The string used to separate values. By default, any consecutive
        whitespaces act as delimiter. An integer or sequence of integers
        can also be provided as width(s) of each field.
    skiprows : int, optional
        `skiprows` was removed in numpy 1.10. Please use `skip_header` instead.
    skip_header : int, optional
        The number of lines to skip at the beginning of the file.
    skip_footer : int, optional
        The number of lines to skip at the end of the file.
    converters : variable, optional
        The set of functions that convert the data of a column to a value.
        The converters can also be used to provide a default value
        for missing data: ``converters = {3: lambda s: float(s or 0)}``.
    missing : variable, optional
        `missing` was removed in numpy 1.10. Please use `missing_values`
        instead.
    missing_values : variable, optional
        The set of strings corresponding to missing data.
    filling_values : variable, optional
        The set of values to be used as default when the data are missing.
    usecols : sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, str, sequence}, optional
        If `names` is True, the field names are read from the first line after
        the first `skip_header` lines. This line can optionally be preceded
        by a comment delimiter. Any content before the comment delimiter is
        discarded. If `names` is a sequence or a single string of
        comma-separated names, the names will be used to define the field
        names in a structured dtype. If `names` is None, the names of the
        dtype fields will be used, if any.
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default list
        ['return','file','print']. Excluded names are appended with an
        underscore: for example, `file` would become `file_`.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    defaultfmt : str, optional
        A format used to define default field names, such as "f%i" or "f_%02i".
    autostrip : bool, optional
        Whether to automatically strip white spaces from the variables.
    replace_space : char, optional
        Character(s) used in replacement of white spaces in the variable
        names. By default, use a '_'.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case sensitive.
        If False or 'upper', field names are converted to upper case.
        If 'lower', field names are converted to lower case.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = genfromtxt(...)``. When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    usemask : bool, optional
        If True, return a masked array.
        If False, return a regular array.
    loose : bool, optional
        If True, do not raise errors for invalid values.
    invalid_raise : bool, optional
        If True, an exception is raised if an inconsistency is detected in the
        number of columns.
        If False, a warning is emitted and the offending lines are skipped.
    max_rows : int, optional
        The maximum number of rows to read. Must not be used with skip_footer
        at the same time. If given, the value must be at least 1. Default is
        to read the entire file.
    encoding : str, optional
        Encoding used to decode the input file. Does not apply when `fname`
        is a file object. The special value 'bytes' enables backward
        compatibility workarounds that ensure that you receive byte arrays
        when possible and passes latin1 encoded strings to converters.
        Override this value to receive unicode arrays and pass strings
        as input to converters. If set to None the system default is used.

        .. versionchanged:: 2.0
            Before NumPy 2, the default was ``'bytes'`` for Python 2
            compatibility. The default is now ``None``.

    ndmin : int, optional
        Same parameter as `loadtxt`.

        .. versionadded:: 1.23.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file. If `usemask` is True, this is a
        masked array.

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been given
      as input, there should not be any missing data between two fields.
    * When variables are named (either by a flexible dtype or with a `names`
      sequence), there must not be any header in the file (else a ValueError
      exception is raised).
    * Individual values are not stripped of spaces by default.
      When using a custom converter, make sure the function does remove spaces.
    * Custom converters may receive unexpected values due to dtype
      discovery.

    References
    ----------
    .. [1] NumPy User Guide, section `I/O with NumPy
           <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.

    Examples
    --------
    >>> from io import StringIO
    >>> import numpy as np

    Comma delimited file with mixed dtype

    >>> s = StringIO("1,1.3,abcde")
    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
    ...                      ('mystring','S5')], delimiter=",")
    >>> data
    array((1, 1.3, b'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])

    Using dtype = None

    >>> _ = s.seek(0)  # needed for StringIO example only
    >>> data = np.genfromtxt(s, dtype=None,
    ...                      names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '<U5')])

    Specifying dtype and names

    >>> _ = s.seek(0)
    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
    ...                      names=['myint','myfloat','mystring'], delimiter=",")
    >>> data
    array((1, 1.3, b'abcde'),
          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])

    An example with fixed-width columns

    >>> s = StringIO("11.3abcde")
    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
    ...                      delimiter=[1,3,5])
    >>> data
    array((1, 1.3, 'abcde'),
          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '<U5')])

    An example to show comments

    >>> f = StringIO('''
    ... text,# of chars
    ... hello world,11
    ... numpy,5''')
    >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',')
    array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')],
          dtype=[('f0', 'S12'), ('f1', 'S12')])
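
    An example with missing values, using a scalar `filling_values` (a small
    illustrative sketch; the default ``dtype=float`` applies):

    >>> s = StringIO('''1,,3
    ... 4,5,6''')
    >>> np.genfromtxt(s, delimiter=",", filling_values=-1)
    array([[ 1., -1.,  3.],
           [ 4.,  5.,  6.]])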

    """

    if like is not None:
        return _genfromtxt_with_like(
            like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
            skip_header=skip_header, skip_footer=skip_footer,
            converters=converters, missing_values=missing_values,
            filling_values=filling_values, usecols=usecols, names=names,
            excludelist=excludelist, deletechars=deletechars,
            replace_space=replace_space, autostrip=autostrip,
            case_sensitive=case_sensitive, defaultfmt=defaultfmt,
            unpack=unpack, usemask=usemask, loose=loose,
            invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding,
            ndmin=ndmin,
        )

    _ensure_ndmin_ndarray_check_param(ndmin)

    if max_rows is not None:
        if skip_footer:
            raise ValueError(
                "The keywords 'skip_footer' and 'max_rows' cannot be "
                "specified at the same time.")
        if max_rows < 1:
            raise ValueError("'max_rows' must be at least 1.")

    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr
    # Check the input dictionary of converters
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        raise TypeError(
            "The input argument 'converters' should be a valid dictionary "
            "(got '%s' instead)" % type(user_converters))

    if encoding == 'bytes':
        encoding = None
        byte_converters = True
    else:
        byte_converters = False

    # Initialize the filehandle, the LineSplitter and the NameValidator
    if isinstance(fname, os.PathLike):
        fname = os.fspath(fname)
    if isinstance(fname, str):
        fid = np.lib._datasource.open(fname, 'rt', encoding=encoding)
        fid_ctx = contextlib.closing(fid)
    else:
        fid = fname
        fid_ctx = contextlib.nullcontext(fid)
    try:
        fhd = iter(fid)
    except TypeError as e:
        raise TypeError(
            "fname must be a string, a filehandle, a sequence of strings,\n"
            f"or an iterator of strings. Got {type(fname)} instead."
        ) from e
    with fid_ctx:
        split_line = LineSplitter(delimiter=delimiter, comments=comments,
                                  autostrip=autostrip, encoding=encoding)
        validate_names = NameValidator(excludelist=excludelist,
                                       deletechars=deletechars,
                                       case_sensitive=case_sensitive,
                                       replace_space=replace_space)

        # Skip the first `skip_header` rows
        try:
            for i in range(skip_header):
                next(fhd)

            # Keep on until we find the first valid values
            first_values = None

            while not first_values:
                first_line = _decode_line(next(fhd), encoding)
                if (names is True) and (comments is not None):
                    if comments in first_line:
                        first_line = (
                            ''.join(first_line.split(comments)[1:]))
                first_values = split_line(first_line)
        except StopIteration:
            # return an empty array if the datafile is empty
            first_line = ''
            first_values = []
            warnings.warn(
                'genfromtxt: Empty input file: "%s"' % fname, stacklevel=2
            )

        # Should we take the first values as names?
        if names is True:
            fval = first_values[0].strip()
            if comments is not None:
                if fval in comments:
                    del first_values[0]

        # Check the columns to use: make sure `usecols` is a list
        if usecols is not None:
            try:
                usecols = [_.strip() for _ in usecols.split(",")]
            except AttributeError:
                try:
                    usecols = list(usecols)
                except TypeError:
                    usecols = [usecols, ]
        nbcols = len(usecols or first_values)

        # Check the names and overwrite the dtype.names if needed
        if names is True:
            names = validate_names([str(_.strip()) for _ in first_values])
            first_line = ''
        elif _is_string_like(names):
            names = validate_names([_.strip() for _ in names.split(',')])
        elif names:
            names = validate_names(names)
        # Get the dtype
        if dtype is not None:
            dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
                               excludelist=excludelist,
                               deletechars=deletechars,
                               case_sensitive=case_sensitive,
                               replace_space=replace_space)
        # Make sure `names` is a list (for Python 2.5 compatibility)
        if names is not None:
            names = list(names)

        if usecols:
            for (i, current) in enumerate(usecols):
                # if usecols is a list of names, convert to a list of indices
                if _is_string_like(current):
                    usecols[i] = names.index(current)
                elif current < 0:
                    usecols[i] = current + len(first_values)
            # If the dtype is not None, make sure we update it
            if (dtype is not None) and (len(dtype) > nbcols):
                descr = dtype.descr
                dtype = np.dtype([descr[_] for _ in usecols])
                names = list(dtype.names)
            # If `names` is not None, update the names
            elif (names is not None) and (len(names) > nbcols):
                names = [names[_] for _ in usecols]
        elif (names is not None) and (dtype is not None):
            names = list(dtype.names)

        # Process the missing values ...............................
        # Rename missing_values for convenience
        user_missing_values = missing_values or ()
        if isinstance(user_missing_values, bytes):
            user_missing_values = user_missing_values.decode('latin1')

        # Define the list of missing_values (one column: one list)
        missing_values = [[''] for _ in range(nbcols)]

        # We have a dictionary: process it field by field
        if isinstance(user_missing_values, dict):
            # Loop on the items
            for (key, val) in user_missing_values.items():
                # Is the key a string?
                if _is_string_like(key):
                    try:
                        # Transform it into an integer
                        key = names.index(key)
                    except ValueError:
                        # We couldn't find it: the name must have been dropped
                        continue
                # Redefine the key as needed if it's a column number
                if usecols:
                    try:
                        key = usecols.index(key)
                    except ValueError:
                        pass
                # Transform the value into a list of strings
                if isinstance(val, (list, tuple)):
                    val = [str(_) for _ in val]
                else:
                    val = [str(val), ]
                # Add the value(s) to the current list of missing
                if key is None:
                    # None acts as default
                    for miss in missing_values:
                        miss.extend(val)
                else:
                    missing_values[key].extend(val)
        # We have a sequence: each item matches a column
        elif isinstance(user_missing_values, (list, tuple)):
            for (value, entry) in zip(user_missing_values, missing_values):
                value = str(value)
                if value not in entry:
                    entry.append(value)
        # We have a string: apply it to all entries
        elif isinstance(user_missing_values, str):
            user_value = user_missing_values.split(",")
            for entry in missing_values:
                entry.extend(user_value)
        # We have something else: apply it to all entries
        else:
            for entry in missing_values:
                entry.extend([str(user_missing_values)])

        # Process the filling_values ...............................
        # Rename the input for convenience
        user_filling_values = filling_values
        if user_filling_values is None:
            user_filling_values = []
        # Define the default
        filling_values = [None] * nbcols
        # We have a dictionary: update each entry individually
        if isinstance(user_filling_values, dict):
            for (key, val) in user_filling_values.items():
                if _is_string_like(key):
                    try:
                        # Transform it into an integer
                        key = names.index(key)
                    except ValueError:
                        # We couldn't find it: the name must have been dropped
                        continue
                # Redefine the key if it's a column number
                # and usecols is defined
                if usecols:
                    try:
                        key = usecols.index(key)
                    except ValueError:
                        pass
                # Add the value to the list
                filling_values[key] = val
        # We have a sequence: update on a one-to-one basis
        elif isinstance(user_filling_values, (list, tuple)):
            n = len(user_filling_values)
            if (n <= nbcols):
                filling_values[:n] = user_filling_values
            else:
                filling_values = user_filling_values[:nbcols]
        # We have something else: use it for all entries
        else:
            filling_values = [user_filling_values] * nbcols

        # Initialize the converters ................................
        if dtype is None:
            # Note: we can't use a [...]*nbcols, as we would have 3 times
            # the same converter, instead of 3 different converters.
            converters = [
                StringConverter(None, missing_values=miss, default=fill)
                for (miss, fill) in zip(missing_values, filling_values)
            ]
        else:
            dtype_flat = flatten_dtype(dtype, flatten_base=True)
            # Initialize the converters
            if len(dtype_flat) > 1:
                # Flexible type: get a converter from each dtype
                zipit = zip(dtype_flat, missing_values, filling_values)
                converters = [StringConverter(dt,
                                              locked=True,
                                              missing_values=miss,
                                              default=fill)
                              for (dt, miss, fill) in zipit]
            else:
                # Set to a default converter (but w/ different missing values)
                zipit = zip(missing_values, filling_values)
                converters = [StringConverter(dtype,
                                              locked=True,
                                              missing_values=miss,
                                              default=fill)
                              for (miss, fill) in zipit]
        # Update the converters to use the user-defined ones
        uc_update = []
        for (j, conv) in user_converters.items():
            # If the converter is specified by column names,
            # use the index instead
            if _is_string_like(j):
                try:
                    j = names.index(j)
                    i = j
                except ValueError:
                    continue
            elif usecols:
                try:
                    i = usecols.index(j)
                except ValueError:
                    # Unused converter specified
                    continue
            else:
                i = j
            # Find the value to test - first_line is not filtered by usecols:
            if len(first_line):
                testing_value = first_values[j]
            else:
                testing_value = None
            if conv is bytes:
                user_conv = asbytes
            elif byte_converters:
                # Converters may use decode to workaround numpy's old
                # behavior, so encode the string again before passing
                # to the user converter.
                def tobytes_first(x, conv):
                    if type(x) is bytes:
                        return conv(x)
                    return conv(x.encode("latin1"))
                user_conv = functools.partial(tobytes_first, conv=conv)
            else:
                user_conv = conv
            converters[i].update(user_conv, locked=True,
                                 testing_value=testing_value,
                                 default=filling_values[i],
                                 missing_values=missing_values[i],)
            uc_update.append((i, user_conv))
        # Make sure we have the corrected keys in user_converters...
        user_converters.update(uc_update)

        # Fixme: possible error as following variable never used.
        # miss_chars = [_.missing_values for _ in converters]

        # Initialize the output lists ...
        # ... rows
        rows = []
        append_to_rows = rows.append
        # ... masks
        if usemask:
            masks = []
            append_to_masks = masks.append
        # ... invalid
        invalid = []
        append_to_invalid = invalid.append

        # Parse each line
        for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
            values = split_line(line)
            nbvalues = len(values)
            # Skip an empty line
            if nbvalues == 0:
                continue
            if usecols:
                # Select only the columns we need
                try:
                    values = [values[_] for _ in usecols]
                except IndexError:
                    append_to_invalid((i + skip_header + 1, nbvalues))
                    continue
            elif nbvalues != nbcols:
                append_to_invalid((i + skip_header + 1, nbvalues))
                continue
            # Store the values
            append_to_rows(tuple(values))
            if usemask:
                append_to_masks(tuple([v.strip() in m
                                       for (v, m) in zip(values,
                                                         missing_values)]))
            if len(rows) == max_rows:
                break

        # Upgrade the converters (if needed)
        if dtype is None:
            for (i, converter) in enumerate(converters):
                current_column = [itemgetter(i)(_m) for _m in rows]
                try:
                    converter.iterupgrade(current_column)
                except ConverterLockError:
                    errmsg = "Converter #%i is locked and cannot be upgraded: " % i
                    current_column = map(itemgetter(i), rows)
                    for (j, value) in enumerate(current_column):
                        try:
                            converter.upgrade(value)
                        except (ConverterError, ValueError):
                            errmsg += "(occurred line #%i for value '%s')"
                            errmsg %= (j + 1 + skip_header, value)
                    raise ConverterError(errmsg)

        # Check that we don't have invalid values
        nbinvalid = len(invalid)
        if nbinvalid > 0:
            nbrows = len(rows) + nbinvalid - skip_footer
            # Construct the error message
            template = "    Line #%%i (got %%i columns instead of %i)" % nbcols
            if skip_footer > 0:
                nbinvalid_skipped = len([_ for _ in invalid
                                         if _[0] > nbrows + skip_header])
                invalid = invalid[:nbinvalid - nbinvalid_skipped]
                skip_footer -= nbinvalid_skipped
            errmsg = [template % (i, nb)
                      for (i, nb) in invalid]
            if len(errmsg):
                errmsg.insert(0, "Some errors were detected!")
                errmsg = "\n".join(errmsg)
                # Raise an exception?
                if invalid_raise:
                    raise ValueError(errmsg)
                # Issue a warning?
                else:
                    warnings.warn(errmsg, ConversionWarning, stacklevel=2)

        # Strip the last skip_footer data
        if skip_footer > 0:
            rows = rows[:-skip_footer]
            if usemask:
                masks = masks[:-skip_footer]

        # Convert each value according to the converter:
        # We want to modify the list in place to avoid creating a new one...
        if loose:
            rows = list(
                zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
                      for (i, conv) in enumerate(converters)]))
        else:
            rows = list(
                zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
                      for (i, conv) in enumerate(converters)]))

        # Reset the dtype
        data = rows
        if dtype is None:
            # Get the dtypes from the types of the converters
            column_types = [conv.type for conv in converters]
            # Find the columns with strings...
            strcolidx = [i for (i, v) in enumerate(column_types)
                         if v == np.str_]

            if byte_converters and strcolidx:
                # convert strings back to bytes for backward compatibility
                warnings.warn(
                    "Reading unicode strings without specifying the encoding "
                    "argument is deprecated. Set the encoding, use None for the "
                    "system default.",
                    np.exceptions.VisibleDeprecationWarning, stacklevel=2)

                def encode_unicode_cols(row_tup):
                    row = list(row_tup)
                    for i in strcolidx:
                        row[i] = row[i].encode('latin1')
                    return tuple(row)

                try:
                    data = [encode_unicode_cols(r) for r in data]
                except UnicodeEncodeError:
                    pass
                else:
                    for i in strcolidx:
                        column_types[i] = np.bytes_

            # Update string types to be the right length
            sized_column_types = column_types[:]
            for i, col_type in enumerate(column_types):
                if np.issubdtype(col_type, np.character):
                    n_chars = max(len(row[i]) for row in data)
                    sized_column_types[i] = (col_type, n_chars)

            if names is None:
                # If the dtype is uniform (before sizing strings)
                base = {
                    c_type
                    for c, c_type in zip(converters, column_types)
                    if c._checked}
                if len(base) == 1:
                    uniform_type, = base
                    (ddtype, mdtype) = (uniform_type, bool)
                else:
                    ddtype = [(defaultfmt % i, dt)
                              for (i, dt) in enumerate(sized_column_types)]
                    if usemask:
                        mdtype = [(defaultfmt % i, bool)
                                  for (i, dt) in enumerate(sized_column_types)]
            else:
                ddtype = list(zip(names, sized_column_types))
                mdtype = list(zip(names, [bool] * len(sized_column_types)))
            output = np.array(data, dtype=ddtype)
            if usemask:
                outputmask = np.array(masks, dtype=mdtype)
        else:
            # Overwrite the initial dtype names if needed
            if names and dtype.names is not None:
                dtype.names = names
            # Case 1. We have a structured type
            if len(dtype_flat) > 1:
                # Nested dtype, e.g. [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
                # First, create the array using a flattened dtype:
                # [('a', int), ('b1', int), ('b2', float)]
                # Then, view the array using the specified dtype.
                if 'O' in (_.char for _ in dtype_flat):
                    if has_nested_fields(dtype):
                        raise NotImplementedError(
                            "Nested fields involving objects are not supported...")
                    else:
                        output = np.array(data, dtype=dtype)
                else:
                    rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
                    output = rows.view(dtype)
                # Now, process the rowmasks the same way
                if usemask:
                    rowmasks = np.array(
                        masks, dtype=np.dtype([('', bool) for t in dtype_flat]))
                    # Construct the new dtype
                    mdtype = make_mask_descr(dtype)
                    outputmask = rowmasks.view(mdtype)
            # Case #2. We have a basic dtype
            else:
                # We used some user-defined converters
                if user_converters:
                    ishomogeneous = True
                    descr = []
                    for i, ttype in enumerate([conv.type for conv in converters]):
                        # Keep the dtype of the current converter
                        if i in user_converters:
                            ishomogeneous &= (ttype == dtype.type)
                            if np.issubdtype(ttype, np.character):
                                ttype = (ttype, max(len(row[i]) for row in data))
                            descr.append(('', ttype))
                        else:
                            descr.append(('', dtype))
                    # So we changed the dtype?
                    if not ishomogeneous:
                        # We have more than one field
                        if len(descr) > 1:
                            dtype = np.dtype(descr)
                        # We have only one field: drop the name if not needed.
                        else:
                            dtype = np.dtype(ttype)
                #
                output = np.array(data, dtype)
                if usemask:
                    if dtype.names is not None:
                        mdtype = [(_, bool) for _ in dtype.names]
                    else:
                        mdtype = bool
                    outputmask = np.array(masks, dtype=mdtype)
        # Try to take care of the missing data we missed
        names = output.dtype.names
        if usemask and names:
            for (name, conv) in zip(names, converters):
                missing_values = [conv(_) for _ in conv.missing_values
                                  if _ != '']
                for mval in missing_values:
                    outputmask[name] |= (output[name] == mval)
        # Construct the final array
        if usemask:
            output = output.view(MaskedArray)
            output._mask = outputmask

        output = _ensure_ndmin_ndarray(output, ndmin=ndmin)

        if unpack:
            if names is None:
                return output.T
            elif len(names) == 1:
                # squeeze single-name dtypes too
                return output[names[0]]
            else:
                # For structured arrays with multiple fields,
                # return an array for each field.
                return [output[field] for field in names]
        return output


_genfromtxt_with_like = array_function_dispatch()(genfromtxt)


def recfromtxt(fname, **kwargs):
    """
    Load ASCII data from a file and return it in a record array.

    If ``usemask=False`` a standard `recarray` is returned;
    if ``usemask=True`` a MaskedRecords array is returned.

    .. deprecated:: 2.0
        Use `numpy.genfromtxt` instead.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.
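
    Examples
    --------
    A roughly equivalent replacement (an illustrative sketch):

    >>> import numpy as np
    >>> from io import StringIO
    >>> rec = np.genfromtxt(StringIO("1 2"), dtype=None).view(np.recarray)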

    """

    # Deprecated in NumPy 2.0, 2023-07-11
    warnings.warn(
        "`recfromtxt` is deprecated, "
        "use `numpy.genfromtxt` instead. "
        "(deprecated in NumPy 2.0)",
        DeprecationWarning,
        stacklevel=2
    )

    kwargs.setdefault("dtype", None)
    usemask = kwargs.get('usemask', False)
    output = genfromtxt(fname, **kwargs)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output

def recfromcsv(fname, **kwargs):
    """
    Load ASCII data stored in a comma-separated file.

    The returned array is a record array (if ``usemask=False``, see
    `recarray`) or a masked record array (if ``usemask=True``,
    see `ma.mrecords.MaskedRecords`).

    .. deprecated:: 2.0
        Use `numpy.genfromtxt` with comma as `delimiter` instead.

    Parameters
    ----------
    fname, kwargs : For a description of input parameters, see `genfromtxt`.

    See Also
    --------
    numpy.genfromtxt : generic function to load ASCII data.

    Notes
    -----
    By default, `dtype` is None, which means that the data-type of the output
    array will be determined from the data.
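
    Examples
    --------
    A roughly equivalent call with `genfromtxt`, matching the defaults set
    below (an illustrative sketch):

    >>> import numpy as np
    >>> from io import StringIO
    >>> s = StringIO('''A,B
    ... 1,2.5
    ... 3,4.5''')
    >>> rec = np.genfromtxt(s, delimiter=",", names=True, dtype=None,
    ...                     case_sensitive="lower").view(np.recarray)
    >>> rec.a
    array([1, 3])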

    """

    # Deprecated in NumPy 2.0, 2023-07-11
    warnings.warn(
        "`recfromcsv` is deprecated, "
        "use `numpy.genfromtxt` with comma as `delimiter` instead. "
        "(deprecated in NumPy 2.0)",
        DeprecationWarning,
        stacklevel=2
    )

    # Set default kwargs for genfromtxt as relevant to csv import.
    kwargs.setdefault("case_sensitive", "lower")
    kwargs.setdefault("names", True)
    kwargs.setdefault("delimiter", ",")
    kwargs.setdefault("dtype", None)
    output = genfromtxt(fname, **kwargs)

    usemask = kwargs.get("usemask", False)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        output = output.view(MaskedRecords)
    else:
        output = output.view(np.recarray)
    return output