"""
IO related functions.
"""
import os
import re
import functools
import itertools
import warnings
import weakref
import contextlib
import operator
from operator import itemgetter, index as opindex, methodcaller
from collections.abc import Mapping
import pickle

import numpy as np
from . import format
from ._datasource import DataSource
from numpy._core import overrides
from numpy._core.multiarray import packbits, unpackbits
from numpy._core._multiarray_umath import _load_from_filelike
from numpy._core.overrides import set_array_function_like_doc, set_module
from ._iotools import (
    LineSplitter, NameValidator, StringConverter, ConverterError,
    ConverterLockError, ConversionWarning, _is_string_like,
    has_nested_fields, flatten_dtype, easy_dtype, _decode_line
    )
from numpy._utils import asunicode, asbytes


__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'load', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex'
    ]


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


class BagObj:
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib._npyio_impl import BagObj as BO
    >>> class BagDemo:
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key) from None

    def __dir__(self):
        """
        Enables dir(bagobj) to list the files in an NpzFile.

        This also enables tab-completion in an interpreter or IPython.
        """
        return list(object.__getattribute__(self, '_obj').keys())


def zipfile_factory(file, *args, **kwargs):
    """
    Create a ZipFile.

    Allows for Zip64, and the `file` argument can accept file, str, or
    pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
    constructor.
    """
    if not hasattr(file, 'read'):
        file = os.fspath(file)
    import zipfile
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(file, *args, **kwargs)
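

# Editor's note: a minimal usage sketch (illustrative only, not part of
# NumPy).  The factory accepts a path-like or an already-open binary file
# object, and it always sets ``allowZip64`` so archives may exceed the
# classic zip size and member-count limits.
#
#     >>> import io
#     >>> buf = io.BytesIO()
#     >>> with zipfile_factory(buf, mode="w") as zf:
#     ...     zf.writestr("arr_0.npy", b"placeholder payload")
#     >>> zipfile_factory(buf).namelist()
#     ['arr_0.npy']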


@set_module('numpy.lib.npyio')
class NpzFile(Mapping):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute lookup can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Parameters
    ----------
    fid : file, str, or pathlib.Path
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> npz
    NpzFile 'object' with keys: x, y
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Make __exit__ safe if zipfile_factory raises an exception
    zip = None
    fid = None
    _MAX_REPR_ARRAY_COUNT = 5

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None, *,
                 max_header_size=format._MAX_HEADER_SIZE):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.max_header_size = max_header_size
        self.pickle_kwargs = pickle_kwargs
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs,
                                         max_header_size=self.max_header_size)
            else:
                return self.zip.read(key)
        else:
            raise KeyError(f"{key} is not a file in the archive")

    def __contains__(self, key):
        return (key in self._files or key in self.files)

    def __repr__(self):
        # Get filename or default to `object`
        if isinstance(self.fid, str):
            filename = self.fid
        else:
            filename = getattr(self.fid, "name", "object")

        # Get the names of the arrays
        array_names = ', '.join(self.files[:self._MAX_REPR_ARRAY_COUNT])
        if len(self.files) > self._MAX_REPR_ARRAY_COUNT:
            array_names += "..."
        return f"NpzFile {filename!r} with keys: {array_names}"

    # Work around problems with the docstrings in the Mapping methods
    # They contain a `->`, which confuses the type annotation interpretations
    # of sphinx-docs. See gh-25964

    def get(self, key, default=None, /):
        """
        D.get(k[,d]) returns D[k] if k in D, else d.  d defaults to None.
        """
        return Mapping.get(self, key, default)

    def items(self):
        """
        D.items() returns a set-like object providing a view on the items
        """
        return Mapping.items(self)

    def keys(self):
        """
        D.keys() returns a set-like object providing a view on the keys
        """
        return Mapping.keys(self)

    def values(self):
        """
        D.values() returns a set-like object providing a view on the values
        """
        return Mapping.values(self)

@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
         encoding='ASCII', *, max_header_size=format._MAX_HEADER_SIZE):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    .. warning:: Loading files that contain object arrays uses the ``pickle``
                 module, which is not secure against erroneous or maliciously
                 constructed data. Consider passing ``allow_pickle=False`` to
                 load data that is known not to contain object arrays for the
                 safer handling of untrusted sources.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods and must always
        be opened in binary mode.  Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes).  A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray.  Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'
    max_header_size : int, optional
        Maximum allowed size of the header.  Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.
        This option is ignored when `allow_pickle` is passed.  In that case
        the file is by definition trusted and the limit is unnecessary.

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    OSError
        If the input file does not exist or cannot be read.
    UnpicklingError
        If ``allow_pickle=True``, but the file cannot be loaded as a pickle.
    ValueError
        The file contains an object array, but ``allow_pickle=False`` was
        given.
    EOFError
        When calling ``np.load`` multiple times on the same file handle,
        if all data has already been read.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a = np.array([[1, 2, 3], [4, 5, 6]])
    >>> b = np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy._core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os.fspath(file), "rb"))
            own_fid = True

        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        if not magic:
            raise EOFError("No data left in file")
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs,
                          max_header_size=max_header_size)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                if allow_pickle:
                    max_header_size = 2**64
                return format.open_memmap(file, mode=mmap_mode,
                                          max_header_size=max_header_size)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs,
                                         max_header_size=max_header_size)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError("Cannot load file containing pickled data "
                                 "when allow_pickle=False")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception as e:
                raise pickle.UnpicklingError(
                    f"Failed to interpret file {file!r} as a pickle") from e
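

# Editor's note: the format dispatch above, in miniature (a sketch, not
# library code).  ``load`` peeks at the first bytes of the stream: zip
# archives (``.npz``) start with ``PK\x03\x04`` (or ``PK\x05\x06`` when
# empty), ``.npy`` files start with ``format.MAGIC_PREFIX`` (b'\x93NUMPY'),
# and anything else is attempted as a pickle.
#
#     def _classify(prefix):  # hypothetical helper, for illustration only
#         if prefix.startswith((b'PK\x03\x04', b'PK\x05\x06')):
#             return 'npz'
#         if prefix.startswith(b'\x93NUMPY'):
#             return 'npy'
#         return 'pickle'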


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved.  If file is a file-object,
        then the filename is unchanged.  If file is a string or Path,
        a ``.npy`` extension will be appended to the filename if it does not
        already have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between Python 2 and Python 3).
        Default: True
    fix_imports : bool, optional
        Only useful in forcing objects in object arrays on Python 3 to be
        pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
        will try to map the new Python 3 names to the old module names used in
        Python 2, so that the pickle data stream is readable with Python 2.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    # [1 2] [1 3]
    """
    if hasattr(file, 'write'):
        file_ctx = contextlib.nullcontext(file)
    else:
        file = os.fspath(file)
        if not file.endswith('.npy'):
            file = file + '.npy'
        file_ctx = open(file, "wb")

    with file_ctx as fid:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr, allow_pickle=allow_pickle,
                           pickle_kwargs=dict(fix_imports=fix_imports))


def _savez_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, **kwds):
    """Save several arrays into a single file in uncompressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e., ``savez(fn,
    x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Keys passed in `kwds` are used as filenames inside the ZIP archive.
    Therefore, keys should be valid filenames; e.g., avoid keys that begin with
    ``/`` or contain ``.``.

    When naming variables with keyword arguments, it is not possible to name a
    variable ``file``, as this would cause the ``file`` argument to be defined
    twice in the call to ``savez``.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> _ = outfile.seek(0)  # Only needed to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_0', 'arr_1']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> sorted(npzfile.files)
    ['x', 'y']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    _savez(file, args, kwds, False)


def _savez_compressed_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    Provide arrays as keyword arguments to store them under the
    corresponding name in the output file: ``savez_compressed(fn, x=x, y=y)``.

    If arrays are specified as positional arguments, i.e.,
    ``savez_compressed(fn, x, y)``, their names will be `arr_0`, `arr_1`, etc.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Please use keyword arguments (see
        `kwds` below) to assign names to arrays.  Arrays specified as
        args will be named "arr_0", "arr_1", and so on.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Each array will be saved to the
        output file with its corresponding keyword name.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain.  The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `~lib.npyio.NpzFile`
    object is returned. This is a dictionary-like object which can be queried
    for its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os.fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            format.write_array(fid, val,
                               allow_pickle=allow_pickle,
                               pickle_kwargs=pickle_kwargs)

    zipf.close()
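

# Editor's note (behavior sketch, not library code): positional arrays get
# the generated names ``arr_0``, ``arr_1``, ..., so mixing a positional
# array with an explicit ``arr_0=...`` keyword collides:
#
#     >>> import io
#     >>> np.savez(io.BytesIO(), np.zeros(1), arr_0=np.ones(1))
#     Traceback (most recent call last):
#         ...
#     ValueError: Cannot use un-named variables and keyword arr_0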


def _ensure_ndmin_ndarray_check_param(ndmin):
    """Just checks if the param ndmin is supported by
    _ensure_ndmin_ndarray. It is intended to be used as
    verification before running anything expensive,
    e.g. in loadtxt and genfromtxt.
    """
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError(f"Illegal value of ndmin keyword: {ndmin}")


def _ensure_ndmin_ndarray(a, *, ndmin: int):
    """This is a helper function of loadtxt and genfromtxt to ensure
    proper minimum dimension as requested

    ndmin : int. Supported values 0, 1, 2
    ^^ whenever this changes, keep in sync with
       _ensure_ndmin_ndarray_check_param
    """
    # Verify that the array has at least dimensions `ndmin`.
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if a.ndim > ndmin:
        a = np.squeeze(a)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, a.squeeze().ndim=0
    if a.ndim < ndmin:
        if ndmin == 1:
            a = np.atleast_1d(a)
        elif ndmin == 2:
            a = np.atleast_2d(a).T

    return a
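

# Editor's note (behavior sketch): squeeze first, then pad, so the odd
# ndmin=1 / 0-d case still comes out 1-d, and ndmin=2 yields column vectors.
#
#     >>> _ensure_ndmin_ndarray(np.array(5.0), ndmin=1)
#     array([5.])
#     >>> _ensure_ndmin_ndarray(np.array([1., 2.]), ndmin=2).shape
#     (2, 1)
#     >>> _ensure_ndmin_ndarray(np.ones((1, 3)), ndmin=0).shape
#     (3,)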


# amount of lines loadtxt reads in one chunk, can be overridden for testing
# (this is also the number of rows read in one go if confronted with a
# parametric dtype)
_loadtxt_chunksize = 50000


def _check_nonneg_int(value, name="argument"):
    try:
        operator.index(value)
    except TypeError:
        raise TypeError(f"{name} must be an integer") from None
    if value < 0:
        raise ValueError(f"{name} must be nonnegative")
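

# Editor's note (behavior sketch): anything accepted by ``operator.index``
# passes; floats are rejected outright rather than truncated.
#
#     >>> _check_nonneg_int(3, "max_rows")
#     >>> _check_nonneg_int(3.0, "max_rows")
#     Traceback (most recent call last):
#         ...
#     TypeError: max_rows must be an integer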


def _preprocess_comments(iterable, comments, encoding):
    """
    Generator that consumes an iterable of lines and strips out the
    multiple (or multi-character) comments from each line.
    This is a pre-processing step to achieve feature parity with loadtxt
    (we assume that this is a niche feature).
    """
    for line in iterable:
        if isinstance(line, bytes):
            # Need to handle conversion here, or the splitting would fail
            line = line.decode(encoding)

        for c in comments:
            line = line.split(c, 1)[0]

        yield line
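

# Editor's note (behavior sketch): each comment marker is split off
# independently, which is what makes multiple and multi-character comment
# markers work without touching the C tokenizer.
#
#     >>> list(_preprocess_comments(["1 2 // note", "3 4 ## note"],
#     ...                           comments=("//", "##"), encoding=None))
#     ['1 2 ', '3 4 ']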


def _read(fname, *, delimiter=',', comment='#', quote='"',
          imaginary_unit='j', usecols=None, skiplines=0,
          max_rows=None, converters=None, ndmin=None, unpack=False,
          dtype=np.float64, encoding=None):
    r"""
    Read a NumPy array from a text file.
    This is a helper function for loadtxt.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        The filename or the file to be read.
    delimiter : str, optional
        Field delimiter of the fields in a line of the file.
        Default is a comma, ','.  If None, any sequence of whitespace is
        considered a delimiter.
    comment : str or sequence of str or None, optional
        Character that begins a comment.  All text from the comment
        character to the end of the line is ignored.
        Multiple comments or multiple-character comment strings are supported,
        but may be slower and `quote` must be empty if used.
        Use None to disable all use of comments.
    quote : str or None, optional
        Character that is used to quote string fields. Default is '"'
        (a double quote). Use None to disable quote support.
    imaginary_unit : str, optional
        Character that represents the imaginary unit `sqrt(-1)`.
        Default is 'j'.
    usecols : array_like, optional
        A one-dimensional array of integer column numbers.  These are the
        columns from the file to be included in the array.  If this value
        is not given, all the columns are used.
    skiplines : int, optional
        Number of lines to skip before interpreting the data in the file.
    max_rows : int, optional
        Maximum number of rows of data to read.  Default is to read the
        entire file.
    converters : dict or callable, optional
        A function to parse all column strings into the desired value, or
        a dictionary mapping column number to a parser function.
        E.g. if column 0 is a date string: ``converters = {0: datestr2num}``.
        Converters can also be used to provide a default value for missing
        data, e.g. ``converters = lambda s: float(s.strip() or 0)`` will
        convert empty fields to 0.
        Default: None
    ndmin : int, optional
        Minimum dimension of the array returned.
        Allowed values are 0, 1 or 2.  Default is 0.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = read(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    dtype : numpy data type
        A NumPy dtype instance, can be a structured dtype to map to the
        columns of the file.
    encoding : str, optional
        Encoding used to decode the input file.  The special value 'bytes'
        (the default) enables backwards-compatible behavior for `converters`,
        ensuring that inputs to the converter functions are encoded
        bytes objects.  The special value 'bytes' has no additional effect if
        ``converters=None``.  If encoding is ``'bytes'`` or ``None``, the
        default system encoding is used.

    Returns
    -------
    ndarray
        NumPy array.
    """
    # Handle special 'bytes' keyword for encoding
    byte_converters = False
    if encoding == 'bytes':
        encoding = None
        byte_converters = True

    if dtype is None:
        raise TypeError("a dtype must be provided.")
    dtype = np.dtype(dtype)

    read_dtype_via_object_chunks = None
    if dtype.kind in 'SUM' and (
            dtype == "S0" or dtype == "U0" or dtype == "M8" or dtype == 'm8'):
        # This is a legacy "flexible" dtype.  We do not truly support
        # parametric dtypes currently (no dtype discovery step in the core),
        # but have to support these for backward compatibility.
        read_dtype_via_object_chunks = dtype
        dtype = np.dtype(object)

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints, the C-code
        # handles the rest
        try:
            usecols = list(usecols)
        except TypeError:
            usecols = [usecols]

    _ensure_ndmin_ndarray_check_param(ndmin)

    if comment is None:
        comments = None
    else:
        # assume comments are a sequence of strings
        if "" in comment:
            raise ValueError(
                "comments cannot be an empty string. Use comments=None to "
                "disable comments."
            )
        comments = tuple(comment)
        comment = None
        if len(comments) == 0:
            comments = None  # No comments at all
        elif len(comments) == 1:
            # If there is only one comment, and that comment has one character,
            # the normal parsing can deal with it just fine.
            if isinstance(comments[0], str) and len(comments[0]) == 1:
                comment = comments[0]
                comments = None
        else:
            # Input validation if there are multiple comment characters
            if delimiter in comments:
                raise TypeError(
                    f"Comment characters '{comments}' cannot include the "
                    f"delimiter '{delimiter}'"
                )

    # comment is now either a 1 or 0 character string or a tuple:
    if comments is not None:
        # Note: An earlier version supported two-character comments (and could
        # have been extended to multiple characters); we assume this is
        # rare enough to not optimize for.
        if quote is not None:
            raise ValueError(
                "when multiple comments or a multi-character comment is "
                "given, quotes are not supported.  In this case quotechar "
                "must be set to None.")

    if len(imaginary_unit) != 1:
        raise ValueError('len(imaginary_unit) must be 1.')

    _check_nonneg_int(skiplines)
    if max_rows is not None:
        _check_nonneg_int(max_rows)
    else:
        # Passing -1 to the C code means "read the entire file".
        max_rows = -1

    fh_closing_ctx = contextlib.nullcontext()
    filelike = False
    try:
        if isinstance(fname, os.PathLike):
            fname = os.fspath(fname)
        if isinstance(fname, str):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            if encoding is None:
                encoding = getattr(fh, 'encoding', 'latin1')

            fh_closing_ctx = contextlib.closing(fh)
            data = fh
            filelike = True
        else:
            if encoding is None:
                encoding = getattr(fname, 'encoding', 'latin1')
            data = iter(fname)
    except TypeError as e:
        raise ValueError(
            f"fname must be a string, filehandle, list of strings,\n"
            f"or generator. Got {type(fname)} instead.") from e

    with fh_closing_ctx:
        if comments is not None:
            if filelike:
                data = iter(data)
                filelike = False
            data = _preprocess_comments(data, comments, encoding)

        if read_dtype_via_object_chunks is None:
            arr = _load_from_filelike(
                data, delimiter=delimiter, comment=comment, quote=quote,
                imaginary_unit=imaginary_unit,
                usecols=usecols, skiplines=skiplines, max_rows=max_rows,
                converters=converters, dtype=dtype,
                encoding=encoding, filelike=filelike,
                byte_converters=byte_converters)

        else:
            # This branch reads the file into chunks of object arrays and then
            # casts them to the desired actual dtype.  This ensures correct
            # string-length and datetime-unit discovery (like `arr.astype()`).
            # Due to chunking, certain error reports are less clear, currently.
            if filelike:
                data = iter(data)  # cannot chunk when reading from file

            c_byte_converters = False
            if read_dtype_via_object_chunks == "S":
                c_byte_converters = True  # Use latin1 rather than ascii

            chunks = []
            while max_rows != 0:
                if max_rows < 0:
                    chunk_size = _loadtxt_chunksize
                else:
                    chunk_size = min(_loadtxt_chunksize, max_rows)

                # Read at most one chunk per call so the loop accounting
                # below stays correct.
                next_arr = _load_from_filelike(
                    data, delimiter=delimiter, comment=comment, quote=quote,
                    imaginary_unit=imaginary_unit,
                    usecols=usecols, skiplines=skiplines, max_rows=chunk_size,
                    converters=converters, dtype=dtype,
                    encoding=encoding, filelike=filelike,
                    byte_converters=byte_converters,
                    c_byte_converters=c_byte_converters)
                # Cast here already.  We hope that this is better even for
                # large files because the storage is more compact.  It could
                # be adapted (in principle the concatenate could cast).
                chunks.append(next_arr.astype(read_dtype_via_object_chunks))

                skiplines = 0  # Only have to skip for the first chunk
                if max_rows >= 0:
                    max_rows -= chunk_size
                if len(next_arr) < chunk_size:
                    # There was less data than requested, so we are done.
                    break

            # Need at least one chunk, but if empty, the last one may have
            # the wrong shape.
            if len(chunks) > 1 and len(chunks[-1]) == 0:
                del chunks[-1]
            if len(chunks) == 1:
                arr = chunks[0]
            else:
                arr = np.concatenate(chunks, axis=0)

    # NOTE: ndmin works as advertised for structured dtypes, but normally
    #       these would return a 1D result plus the structured dimension,
    #       so ndmin=2 adds a third dimension even when no squeezing occurs.
    #       A `squeeze=False` could be a better solution (pandas uses squeeze).
    arr = _ensure_ndmin_ndarray(arr, ndmin=ndmin)

    if arr.shape:
        if arr.shape[0] == 0:
            warnings.warn(
                f'loadtxt: input contained no data: "{fname}"',
                category=UserWarning,
                stacklevel=3
            )

    if unpack:
        # Unpack structured dtypes if requested:
        dt = arr.dtype
        if dt.names is not None:
            # For structured arrays, return an array for each field.
            return [arr[field] for field in dt.names]
        else:
            return arr.T
    else:
        return arr
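

# Editor's note (illustrative sketch; the exact repr is plausible but not
# verified here): the object-chunk branch above is what gives unsized string
# dtypes their length discovery, e.g. through ``loadtxt``:
#
#     >>> import io
#     >>> np.loadtxt(io.StringIO("alpha beta\ngamma x"), dtype=str)
#     array([['alpha', 'beta'],
#            ['gamma', 'x']], dtype='<U5')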


@set_array_function_like_doc
@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding=None, max_rows=None, *, quotechar=None,
            like=None):
    r"""
    Load data from a text file.

    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note
        that generators must return bytes or strings. The strings
        in a list or produced by a generator are treated as lines.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str or None, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The character used to separate the values. For backwards compatibility,
        byte strings will be decoded as 'latin1'. The default is whitespace.

        .. versionchanged:: 1.23.0
           Only single character delimiters are supported. Newline characters
           cannot be used as the delimiter.

    converters : dict or callable, optional
        Converter functions to customize value parsing. If `converters` is
        callable, the function is applied to all columns, else it must be a
        dict that maps column number to a parser function.
        See examples for further details.
        Default: None.

        .. versionchanged:: 1.23.0
           The ability to pass a single callable to be applied to all columns
           was added.

    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read it is possible to use
            an integer instead of a tuple.  E.g ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a
        structured data-type, arrays are returned for each field.
        Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input
        streams. The special value 'bytes' enables backward compatibility
        workarounds that ensure you receive byte arrays as results if possible
        and passes 'latin1' encoded strings to converters. Override this value
        to receive unicode arrays and pass strings as input to converters. If
        set to None the system default is used. The default value is None.

        .. versionadded:: 1.14.0
        .. versionchanged:: 2.0
            Before NumPy 2, the default was ``'bytes'`` for Python 2
            compatibility. The default is now ``None``.

    max_rows : int, optional
        Read `max_rows` rows of content after `skiprows` lines. The default is
        to read all the rows. Note that empty rows containing no data such as
        empty lines and comment lines are not counted towards `max_rows`,
        while such lines are counted in `skiprows`.

        .. versionadded:: 1.16.0

        .. versionchanged:: 1.23.0
            Lines containing no data, including comment lines (e.g., lines
            starting with '#' or as specified via `comments`) are not counted
            towards `max_rows`.
    quotechar : unicode character or None, optional
        The character used to denote the start and end of a quoted item.
        Occurrences of the delimiter or comment characters are ignored within
        a quoted item. The default value is ``quotechar=None``, which means
        quoting support is disabled.

        If two consecutive instances of `quotechar` are found within a quoted
        field, the first is treated as an escape character. See examples.

        .. versionadded:: 1.23.0
    ${ARRAY_FUNCTION_LIKE}

        .. versionadded:: 1.20.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    Each row in the input text file must have the same number of values to be
    able to read all values.  If not all rows have the same number of values,
    a subset of up to n columns (where n is the least number of values present
    in all rows) can be read by specifying the columns via `usecols`.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    The `converters` argument is used to specify functions to preprocess the
    text prior to parsing. `converters` can be a dictionary that maps
    preprocessing functions to each column:

    >>> s = StringIO("1.618, 2.296\n3.141, 4.669\n")
    >>> conv = {
    ...     0: lambda x: np.floor(float(x)),  # conversion fn for column 0
    ...     1: lambda x: np.ceil(float(x)),   # conversion fn for column 1
    ... }
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([[1., 3.],
           [3., 5.]])

    `converters` can be a callable instead of a dictionary, in which case it
    is applied to all columns:

    >>> s = StringIO("0xDE 0xAD\n0xC0 0xDE")
    >>> import functools
    >>> conv = functools.partial(int, base=16)
    >>> np.loadtxt(s, converters=conv)
    array([[222., 173.],
           [192., 222.]])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith("-") else float(fld)
    ...
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Using a callable as the converter can be particularly useful for handling
    values with different formatting, e.g. floats with underscores:

    >>> s = StringIO("1 2.7 100_000")
    >>> np.loadtxt(s, converters=float)
    array([1.e+00, 2.7e+00, 1.e+05])

    This idea can be extended to automatically handle values specified in
    many different formats, such as hex values:

    >>> def conv(val):
    ...     try:
    ...         return float(val)
    ...     except ValueError:
    ...         return float.fromhex(val)
    >>> s = StringIO("1, 2.5, 3_000, 0b4, 0x1.4000000000000p+2")
    >>> np.loadtxt(s, delimiter=",", converters=conv)
    array([1.0e+00, 2.5e+00, 3.0e+03, 1.8e+02, 5.0e+00])

    Or a format where the ``-`` sign comes after the number:

    >>> s = StringIO("10.01 31.25-\n19.22 64.31\n17.57- 63.94")
    >>> conv = lambda x: -float(x[:-1]) if x.endswith("-") else float(x)
    >>> np.loadtxt(s, converters=conv)
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])

    Support for quoted fields is enabled with the `quotechar` parameter.
    Comment and delimiter characters are ignored when they appear within a
    quoted item delineated by `quotechar`:

    >>> s = StringIO('"alpha, #42", 10.0\n"beta, #64", 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=",", quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Quoted fields can be separated by multiple whitespace characters:

    >>> s = StringIO('"alpha, #42"       10.0\n"beta, #64" 2.0\n')
    >>> dtype = np.dtype([("label", "U12"), ("value", float)])
    >>> np.loadtxt(s, dtype=dtype, delimiter=None, quotechar='"')
    array([('alpha, #42', 10.), ('beta, #64', 2.)],
          dtype=[('label', '<U12'), ('value', '<f8')])

    Two consecutive quote characters within a quoted field are treated as a
    single escaped character:

    >>> s = StringIO('"Hello, my name is ""Monty""!"')
    >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"')
    array('Hello, my name is "Monty"!', dtype='<U26')

    Read subset of columns when all rows do not contain equal number of values:

    >>> d = StringIO("1 2\n2 4\n3 9 12\n4 16 20")
    >>> np.loadtxt(d, usecols=(0, 1))
    array([[ 1.,  2.],
           [ 2.,  4.],
           [ 3.,  9.],
           [ 4., 16.]])

    """

    if like is not None:
        return _loadtxt_with_like(
            like, fname, dtype=dtype, comments=comments, delimiter=delimiter,
            converters=converters, skiprows=skiprows, usecols=usecols,
            unpack=unpack, ndmin=ndmin, encoding=encoding,
            max_rows=max_rows
        )

    if dtype is None:
        dtype = np.float64

    comment = comments
    # Control character type conversions for Py3 convenience
    if comment is not None:
        if isinstance(comment, (str, bytes)):
            comment = [comment]
        comment = [
            x.decode('latin1') if isinstance(x, bytes) else x for x in comment]
    if isinstance(delimiter, bytes):
        delimiter = delimiter.decode('latin1')

    arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
                converters=converters, skiplines=skiprows, usecols=usecols,
                unpack=unpack, ndmin=ndmin, encoding=encoding,
                max_rows=max_rows, quote=quotechar)

    return arr


_loadtxt_with_like = array_function_dispatch()(loadtxt)

1389 

1390 

1391def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None, 

1392 header=None, footer=None, comments=None, 

1393 encoding=None): 

1394 return (X,) 

1395 

1396 

1397@array_function_dispatch(_savetxt_dispatcher) 

1398def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', 

1399 footer='', comments='# ', encoding=None): 

1400 """ 

1401 Save an array to a text file. 

1402 

1403 Parameters 

1404 ---------- 

1405 fname : filename, file handle or pathlib.Path 

1406 If the filename ends in ``.gz``, the file is automatically saved in 

1407 compressed gzip format. `loadtxt` understands gzipped files 

1408 transparently. 

1409 X : 1D or 2D array_like 

1410 Data to be saved to a text file. 

1411 fmt : str or sequence of strs, optional 

1412 A single format (%10.5f), a sequence of formats, or a 

1413 multi-format string, e.g. 'Iteration %d -- %10.5f', in which 

1414 case `delimiter` is ignored. For complex `X`, the legal options 

1415 for `fmt` are: 

1416 

1417 * a single specifier, ``fmt='%.4e'``, resulting in numbers formatted 

1418 like ``' (%s+%sj)' % (fmt, fmt)`` 

1419 * a full string specifying every real and imaginary part, e.g. 

1420 ``' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'`` for 3 columns 

1421 * a list of specifiers, one per column - in this case, the real 

1422 and imaginary part must have separate specifiers, 

1423 e.g. ``['%.3e + %.3ej', '(%.15e%+.15ej)']`` for 2 columns 

1424 delimiter : str, optional 

1425 String or character separating columns. 

1426 newline : str, optional 

1427 String or character separating lines. 

1428 

1429 .. versionadded:: 1.5.0 

1430 header : str, optional 

1431 String that will be written at the beginning of the file. 

1432 

1433 .. versionadded:: 1.7.0 

1434 footer : str, optional 

1435 String that will be written at the end of the file. 

1436 

1437 .. versionadded:: 1.7.0 

1438 comments : str, optional 

1439 String that will be prepended to the ``header`` and ``footer`` strings, 

1440 to mark them as comments. Default: '# ', as expected by e.g. 

1441 ``numpy.loadtxt``. 

1442 

1443 .. versionadded:: 1.7.0 

1444 encoding : {None, str}, optional 

1445 Encoding used to encode the outputfile. Does not apply to output 

1446 streams. If the encoding is something other than 'bytes' or 'latin1' 

1447 you will not be able to load the file in NumPy versions < 1.14. Default 

1448 is 'latin1'. 

1449 

1450 .. versionadded:: 1.14.0 

1451 

1452 

1453 See Also 

1454 -------- 

1455 save : Save an array to a binary file in NumPy ``.npy`` format 

1456 savez : Save several arrays into an uncompressed ``.npz`` archive 

1457 savez_compressed : Save several arrays into a compressed ``.npz`` archive 

1458 

1459 Notes 

1460 ----- 

1461 Further explanation of the `fmt` parameter 

1462 (``%[flag]width[.precision]specifier``): 

1463 

1464 flags: 

1465 ``-`` : left justify 

1466 

1467 ``+`` : Forces to precede result with + or -. 

1468 

1469 ``0`` : Left pad the number with zeros instead of space (see width). 

1470 

1471 width: 

1472 Minimum number of characters to be printed. The value is not truncated 

1473 if it has more characters. 

1474 

1475 precision: 

1476 - For integer specifiers (eg. ``d,i,o,x``), the minimum number of 

1477 digits. 

1478 - For ``e, E`` and ``f`` specifiers, the number of digits to print 

1479 after the decimal point. 

1480 - For ``g`` and ``G``, the maximum number of significant digits. 

1481 - For ``s``, the maximum number of characters. 

1482 

1483 specifiers: 

1484 ``c`` : character 

1485 

1486 ``d`` or ``i`` : signed decimal integer 

1487 

1488 ``e`` or ``E`` : scientific notation with ``e`` or ``E``. 

1489 

1490 ``f`` : decimal floating point 

1491 

1492 ``g,G`` : use the shorter of ``e,E`` or ``f`` 

1493 

1494 ``o`` : signed octal 

1495 

1496 ``s`` : string of characters 

1497 

1498 ``u`` : unsigned decimal integer 

1499 

1500 ``x,X`` : unsigned hexadecimal integer 

1501 

1502 This explanation of ``fmt`` is not complete, for an exhaustive 

1503 specification see [1]_. 

1504 

1505 References 

1506 ---------- 

1507 .. [1] `Format Specification Mini-Language 

1508 <https://docs.python.org/library/string.html#format-specification-mini-language>`_, 

1509 Python Documentation. 

1510 

1511 Examples 

1512 -------- 

1513 >>> x = y = z = np.arange(0.0,5.0,1.0) 

1514 >>> np.savetxt('test.out', x, delimiter=',') # X is an array 

1515 >>> np.savetxt('test.out', (x,y,z)) # x,y,z equal sized 1D arrays 

1516 >>> np.savetxt('test.out', x, fmt='%1.4e') # use exponential notation 

1517 

1518 """ 

1519 

1520 class WriteWrap: 

1521 """Convert to bytes on bytestream inputs. 

1522 

1523 """ 

1524 def __init__(self, fh, encoding): 

1525 self.fh = fh 

1526 self.encoding = encoding 

1527 self.do_write = self.first_write 

1528 

1529 def close(self): 

1530 self.fh.close() 

1531 

1532 def write(self, v): 

1533 self.do_write(v) 

1534 

1535 def write_bytes(self, v): 

1536 if isinstance(v, bytes): 

1537 self.fh.write(v) 

1538 else: 

1539 self.fh.write(v.encode(self.encoding)) 

1540 

1541 def write_normal(self, v): 

1542 self.fh.write(asunicode(v)) 

1543 

1544 def first_write(self, v): 

1545 try: 

1546 self.write_normal(v) 

1547 self.write = self.write_normal 

1548 except TypeError: 

1549 # input is probably a bytestream 

1550 self.write_bytes(v) 

1551 self.write = self.write_bytes 

1552 

1553 own_fh = False 

1554 if isinstance(fname, os.PathLike): 

1555 fname = os.fspath(fname) 

1556 if _is_string_like(fname): 

1557 # datasource doesn't support creating a new file ... 

1558 open(fname, 'wt').close() 

1559 fh = np.lib._datasource.open(fname, 'wt', encoding=encoding) 

1560 own_fh = True 

1561 elif hasattr(fname, 'write'): 

1562 # wrap to handle byte output streams 

1563 fh = WriteWrap(fname, encoding or 'latin1') 

1564 else: 

1565 raise ValueError('fname must be a string or file handle') 

1566 

1567 try: 

1568 X = np.asarray(X) 

1569 

1570 # Validate dimensionality; only 1-D and 2-D arrays are accepted

1571 if X.ndim == 0 or X.ndim > 2: 

1572 raise ValueError( 

1573 "Expected 1D or 2D array, got %dD array instead" % X.ndim) 

1574 elif X.ndim == 1: 

1575 # Common case -- 1d array of numbers 

1576 if X.dtype.names is None: 

1577 X = np.atleast_2d(X).T 

1578 ncol = 1 

1579 

1580 # Complex dtype -- each field indicates a separate column 

1581 else: 

1582 ncol = len(X.dtype.names) 

1583 else: 

1584 ncol = X.shape[1] 

1585 

1586 iscomplex_X = np.iscomplexobj(X) 

1587 # `fmt` can be a string with multiple insertion points or a 

1588 # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')

1589 if type(fmt) in (list, tuple): 

1590 if len(fmt) != ncol: 

1591 raise AttributeError('fmt has wrong shape. %s' % str(fmt)) 

1592 format = delimiter.join(fmt) 

1593 elif isinstance(fmt, str): 

1594 n_fmt_chars = fmt.count('%') 

1595 error = ValueError('fmt has wrong number of %% formats: %s' % fmt) 

1596 if n_fmt_chars == 1: 

1597 if iscomplex_X: 
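# e.g. a single '%.4e' expands to ' (%.4e+%.4ej)' for every column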

1598 fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol 

1599 else: 

1600 fmt = [fmt, ] * ncol 

1601 format = delimiter.join(fmt) 

1602 elif iscomplex_X and n_fmt_chars != (2 * ncol): 

1603 raise error 

1604 elif ((not iscomplex_X) and n_fmt_chars != ncol): 

1605 raise error 

1606 else: 

1607 format = fmt 

1608 else: 

1609 raise ValueError('invalid fmt: %r' % (fmt,)) 

1610 

1611 if len(header) > 0: 

1612 header = header.replace('\n', '\n' + comments) 

1613 fh.write(comments + header + newline) 

1614 if iscomplex_X: 

1615 for row in X: 

1616 row2 = [] 

1617 for number in row: 

1618 row2.append(number.real) 

1619 row2.append(number.imag) 

1620 s = format % tuple(row2) + newline 
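# the forced '+' between real and imaginary parts yields '+-' when
# the imaginary part is negative; collapse it to a plain '-'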

1621 fh.write(s.replace('+-', '-')) 

1622 else: 

1623 for row in X: 

1624 try: 

1625 v = format % tuple(row) + newline 

1626 except TypeError as e: 

1627 raise TypeError("Mismatch between array dtype ('%s') and " 

1628 "format specifier ('%s')" 

1629 % (str(X.dtype), format)) from e 

1630 fh.write(v) 

1631 

1632 if len(footer) > 0: 

1633 footer = footer.replace('\n', '\n' + comments) 

1634 fh.write(comments + footer + newline) 

1635 finally: 

1636 if own_fh: 

1637 fh.close() 

1638 

1639 

1640@set_module('numpy') 

1641def fromregex(file, regexp, dtype, encoding=None): 

1642 r""" 

1643 Construct an array from a text file, using regular expression parsing. 

1644 

1645 The returned array is always a structured array, and is constructed from 

1646 all matches of the regular expression in the file. Groups in the regular 

1647 expression are converted to fields of the structured array. 

1648 

1649 Parameters 

1650 ---------- 

1651 file : file, str, or pathlib.Path 

1652 Filename or file object to read. 

1653 

1654 .. versionchanged:: 1.22.0 

1655 Now accepts `os.PathLike` implementations. 

1656 regexp : str or regexp 

1657 Regular expression used to parse the file. 

1658 Groups in the regular expression correspond to fields in the dtype. 

1659 dtype : dtype or list of dtypes 

1660 Dtype for the structured array; must be a structured datatype. 

1661 encoding : str, optional 

1662 Encoding used to decode the input file. Does not apply to input streams.

1663 

1664 .. versionadded:: 1.14.0 

1665 

1666 Returns 

1667 ------- 

1668 output : ndarray 

1669 The output array, containing the part of the content of `file` that 

1670 was matched by `regexp`. `output` is always a structured array. 

1671 

1672 Raises 

1673 ------ 

1674 TypeError 

1675 When `dtype` is not a valid dtype for a structured array. 

1676 

1677 See Also 

1678 -------- 

1679 fromstring, loadtxt 

1680 

1681 Notes 

1682 ----- 

1683 Dtypes for structured arrays can be specified in several forms, but all 

1684 forms specify at least the data type and field name. For details see 

1685 `basics.rec`. 

1686 

1687 Examples 

1688 -------- 

1689 >>> from io import StringIO 

1690 >>> text = StringIO("1312 foo\n1534 bar\n444 qux") 

1691 

1692 >>> regexp = r"(\d+)\s+(...)" # match [digits, whitespace, anything] 

1693 >>> output = np.fromregex(text, regexp, 

1694 ... [('num', np.int64), ('key', 'S3')]) 

1695 >>> output 

1696 array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')], 

1697 dtype=[('num', '<i8'), ('key', 'S3')]) 

1698 >>> output['num'] 

1699 array([1312, 1534,  444])
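
A pattern with a single group still produces a structured array
(illustrative; reuses ``text`` from above):

>>> _ = text.seek(0)
>>> np.fromregex(text, r"(\d+)\s+...", [('num', np.int64)])
array([(1312,), (1534,), ( 444,)], dtype=[('num', '<i8')])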

1700 

1701 """ 

1702 own_fh = False 

1703 if not hasattr(file, "read"): 

1704 file = os.fspath(file) 

1705 file = np.lib._datasource.open(file, 'rt', encoding=encoding) 

1706 own_fh = True 

1707 

1708 try: 

1709 if not isinstance(dtype, np.dtype): 

1710 dtype = np.dtype(dtype) 

1711 if dtype.names is None: 

1712 raise TypeError('dtype must be a structured datatype.') 

1713 

1714 content = file.read() 

1715 if isinstance(content, bytes) and isinstance(regexp, str): 

1716 regexp = asbytes(regexp) 

1717 

1718 if not hasattr(regexp, 'match'): 

1719 regexp = re.compile(regexp) 

1720 seq = regexp.findall(content) 

1721 if seq and not isinstance(seq[0], tuple): 

1722 # Only one group is in the regexp. 

1723 # Create the new array as a single data-type and then 

1724 # re-interpret as a single-field structured array. 

1725 newdtype = np.dtype(dtype[dtype.names[0]]) 

1726 output = np.array(seq, dtype=newdtype) 

1727 output.dtype = dtype 

1728 else: 

1729 output = np.array(seq, dtype=dtype) 

1730 

1731 return output 

1732 finally: 

1733 if own_fh: 

1734 file.close() 

1735 

1736 

1737#####-------------------------------------------------------------------------- 

1738#---- --- ASCII functions --- 

1739#####-------------------------------------------------------------------------- 

1740 

1741 

1742@set_array_function_like_doc 

1743@set_module('numpy') 

1744def genfromtxt(fname, dtype=float, comments='#', delimiter=None, 

1745 skip_header=0, skip_footer=0, converters=None, 

1746 missing_values=None, filling_values=None, usecols=None, 

1747 names=None, excludelist=None, 

1748 deletechars=''.join(sorted(NameValidator.defaultdeletechars)), 

1749 replace_space='_', autostrip=False, case_sensitive=True, 

1750 defaultfmt="f%i", unpack=None, usemask=False, loose=True, 

1751 invalid_raise=True, max_rows=None, encoding=None, 

1752 *, ndmin=0, like=None): 

1753 """ 

1754 Load data from a text file, with missing values handled as specified. 

1755 

1756 Each line past the first `skip_header` lines is split at the `delimiter` 

1757 character, and characters following the `comments` character are discarded. 

1758 

1759 Parameters 

1760 ---------- 

1761 fname : file, str, pathlib.Path, list of str, generator 

1762 File, filename, list, or generator to read. If the filename 

1763 extension is ``.gz`` or ``.bz2``, the file is first decompressed. Note 

1764 that generators must return bytes or strings. The strings 

1765 in a list or produced by a generator are treated as lines. 

1766 dtype : dtype, optional 

1767 Data type of the resulting array. 

1768 If None, the dtypes will be determined by the contents of each 

1769 column, individually. 

1770 comments : str, optional 

1771 The character used to indicate the start of a comment. 

1772 All the characters occurring on a line after a comment are discarded. 

1773 delimiter : str, int, or sequence, optional 

1774 The string used to separate values. By default, any consecutive 

1775 whitespaces act as delimiter. An integer or sequence of integers 

1776 can also be provided as width(s) of each field. 

1777 skiprows : int, optional 

1778 `skiprows` was removed in numpy 1.10. Please use `skip_header` instead. 

1779 skip_header : int, optional 

1780 The number of lines to skip at the beginning of the file. 

1781 skip_footer : int, optional 

1782 The number of lines to skip at the end of the file. 

1783 converters : variable, optional 

1784 The set of functions that convert the data of a column to a value. 

1785 The converters can also be used to provide a default value 

1786 for missing data: ``converters = {3: lambda s: float(s or 0)}``. 

1787 missing : variable, optional 

1788 `missing` was removed in numpy 1.10. Please use `missing_values` 

1789 instead. 

1790 missing_values : variable, optional 

1791 The set of strings corresponding to missing data. 

1792 filling_values : variable, optional 

1793 The set of values to be used as default when the data are missing. 

1794 usecols : sequence, optional 

1795 Which columns to read, with 0 being the first. For example, 

1796 ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. 

1797 names : {None, True, str, sequence}, optional 

1798 If `names` is True, the field names are read from the first line after 

1799 the first `skip_header` lines. This line can optionally be preceded 

1800 by a comment delimiter. Any content before the comment delimiter is 

1801 discarded. If `names` is a sequence or a single string of

1802 comma-separated names, the names will be used to define the field 

1803 names in a structured dtype. If `names` is None, the names of the 

1804 dtype fields will be used, if any. 

1805 excludelist : sequence, optional 

1806 A list of names to exclude. This list is appended to the default list 

1807 ['return','file','print']. Excluded names are appended with an 

1808 underscore: for example, `file` would become `file_`. 

1809 deletechars : str, optional 

1810 A string combining invalid characters that must be deleted from the 

1811 names. 

1812 defaultfmt : str, optional 

1813 A format used to define default field names, such as "f%i" or "f_%02i". 

1814 autostrip : bool, optional 

1815 Whether to automatically strip white spaces from the variables. 

1816 replace_space : char, optional 

1817 Character(s) used to replace white spaces in the variable

1818 names. By default, '_' is used.

1819 case_sensitive : {True, False, 'upper', 'lower'}, optional 

1820 If True, field names are case sensitive. 

1821 If False or 'upper', field names are converted to upper case. 

1822 If 'lower', field names are converted to lower case. 

1823 unpack : bool, optional 

1824 If True, the returned array is transposed, so that arguments may be 

1825 unpacked using ``x, y, z = genfromtxt(...)``. When used with a 

1826 structured data-type, arrays are returned for each field. 

1827 Default is False. 

1828 usemask : bool, optional 

1829 If True, return a masked array. 

1830 If False, return a regular array. 

1831 loose : bool, optional 

1832 If True, do not raise errors for invalid values. 

1833 invalid_raise : bool, optional 

1834 If True, an exception is raised if an inconsistency is detected in the 

1835 number of columns. 

1836 If False, a warning is emitted and the offending lines are skipped. 

1837 max_rows : int, optional 

1838 The maximum number of rows to read. Must not be used with skip_footer 

1839 at the same time. If given, the value must be at least 1. Default is 

1840 to read the entire file. 

1841 

1842 .. versionadded:: 1.10.0 

1843 encoding : str, optional 

1844 Encoding used to decode the input file. Does not apply when `fname`

1845 is a file object. The special value 'bytes' enables backward 

1846 compatibility workarounds that ensure that you receive byte arrays 

1847 when possible and pass latin1-encoded strings to converters.

1848 Override this value to receive unicode arrays and pass strings 

1849 as input to converters. If set to None the system default is used. 

1850 The default value is None.

1851 

1852 .. versionadded:: 1.14.0 

1853 .. versionchanged:: 2.0 

1854 Before NumPy 2, the default was ``'bytes'`` for Python 2 

1855 compatibility. The default is now ``None``. 

1856 

1857 ndmin : int, optional 

1858 Same parameter as `loadtxt`.

1859 

1860 .. versionadded:: 1.23.0 

1861 ${ARRAY_FUNCTION_LIKE} 

1862 

1863 .. versionadded:: 1.20.0 

1864 

1865 Returns 

1866 ------- 

1867 out : ndarray 

1868 Data read from the text file. If `usemask` is True, this is a 

1869 masked array. 

1870 

1871 See Also 

1872 -------- 

1873 numpy.loadtxt : equivalent function when no data is missing. 

1874 

1875 Notes 

1876 ----- 

1877 * When spaces are used as delimiters, or when no delimiter has been given 

1878 as input, there should not be any missing data between two fields. 

1879 * When variables are named (either by a flexible dtype or with a `names` 

1880 sequence), there must not be any header in the file (else a ValueError 

1881 exception is raised). 

1882 * Individual values are not stripped of spaces by default. 

1883 When using a custom converter, make sure the function removes spaces.

1884 * Custom converters may receive unexpected values due to dtype 

1885 discovery. 

1886 

1887 References 

1888 ---------- 

1889 .. [1] NumPy User Guide, section `I/O with NumPy 

1890 <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. 

1891 

1892 Examples 

1893 -------- 

1894 >>> from io import StringIO 

1895 >>> import numpy as np 

1896 

1897 Comma delimited file with mixed dtype 

1898 

1899 >>> s = StringIO("1,1.3,abcde") 

1900 >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), 

1901 ... ('mystring','S5')], delimiter=",") 

1902 >>> data 

1903 array((1, 1.3, b'abcde'), 

1904 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 

1905 

1906 Using dtype = None 

1907 

1908 >>> _ = s.seek(0) # needed for StringIO example only 

1909 >>> data = np.genfromtxt(s, dtype=None, 

1910 ... names = ['myint','myfloat','mystring'], delimiter=",") 

1911 >>> data 

1912 array((1, 1.3, 'abcde'), 

1913 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '<U5')]) 

1914 

1915 Specifying dtype and names 

1916 

1917 >>> _ = s.seek(0) 

1918 >>> data = np.genfromtxt(s, dtype="i8,f8,S5", 

1919 ... names=['myint','myfloat','mystring'], delimiter=",") 

1920 >>> data 

1921 array((1, 1.3, b'abcde'), 

1922 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 

1923 

1924 An example with fixed-width columns 

1925 

1926 >>> s = StringIO("11.3abcde") 

1927 >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], 

1928 ... delimiter=[1,3,5]) 

1929 >>> data 

1930 array((1, 1.3, 'abcde'), 

1931 dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '<U5')]) 

1932 

1933 An example to show comments 

1934 

1935 >>> f = StringIO(''' 

1936 ... text,# of chars 

1937 ... hello world,11 

1938 ... numpy,5''') 

1939 >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',') 

1940 array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')], 

1941 dtype=[('f0', 'S12'), ('f1', 'S12')]) 
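
An example with missing values filled in (illustrative):

>>> s = StringIO("1,2,3\n4,,6")
>>> np.genfromtxt(s, delimiter=",", filling_values=-1)
array([[ 1.,  2.,  3.],
       [ 4., -1.,  6.]])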

1942 

1943 """ 

1944 

1945 if like is not None: 

1946 return _genfromtxt_with_like( 

1947 like, fname, dtype=dtype, comments=comments, delimiter=delimiter, 

1948 skip_header=skip_header, skip_footer=skip_footer, 

1949 converters=converters, missing_values=missing_values, 

1950 filling_values=filling_values, usecols=usecols, names=names, 

1951 excludelist=excludelist, deletechars=deletechars, 

1952 replace_space=replace_space, autostrip=autostrip, 

1953 case_sensitive=case_sensitive, defaultfmt=defaultfmt, 

1954 unpack=unpack, usemask=usemask, loose=loose, 

1955 invalid_raise=invalid_raise, max_rows=max_rows, encoding=encoding, 

1956 ndmin=ndmin, 

1957 ) 

1958 

1959 _ensure_ndmin_ndarray_check_param(ndmin) 

1960 

1961 if max_rows is not None: 

1962 if skip_footer: 

1963 raise ValueError( 

1964 "The keywords 'skip_footer' and 'max_rows' can not be " 

1965 "specified at the same time.") 

1966 if max_rows < 1: 

1967 raise ValueError("'max_rows' must be at least 1.") 

1968 

1969 if usemask: 

1970 from numpy.ma import MaskedArray, make_mask_descr 

1971 # Check the input dictionary of converters 

1972 user_converters = converters or {} 

1973 if not isinstance(user_converters, dict): 

1974 raise TypeError( 

1975 "The input argument 'converter' should be a valid dictionary " 

1976 "(got '%s' instead)" % type(user_converters)) 

1977 
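# The special value 'bytes' reproduces the pre-1.14 behaviour: the file
# is opened with the system default encoding, but converters receive
# latin1-encoded byte strings and string columns come back as bytes.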

1978 if encoding == 'bytes': 

1979 encoding = None 

1980 byte_converters = True 

1981 else: 

1982 byte_converters = False 

1983 

1984 # Initialize the filehandle, the LineSplitter and the NameValidator 

1985 if isinstance(fname, os.PathLike): 

1986 fname = os.fspath(fname) 

1987 if isinstance(fname, str): 

1988 fid = np.lib._datasource.open(fname, 'rt', encoding=encoding) 

1989 fid_ctx = contextlib.closing(fid) 

1990 else: 

1991 fid = fname 

1992 fid_ctx = contextlib.nullcontext(fid) 

1993 try: 

1994 fhd = iter(fid) 

1995 except TypeError as e: 

1996 raise TypeError( 

1997 "fname must be a string, a filehandle, a sequence of strings,\n" 

1998 f"or an iterator of strings. Got {type(fname)} instead." 

1999 ) from e 

2000 with fid_ctx: 

2001 split_line = LineSplitter(delimiter=delimiter, comments=comments, 

2002 autostrip=autostrip, encoding=encoding) 

2003 validate_names = NameValidator(excludelist=excludelist, 

2004 deletechars=deletechars, 

2005 case_sensitive=case_sensitive, 

2006 replace_space=replace_space) 

2007 

2008 # Skip the first `skip_header` rows 

2009 try: 

2010 for i in range(skip_header): 

2011 next(fhd) 

2012 

2013 # Keep on until we find the first valid values 

2014 first_values = None 

2015 

2016 while not first_values: 

2017 first_line = _decode_line(next(fhd), encoding) 

2018 if (names is True) and (comments is not None): 

2019 if comments in first_line: 

2020 first_line = ( 

2021 ''.join(first_line.split(comments)[1:])) 

2022 first_values = split_line(first_line) 

2023 except StopIteration: 

2024 # return an empty array if the datafile is empty 

2025 first_line = '' 

2026 first_values = [] 

2027 warnings.warn( 

2028 'genfromtxt: Empty input file: "%s"' % fname, stacklevel=2 

2029 ) 

2030 

2031 # Should we take the first values as names?

2032 if names is True: 

2033 fval = first_values[0].strip() 

2034 if comments is not None: 

2035 if fval in comments: 

2036 del first_values[0] 

2037 

2038 # Check the columns to use: make sure `usecols` is a list 

2039 if usecols is not None: 
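# `usecols` may also be a comma-separated string of column names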

2040 try: 

2041 usecols = [_.strip() for _ in usecols.split(",")] 

2042 except AttributeError: 

2043 try: 

2044 usecols = list(usecols) 

2045 except TypeError: 

2046 usecols = [usecols, ] 

2047 nbcols = len(usecols or first_values) 

2048 

2049 # Check the names and overwrite the dtype.names if needed 

2050 if names is True: 

2051 names = validate_names([str(_.strip()) for _ in first_values]) 

2052 first_line = '' 

2053 elif _is_string_like(names): 

2054 names = validate_names([_.strip() for _ in names.split(',')]) 

2055 elif names: 

2056 names = validate_names(names) 

2057 # Get the dtype 

2058 if dtype is not None: 

2059 dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, 

2060 excludelist=excludelist, 

2061 deletechars=deletechars, 

2062 case_sensitive=case_sensitive, 

2063 replace_space=replace_space) 

2064 # Make sure `names` is a list (originally for Python 2.5)

2065 if names is not None: 

2066 names = list(names) 

2067 

2068 if usecols: 

2069 for (i, current) in enumerate(usecols): 

2070 # if usecols is a list of names, convert to a list of indices 

2071 if _is_string_like(current): 

2072 usecols[i] = names.index(current) 

2073 elif current < 0: 

2074 usecols[i] = current + len(first_values) 

2075 # If the dtype is not None, make sure we update it 

2076 if (dtype is not None) and (len(dtype) > nbcols): 

2077 descr = dtype.descr 

2078 dtype = np.dtype([descr[_] for _ in usecols]) 

2079 names = list(dtype.names) 

2080 # If `names` is not None, update the names 

2081 elif (names is not None) and (len(names) > nbcols): 

2082 names = [names[_] for _ in usecols] 

2083 elif (names is not None) and (dtype is not None): 

2084 names = list(dtype.names) 

2085 

2086 # Process the missing values ............................... 

2087 # Rename missing_values for convenience 

2088 user_missing_values = missing_values or () 

2089 if isinstance(user_missing_values, bytes): 

2090 user_missing_values = user_missing_values.decode('latin1') 

2091 

2092 # Define the list of missing_values (one column: one list) 

2093 missing_values = [list(['']) for _ in range(nbcols)] 

2094 

2095 # We have a dictionary: process it field by field 

2096 if isinstance(user_missing_values, dict): 

2097 # Loop on the items 

2098 for (key, val) in user_missing_values.items(): 

2099 # Is the key a string?

2100 if _is_string_like(key): 

2101 try: 

2102 # Transform it into an integer 

2103 key = names.index(key) 

2104 except ValueError: 

2105 # We couldn't find it: the name must have been dropped 

2106 continue 

2107 # Redefine the key as needed if it's a column number 

2108 if usecols: 

2109 try: 

2110 key = usecols.index(key) 

2111 except ValueError: 

2112 pass 

2113 # Transform the value as a list of string 

2114 if isinstance(val, (list, tuple)): 

2115 val = [str(_) for _ in val] 

2116 else: 

2117 val = [str(val), ] 

2118 # Add the value(s) to the current list of missing 

2119 if key is None: 

2120 # None acts as default 

2121 for miss in missing_values: 

2122 miss.extend(val) 

2123 else: 

2124 missing_values[key].extend(val) 

2125 # We have a sequence: each item matches a column

2126 elif isinstance(user_missing_values, (list, tuple)): 

2127 for (value, entry) in zip(user_missing_values, missing_values): 

2128 value = str(value) 

2129 if value not in entry: 

2130 entry.append(value) 

2131 # We have a string: apply it to all entries

2132 elif isinstance(user_missing_values, str): 

2133 user_value = user_missing_values.split(",") 

2134 for entry in missing_values: 

2135 entry.extend(user_value) 

2136 # We have something else: apply it to all entries 

2137 else: 

2138 for entry in missing_values: 

2139 entry.extend([str(user_missing_values)]) 

2140 

2141 # Process the filling_values ............................... 

2142 # Rename the input for convenience 

2143 user_filling_values = filling_values 

2144 if user_filling_values is None: 

2145 user_filling_values = [] 

2146 # Define the default 

2147 filling_values = [None] * nbcols 

2148 # We have a dictionary: update each entry individually

2149 if isinstance(user_filling_values, dict): 

2150 for (key, val) in user_filling_values.items(): 

2151 if _is_string_like(key): 

2152 try: 

2153 # Transform it into an integer 

2154 key = names.index(key) 

2155 except ValueError: 

2156 # We couldn't find it: the name must have been dropped 

2157 continue 

2158 # Redefine the key if it's a column number 

2159 # and usecols is defined 

2160 if usecols: 

2161 try: 

2162 key = usecols.index(key) 

2163 except ValueError: 

2164 pass 

2165 # Add the value to the list 

2166 filling_values[key] = val 

2167 # We have a sequence: update on a one-to-one basis

2168 elif isinstance(user_filling_values, (list, tuple)): 

2169 n = len(user_filling_values) 

2170 if (n <= nbcols): 

2171 filling_values[:n] = user_filling_values 

2172 else: 

2173 filling_values = user_filling_values[:nbcols] 

2174 # We have something else: use it for all entries

2175 else: 

2176 filling_values = [user_filling_values] * nbcols 

2177 

2178 # Initialize the converters ................................ 

2179 if dtype is None: 

2180 # Note: we can't use a [...]*nbcols, as we would have 3 times 

2181 # the same converter, instead of 3 different converters. 

2182 converters = [ 

2183 StringConverter(None, missing_values=miss, default=fill) 

2184 for (miss, fill) in zip(missing_values, filling_values) 

2185 ] 

2186 else: 

2187 dtype_flat = flatten_dtype(dtype, flatten_base=True) 

2188 # Initialize the converters 

2189 if len(dtype_flat) > 1: 

2190 # Flexible type: get a converter from each dtype

2191 zipit = zip(dtype_flat, missing_values, filling_values) 

2192 converters = [StringConverter(dt, 

2193 locked=True, 

2194 missing_values=miss, 

2195 default=fill) 

2196 for (dt, miss, fill) in zipit] 

2197 else: 

2198 # Set to a default converter (but w/ different missing values) 

2199 zipit = zip(missing_values, filling_values) 

2200 converters = [StringConverter(dtype, 

2201 locked=True, 

2202 missing_values=miss, 

2203 default=fill) 

2204 for (miss, fill) in zipit] 

2205 # Update the converters to use the user-defined ones 

2206 uc_update = [] 

2207 for (j, conv) in user_converters.items(): 

2208 # If the converter is specified by column names, 

2209 # use the index instead 

2210 if _is_string_like(j): 

2211 try: 

2212 j = names.index(j) 

2213 i = j 

2214 except ValueError: 

2215 continue 

2216 elif usecols: 

2217 try: 

2218 i = usecols.index(j) 

2219 except ValueError: 

2220 # Unused converter specified 

2221 continue 

2222 else: 

2223 i = j 

2224 # Find the value to test - first_line is not filtered by usecols: 

2225 if len(first_line): 

2226 testing_value = first_values[j] 

2227 else: 

2228 testing_value = None 

2229 if conv is bytes: 

2230 user_conv = asbytes 

2231 elif byte_converters: 

2232 # Converters may use decode to workaround numpy's old 

2233 # behavior, so encode the string again before passing 

2234 # to the user converter. 

2235 def tobytes_first(x, conv): 

2236 if type(x) is bytes: 

2237 return conv(x) 

2238 return conv(x.encode("latin1")) 

2239 user_conv = functools.partial(tobytes_first, conv=conv) 

2240 else: 

2241 user_conv = conv 

2242 converters[i].update(user_conv, locked=True, 

2243 testing_value=testing_value, 

2244 default=filling_values[i], 

2245 missing_values=missing_values[i],) 

2246 uc_update.append((i, user_conv)) 

2247 # Make sure we have the corrected keys in user_converters... 

2248 user_converters.update(uc_update) 

2249 

2250 # Fixme: possible error, as the following variable is never used.

2251 # miss_chars = [_.missing_values for _ in converters] 

2252 

2253 # Initialize the output lists ... 

2254 # ... rows 

2255 rows = [] 

2256 append_to_rows = rows.append 

2257 # ... masks 

2258 if usemask: 

2259 masks = [] 

2260 append_to_masks = masks.append 

2261 # ... invalid 

2262 invalid = [] 

2263 append_to_invalid = invalid.append 

2264 

2265 # Parse each line 
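# note: first_line is '' here when it was consumed as a names line or
# the file was empty; split_line('') gives [] and the line is skipped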

2266 for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): 

2267 values = split_line(line) 

2268 nbvalues = len(values) 

2269 # Skip an empty line 

2270 if nbvalues == 0: 

2271 continue 

2272 if usecols: 

2273 # Select only the columns we need 

2274 try: 

2275 values = [values[_] for _ in usecols] 

2276 except IndexError: 

2277 append_to_invalid((i + skip_header + 1, nbvalues)) 

2278 continue 

2279 elif nbvalues != nbcols: 

2280 append_to_invalid((i + skip_header + 1, nbvalues)) 

2281 continue 

2282 # Store the values 

2283 append_to_rows(tuple(values)) 

2284 if usemask: 

2285 append_to_masks(tuple([v.strip() in m 

2286 for (v, m) in zip(values, 

2287 missing_values)])) 

2288 if len(rows) == max_rows: 

2289 break 

2290 

2291 # Upgrade the converters (if needed) 

2292 if dtype is None: 

2293 for (i, converter) in enumerate(converters): 

2294 current_column = [itemgetter(i)(_m) for _m in rows] 

2295 try: 

2296 converter.iterupgrade(current_column) 

2297 except ConverterLockError: 

2298 errmsg = "Converter #%i is locked and cannot be upgraded: " % i 

2299 current_column = map(itemgetter(i), rows) 

2300 for (j, value) in enumerate(current_column): 

2301 try: 

2302 converter.upgrade(value) 

2303 except (ConverterError, ValueError): 

2304 errmsg += "(occurred line #%i for value '%s')" 

2305 errmsg %= (j + 1 + skip_header, value) 

2306 raise ConverterError(errmsg) 

2307 

2308 # Check that we don't have invalid values 

2309 nbinvalid = len(invalid) 

2310 if nbinvalid > 0: 

2311 nbrows = len(rows) + nbinvalid - skip_footer 

2312 # Construct the error message 

2313 template = " Line #%%i (got %%i columns instead of %i)" % nbcols 

2314 if skip_footer > 0: 

2315 nbinvalid_skipped = len([_ for _ in invalid 

2316 if _[0] > nbrows + skip_header]) 

2317 invalid = invalid[:nbinvalid - nbinvalid_skipped] 

2318 skip_footer -= nbinvalid_skipped 

2319# 

2320# nbrows -= skip_footer 

2321# errmsg = [template % (i, nb) 

2322# for (i, nb) in invalid if i < nbrows] 

2323# else: 

2324 errmsg = [template % (i, nb) 

2325 for (i, nb) in invalid] 

2326 if len(errmsg): 

2327 errmsg.insert(0, "Some errors were detected!")

2328 errmsg = "\n".join(errmsg) 

2329 # Raise an exception?

2330 if invalid_raise: 

2331 raise ValueError(errmsg) 

2332 # Issue a warning?

2333 else: 

2334 warnings.warn(errmsg, ConversionWarning, stacklevel=2) 

2335 

2336 # Strip the last skip_footer data 

2337 if skip_footer > 0: 

2338 rows = rows[:-skip_footer] 

2339 if usemask: 

2340 masks = masks[:-skip_footer] 

2341 

2342 # Convert each value according to the converter: 

2343 # We want to modify the list in place to avoid creating a new one... 

2344 if loose: 

2345 rows = list( 

2346 zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)] 

2347 for (i, conv) in enumerate(converters)])) 

2348 else: 

2349 rows = list( 

2350 zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)] 

2351 for (i, conv) in enumerate(converters)])) 

2352 

2353 # Reset the dtype 

2354 data = rows 

2355 if dtype is None: 

2356 # Get the dtypes from the types of the converters 

2357 column_types = [conv.type for conv in converters] 

2358 # Find the columns with strings... 

2359 strcolidx = [i for (i, v) in enumerate(column_types) 

2360 if v == np.str_] 

2361 

2362 if byte_converters and strcolidx: 

2363 # convert strings back to bytes for backward compatibility 

2364 warnings.warn( 

2365 "Reading unicode strings without specifying the encoding " 

2366 "argument is deprecated. Set the encoding, use None for the " 

2367 "system default.", 

2368 np.exceptions.VisibleDeprecationWarning, stacklevel=2) 

2369 

2370 def encode_unicode_cols(row_tup): 

2371 row = list(row_tup) 

2372 for i in strcolidx: 

2373 row[i] = row[i].encode('latin1') 

2374 return tuple(row) 

2375 

2376 try: 

2377 data = [encode_unicode_cols(r) for r in data] 

2378 except UnicodeEncodeError: 

2379 pass 

2380 else: 

2381 for i in strcolidx: 

2382 column_types[i] = np.bytes_ 

2383 

2384 # Update string types to be the right length 

2385 sized_column_types = column_types[:] 

2386 for i, col_type in enumerate(column_types): 

2387 if np.issubdtype(col_type, np.character): 

2388 n_chars = max(len(row[i]) for row in data) 

2389 sized_column_types[i] = (col_type, n_chars) 

2390 

2391 if names is None: 

2392 # If the dtype is uniform (before sizing strings) 

2393 base = { 

2394 c_type 

2395 for c, c_type in zip(converters, column_types) 

2396 if c._checked} 

2397 if len(base) == 1: 

2398 uniform_type, = base 

2399 (ddtype, mdtype) = (uniform_type, bool) 

2400 else: 

2401 ddtype = [(defaultfmt % i, dt) 

2402 for (i, dt) in enumerate(sized_column_types)] 

2403 if usemask: 

2404 mdtype = [(defaultfmt % i, bool) 

2405 for (i, dt) in enumerate(sized_column_types)] 

2406 else: 

2407 ddtype = list(zip(names, sized_column_types)) 

2408 mdtype = list(zip(names, [bool] * len(sized_column_types))) 

2409 output = np.array(data, dtype=ddtype) 

2410 if usemask: 

2411 outputmask = np.array(masks, dtype=mdtype) 

2412 else: 

2413 # Overwrite the initial dtype names if needed 

2414 if names and dtype.names is not None: 

2415 dtype.names = names 

2416 # Case 1. We have a structured type 

2417 if len(dtype_flat) > 1: 

2418 # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])] 

2419 # First, create the array using a flattened dtype: 

2420 # [('a', int), ('b1', int), ('b2', float)] 

2421 # Then, view the array using the specified dtype. 

2422 if 'O' in (_.char for _ in dtype_flat): 

2423 if has_nested_fields(dtype): 

2424 raise NotImplementedError( 

2425 "Nested fields involving objects are not supported...") 

2426 else: 

2427 output = np.array(data, dtype=dtype) 

2428 else: 

2429 rows = np.array(data, dtype=[('', _) for _ in dtype_flat]) 

2430 output = rows.view(dtype) 

2431 # Now, process the rowmasks the same way 

2432 if usemask: 

2433 rowmasks = np.array( 

2434 masks, dtype=np.dtype([('', bool) for t in dtype_flat])) 

2435 # Construct the new dtype 

2436 mdtype = make_mask_descr(dtype) 

2437 outputmask = rowmasks.view(mdtype) 

2438 # Case #2. We have a basic dtype 

2439 else: 

2440 # We used some user-defined converters 

2441 if user_converters: 

2442 ishomogeneous = True 

2443 descr = [] 

2444 for i, ttype in enumerate([conv.type for conv in converters]): 

2445 # Keep the dtype of the current converter 

2446 if i in user_converters: 

2447 ishomogeneous &= (ttype == dtype.type) 

2448 if np.issubdtype(ttype, np.character): 

2449 ttype = (ttype, max(len(row[i]) for row in data)) 

2450 descr.append(('', ttype)) 

2451 else: 

2452 descr.append(('', dtype)) 

2453 # So we changed the dtype?

2454 if not ishomogeneous: 

2455 # We have more than one field 

2456 if len(descr) > 1: 

2457 dtype = np.dtype(descr) 

2458 # We have only one field: drop the name if not needed. 

2459 else: 

2460 dtype = np.dtype(ttype) 

2461 # 

2462 output = np.array(data, dtype) 

2463 if usemask: 

2464 if dtype.names is not None: 

2465 mdtype = [(_, bool) for _ in dtype.names] 

2466 else: 

2467 mdtype = bool 

2468 outputmask = np.array(masks, dtype=mdtype) 

2469 # Try to take care of the missing data we missed 

2470 names = output.dtype.names 

2471 if usemask and names: 

2472 for (name, conv) in zip(names, converters): 

2473 missing_values = [conv(_) for _ in conv.missing_values 

2474 if _ != ''] 

2475 for mval in missing_values: 

2476 outputmask[name] |= (output[name] == mval) 

2477 # Construct the final array 

2478 if usemask: 

2479 output = output.view(MaskedArray) 

2480 output._mask = outputmask 

2481 

2482 output = _ensure_ndmin_ndarray(output, ndmin=ndmin) 

2483 

2484 if unpack: 

2485 if names is None: 

2486 return output.T 

2487 elif len(names) == 1: 

2488 # squeeze single-name dtypes too 

2489 return output[names[0]] 

2490 else: 

2491 # For structured arrays with multiple fields, 

2492 # return an array for each field. 

2493 return [output[field] for field in names] 

2494 return output 

2495 

2496 

2497_genfromtxt_with_like = array_function_dispatch()(genfromtxt) 

2498 

2499 

2500def recfromtxt(fname, **kwargs): 

2501 """ 

2502 Load ASCII data from a file and return it in a record array. 

2503 

2504 If ``usemask=False`` a standard `recarray` is returned, 

2505 if ``usemask=True`` a MaskedRecords array is returned. 

2506 

2507 .. deprecated:: 2.0 

2508 Use `numpy.genfromtxt` instead. 

2509 

2510 Parameters 

2511 ---------- 

2512 fname, kwargs : For a description of input parameters, see `genfromtxt`. 

2513 

2514 See Also 

2515 -------- 

2516 numpy.genfromtxt : generic function 

2517 

2518 Notes 

2519 ----- 

2520 By default, `dtype` is None, which means that the data-type of the output 

2521 array will be determined from the data. 

2522 

2523 """ 

2524 

2525 # Deprecated in NumPy 2.0, 2023-07-11 

2526 warnings.warn( 

2527 "`recfromtxt` is deprecated, " 

2528 "use `numpy.genfromtxt` instead." 

2529 "(deprecated in NumPy 2.0)", 

2530 DeprecationWarning, 

2531 stacklevel=2 

2532 ) 

2533 

2534 kwargs.setdefault("dtype", None) 

2535 usemask = kwargs.get('usemask', False) 

2536 output = genfromtxt(fname, **kwargs) 

2537 if usemask: 

2538 from numpy.ma.mrecords import MaskedRecords 

2539 output = output.view(MaskedRecords) 

2540 else: 

2541 output = output.view(np.recarray) 

2542 return output 

2543 

2544 

2545def recfromcsv(fname, **kwargs): 

2546 """ 

2547 Load ASCII data stored in a comma-separated file. 

2548 

2549 The returned array is a record array (if ``usemask=False``, see 

2550 `recarray`) or a masked record array (if ``usemask=True``, 

2551 see `ma.mrecords.MaskedRecords`). 

2552 

2553 .. deprecated:: 2.0 

2554 Use `numpy.genfromtxt` with comma as `delimiter` instead. 

2555 

2556 Parameters 

2557 ---------- 

2558 fname, kwargs : For a description of input parameters, see `genfromtxt`. 

2559 

2560 See Also 

2561 -------- 

2562 numpy.genfromtxt : generic function to load ASCII data. 

2563 

2564 Notes 

2565 ----- 

2566 By default, `dtype` is None, which means that the data-type of the output 

2567 array will be determined from the data. 
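
For migration, a rough `genfromtxt` equivalent of these defaults
(illustrative):

>>> from io import StringIO
>>> buf = StringIO("a,b\n1,2.5\n3,4.5")
>>> rec = np.genfromtxt(buf, delimiter=",", names=True, dtype=None,
...                     case_sensitive="lower").view(np.recarray)
>>> rec.a
array([1, 3])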

2568 

2569 """ 

2570 

2571 # Deprecated in NumPy 2.0, 2023-07-11 

2572 warnings.warn( 

2573 "`recfromcsv` is deprecated, " 

2574 "use `numpy.genfromtxt` with comma as `delimiter` instead. " 

2575 "(deprecated in NumPy 2.0)", 

2576 DeprecationWarning, 

2577 stacklevel=2 

2578 ) 

2579 

2580 # Set default kwargs for genfromtxt as relevant to csv import. 

2581 kwargs.setdefault("case_sensitive", "lower") 

2582 kwargs.setdefault("names", True) 

2583 kwargs.setdefault("delimiter", ",") 

2584 kwargs.setdefault("dtype", None) 

2585 output = genfromtxt(fname, **kwargs) 

2586 

2587 usemask = kwargs.get("usemask", False) 

2588 if usemask: 

2589 from numpy.ma.mrecords import MaskedRecords 

2590 output = output.view(MaskedRecords) 

2591 else: 

2592 output = output.view(np.recarray) 

2593 return output