Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/joblib/numpy_pickle.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

238 statements  

1"""Utilities for fast persistence of big data, with optional compression.""" 

2 

3# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> 

4# Copyright (c) 2009 Gael Varoquaux 

5# License: BSD Style, 3 clauses. 

6 

7import io 

8import os 

9import pickle 

10import warnings 

11from pathlib import Path 

12 

13from .backports import make_memmap 

14from .compressor import ( 

15 _COMPRESSORS, 

16 LZ4_NOT_INSTALLED_ERROR, 

17 BinaryZlibFile, 

18 BZ2CompressorWrapper, 

19 GzipCompressorWrapper, 

20 LZ4CompressorWrapper, 

21 LZMACompressorWrapper, 

22 XZCompressorWrapper, 

23 ZlibCompressorWrapper, 

24 lz4, 

25 register_compressor, 

26) 

27 

28# For compatibility with old versions of joblib, we need ZNDArrayWrapper 

29# to be visible in the current namespace. 

30from .numpy_pickle_compat import ( 

31 NDArrayWrapper, 

32 ZNDArrayWrapper, # noqa: F401 

33 load_compatibility, 

34) 

35from .numpy_pickle_utils import ( 

36 BUFFER_SIZE, 

37 Pickler, 

38 Unpickler, 

39 _ensure_native_byte_order, 

40 _read_bytes, 

41 _reconstruct, 

42 _validate_fileobject_and_memmap, 

43 _write_fileobject, 

44) 

45 

46# Register supported compressors 

register_compressor("zlib", ZlibCompressorWrapper())
register_compressor("gzip", GzipCompressorWrapper())
register_compressor("bz2", BZ2CompressorWrapper())
register_compressor("lzma", LZMACompressorWrapper())
register_compressor("xz", XZCompressorWrapper())
# Registration succeeds even when the optional lz4 package is missing;
# `dump` raises LZ4_NOT_INSTALLED_ERROR at use time if `lz4 is None`.
register_compressor("lz4", LZ4CompressorWrapper())

53 

54 

55############################################################################### 

56# Utility objects for persistence. 

57 

58# For convenience, 16 bytes are used to be sure to cover all the possible 

59# dtypes' alignments. For reference, see: 

60# https://numpy.org/devdocs/dev/alignment.html 

61NUMPY_ARRAY_ALIGNMENT_BYTES = 16 

62 

63 

class NumpyArrayWrapper(object):
    """An object to be persisted instead of numpy arrays.

    This object is used to hack into the pickle machinery and read numpy
    array data from our custom persistence format.
    More precisely, this object is used for:
    * carrying the information of the persisted array: subclass, shape, order,
      dtype. Those ndarray metadata are used to correctly reconstruct the
      array with low level numpy functions.
    * determining if memmap is allowed on the array.
    * reading the array bytes from a file.
    * reading the array using memorymap from a file.
    * writing the array bytes to a file.

    Attributes
    ----------
    subclass: numpy.ndarray subclass
        Determine the subclass of the wrapped array.
    shape: numpy.ndarray shape
        Determine the shape of the wrapped array.
    order: {'C', 'F'}
        Determine the order of wrapped array data. 'C' is for C order, 'F' is
        for fortran order.
    dtype: numpy.ndarray dtype
        Determine the data type of the wrapped array.
    allow_mmap: bool
        Determine if memory mapping is allowed on the wrapped array.
        Default: False.
    """

    def __init__(
        self,
        subclass,
        shape,
        order,
        dtype,
        allow_mmap=False,
        numpy_array_alignment_bytes=NUMPY_ARRAY_ALIGNMENT_BYTES,
    ):
        """Constructor. Store the useful information for later."""
        self.subclass = subclass
        self.shape = shape
        self.order = order
        self.dtype = dtype
        self.allow_mmap = allow_mmap
        # We make numpy_array_alignment_bytes an instance attribute to allow
        # us to change our mind about the default alignment and still load the
        # old pickles (with the previous alignment) correctly.
        self.numpy_array_alignment_bytes = numpy_array_alignment_bytes

    def safe_get_numpy_array_alignment_bytes(self):
        # NumpyArrayWrapper instances loaded from joblib <= 1.1 pickles don't
        # have a numpy_array_alignment_bytes attribute; returning None makes
        # readers/writers skip the padding-byte scheme for such pickles.
        return getattr(self, "numpy_array_alignment_bytes", None)

    def write_array(self, array, pickler):
        """Write array bytes to pickler file handle.

        This function is an adaptation of the numpy write_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        # Set buffer size to 16 MiB to hide the Python loop overhead.
        buffersize = max(16 * 1024**2 // array.itemsize, 1)
        if array.dtype.hasobject:
            # We contain Python objects so we cannot write out the data
            # directly. Instead, we will pickle it out with version 5 of the
            # pickle protocol.
            pickle.dump(array, pickler.file_handle, protocol=5)
        else:
            numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                # Pad the stream so that the raw array bytes start on a
                # numpy_array_alignment_bytes boundary; read_mmap relies on
                # this alignment to produce byte-aligned memmaps.
                current_pos = pickler.file_handle.tell()
                pos_after_padding_byte = current_pos + 1
                padding_length = numpy_array_alignment_bytes - (
                    pos_after_padding_byte % numpy_array_alignment_bytes
                )
                # A single byte is written that contains the padding length in
                # bytes.
                padding_length_byte = int.to_bytes(
                    padding_length, length=1, byteorder="little"
                )
                pickler.file_handle.write(padding_length_byte)

                if padding_length != 0:
                    padding = b"\xff" * padding_length
                    pickler.file_handle.write(padding)

            # Stream the array data in chunks (respecting self.order) instead
            # of materializing one large bytes object.
            for chunk in pickler.np.nditer(
                array,
                flags=["external_loop", "buffered", "zerosize_ok"],
                buffersize=buffersize,
                order=self.order,
            ):
                pickler.file_handle.write(chunk.tobytes("C"))

    def read_array(self, unpickler, ensure_native_byte_order):
        """Read array from unpickler file handle.

        This function is an adaptation of the numpy read_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        if len(self.shape) == 0:
            # 0-d array: a single element.
            count = 1
        else:
            # joblib issue #859: we cast the elements of self.shape to int64
            # to prevent a potential overflow when computing their product.
            shape_int64 = [unpickler.np.int64(x) for x in self.shape]
            count = unpickler.np.multiply.reduce(shape_int64)
        # Now read the actual data.
        if self.dtype.hasobject:
            # The array contained Python objects. We need to unpickle the
            # data.
            array = pickle.load(unpickler.file_handle)
        else:
            numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                # Skip the padding byte plus the padding written by
                # write_array before the raw data.
                padding_byte = unpickler.file_handle.read(1)
                padding_length = int.from_bytes(padding_byte, byteorder="little")
                if padding_length != 0:
                    unpickler.file_handle.read(padding_length)

            # This is not a real file. We have to read it the
            # memory-intensive way.
            # crc32 module fails on reads greater than 2 ** 32 bytes,
            # breaking large reads from gzip streams. Chunk reads to
            # BUFFER_SIZE bytes to avoid issue and reduce memory overhead
            # of the read. In non-chunked case count < max_read_count, so
            # only one read is performed.
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, self.dtype.itemsize)

            array = unpickler.np.empty(count, dtype=self.dtype)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * self.dtype.itemsize)
                data = _read_bytes(unpickler.file_handle, read_size, "array data")
                array[i : i + read_count] = unpickler.np.frombuffer(
                    data, dtype=self.dtype, count=read_count
                )
                # Free the chunk eagerly to keep peak memory low.
                del data

            if self.order == "F":
                # The bytes were written in Fortran order: reshape with the
                # reversed shape and transpose to recover an F-ordered view.
                array.shape = self.shape[::-1]
                array = array.transpose()
            else:
                array.shape = self.shape

        if ensure_native_byte_order:
            # Detect byte order mismatch and swap as needed.
            array = _ensure_native_byte_order(array)

        return array

    def read_mmap(self, unpickler):
        """Read an array using numpy memmap."""
        current_pos = unpickler.file_handle.tell()
        offset = current_pos
        numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()

        if numpy_array_alignment_bytes is not None:
            padding_byte = unpickler.file_handle.read(1)
            padding_length = int.from_bytes(padding_byte, byteorder="little")
            # + 1 is for the padding byte
            offset += padding_length + 1

        if unpickler.mmap_mode == "w+":
            # 'w+' would create/overwrite the file; the pickle stream already
            # exists on disk, so downgrade to read-write mapping — TODO
            # confirm this is the intended rationale.
            unpickler.mmap_mode = "r+"

        marray = make_memmap(
            unpickler.filename,
            dtype=self.dtype,
            shape=self.shape,
            order=self.order,
            mode=unpickler.mmap_mode,
            offset=offset,
        )
        # Update the offset so that it corresponds to the end of the read
        # array: subsequent reads from the pickle stream resume there.
        unpickler.file_handle.seek(offset + marray.nbytes)

        if (
            numpy_array_alignment_bytes is None
            and current_pos % NUMPY_ARRAY_ALIGNMENT_BYTES != 0
        ):
            # Old pickles (no padding-byte scheme) may start array data at an
            # unaligned offset; warn because the mapped buffer is unaligned.
            message = (
                f"The memmapped array {marray} loaded from the file "
                f"{unpickler.file_handle.name} is not byte aligned. "
                "This may cause segmentation faults if this memmapped array "
                "is used in some libraries like BLAS or PyTorch. "
                "To get rid of this warning, regenerate your pickle file "
                "with joblib >= 1.2.0. "
                "See https://github.com/joblib/joblib/issues/563 "
                "for more details"
            )
            warnings.warn(message)

        return marray

    def read(self, unpickler, ensure_native_byte_order):
        """Read the array corresponding to this wrapper.

        Use the unpickler to get all information to correctly read the array.

        Parameters
        ----------
        unpickler: NumpyUnpickler
        ensure_native_byte_order: bool
            If true, coerce the array to use the native endianness of the
            host system.

        Returns
        -------
        array: numpy.ndarray

        """
        # When requested, only use memmap mode if allowed.
        if unpickler.mmap_mode is not None and self.allow_mmap:
            assert not ensure_native_byte_order, (
                "Memmaps cannot be coerced to a given byte order, "
                "this code path is impossible."
            )
            array = self.read_mmap(unpickler)
        else:
            array = self.read_array(unpickler, ensure_native_byte_order)

        # Manage array subclass case: rebuild the original ndarray subclass
        # (e.g. np.matrix) around the raw data when one was recorded.
        if hasattr(array, "__array_prepare__") and self.subclass not in (
            unpickler.np.ndarray,
            unpickler.np.memmap,
        ):
            # We need to reconstruct another subclass
            new_array = _reconstruct(self.subclass, (0,), "b")
            return new_array.__array_prepare__(array)
        else:
            return array

296 

297 

298############################################################################### 

299# Pickler classes 

300 

301 

class NumpyPickler(Pickler):
    """A pickler to persist big data efficiently.

    The main features of this object are:
    * persistence of numpy arrays in a single file.
    * optional compression with a special care on avoiding memory copies.

    Attributes
    ----------
    fp: file
        File object handle used for serializing the input object.
    protocol: int, optional
        Pickle protocol used. Default is pickle.DEFAULT_PROTOCOL.
    """

    # NOTE(review): the dispatch table is copied but never modified here;
    # the numpy handling is done by overriding `save` directly instead.
    dispatch = Pickler.dispatch.copy()

    def __init__(self, fp, protocol=None):
        self.file_handle = fp
        # Arrays written through a BinaryZlibFile cannot be memory-mapped
        # back later (see _create_array_wrapper) — presumably because the
        # on-disk bytes are compressed; confirm against the loader.
        self.buffered = isinstance(self.file_handle, BinaryZlibFile)

        # By default we want a pickle protocol that only changes with
        # the major python version and not the minor one.
        if protocol is None:
            protocol = pickle.DEFAULT_PROTOCOL

        Pickler.__init__(self, self.file_handle, protocol=protocol)
        # Delayed import of numpy, to avoid tight coupling: self.np is None
        # when numpy is not installed and `save` falls back to plain pickle.
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _create_array_wrapper(self, array):
        """Create and returns a numpy array wrapper from a numpy array."""
        order = (
            "F" if (array.flags.f_contiguous and not array.flags.c_contiguous) else "C"
        )
        # Object arrays are pickled, not written raw, so they can never be
        # memmapped; neither can arrays written to a compressed stream.
        allow_mmap = not self.buffered and not array.dtype.hasobject

        kwargs = {}
        try:
            self.file_handle.tell()
        except io.UnsupportedOperation:
            # Unseekable stream: the alignment-padding scheme needs tell(),
            # so disable it for this wrapper.
            kwargs = {"numpy_array_alignment_bytes": None}

        wrapper = NumpyArrayWrapper(
            type(array),
            array.shape,
            order,
            array.dtype,
            allow_mmap=allow_mmap,
            **kwargs,
        )

        return wrapper

    def save(self, obj):
        """Subclass the Pickler `save` method.

        This is a total abuse of the Pickler class in order to use the numpy
        persistence function `save` instead of the default pickle
        implementation. The numpy array is replaced by a custom wrapper in the
        pickle persistence stack and the serialized array is written right
        after in the file. Warning: the file produced does not follow the
        pickle format. As such it can not be read with `pickle.load`.
        """
        if self.np is not None and type(obj) in (
            self.np.ndarray,
            self.np.matrix,
            self.np.memmap,
        ):
            if type(obj) is self.np.memmap:
                # Pickling doesn't work with memmapped arrays
                obj = self.np.asanyarray(obj)

            # The array wrapper is pickled instead of the real array.
            wrapper = self._create_array_wrapper(obj)
            Pickler.save(self, wrapper)

            # A framer was introduced with pickle protocol 4 and we want to
            # ensure the wrapper object is written before the numpy array
            # buffer in the pickle file.
            # See https://www.python.org/dev/peps/pep-3154/#framing to get
            # more information on the framer behavior.
            if self.proto >= 4:
                self.framer.commit_frame(force=True)

            # And then array bytes are written right after the wrapper.
            wrapper.write_array(obj, self)
            return

        # Anything that is not a plain ndarray/matrix/memmap goes through the
        # regular pickle machinery (including ndarray subclasses, which keep
        # their own reduce logic).
        return Pickler.save(self, obj)

396 

397 

class NumpyUnpickler(Unpickler):
    """A subclass of the Unpickler to unpickle our numpy pickles.

    Attributes
    ----------
    mmap_mode: str
        The memorymap mode to use for reading numpy arrays.
    file_handle: file_like
        File object to unpickle from.
    ensure_native_byte_order: bool
        If True, coerce the array to use the native endianness of the
        host system.
    filename: str
        Name of the file to unpickle from. It should correspond to
        file_handle. This parameter is required when using mmap_mode.
    np: module
        Reference to numpy module if numpy is installed else None.

    """

    # Own copy of the dispatch table so the BUILD override below does not
    # leak into the base Unpickler class.
    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, ensure_native_byte_order, mmap_mode=None):
        # The next line is for backward compatibility with pickle generated
        # with joblib versions less than 0.10.
        self._dirname = os.path.dirname(filename)

        self.mmap_mode = mmap_mode
        self.file_handle = file_handle
        # filename is required for numpy mmap mode.
        self.filename = filename
        # Set to True by load_build when a joblib <= 0.9 NDArrayWrapper is
        # encountered; callers use it to emit a DeprecationWarning.
        self.compat_mode = False
        self.ensure_native_byte_order = ensure_native_byte_order
        Unpickler.__init__(self, self.file_handle)
        # Delayed numpy import, mirroring NumpyPickler: None if unavailable.
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def load_build(self):
        """Called to set the state of a newly created object.

        We capture it to replace our place-holder objects, NDArrayWrapper or
        NumpyArrayWrapper, by the array we are interested in. We
        replace them directly in the stack of pickler.
        NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
        """
        Unpickler.load_build(self)

        # For backward compatibility, we support NDArrayWrapper objects.
        if isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
            if self.np is None:
                raise ImportError(
                    "Trying to unpickle an ndarray, but numpy didn't import correctly"
                )
            array_wrapper = self.stack.pop()
            # If any NDArrayWrapper is found, we switch to compatibility
            # mode, this will be used to raise a DeprecationWarning to the
            # user at the end of the unpickling.
            if isinstance(array_wrapper, NDArrayWrapper):
                self.compat_mode = True
                # Legacy wrapper: its read() takes no byte-order argument.
                _array_payload = array_wrapper.read(self)
            else:
                _array_payload = array_wrapper.read(self, self.ensure_native_byte_order)

            # The reconstructed array replaces the wrapper on the pickle
            # stack, so downstream opcodes see the real array.
            self.stack.append(_array_payload)

    # Be careful to register our new method: BUILD opcodes now route through
    # load_build above instead of the base implementation.
    dispatch[pickle.BUILD[0]] = load_build

468 

469 

470############################################################################### 

471# Utility functions 

472 

473 

def dump(value, filename, compress=0, protocol=None):
    """Persist an arbitrary Python object into one file.

    Read more in the :ref:`User Guide <persistence>`.

    Parameters
    ----------
    value: any Python object
        The object to store to disk.
    filename: str, pathlib.Path, or file object.
        The file object or path of the file in which it is to be stored.
        The compression method corresponding to one of the supported filename
        extensions ('.z', '.gz', '.bz2', '.xz' or '.lzma') will be used
        automatically.
    compress: int from 0 to 9 or bool or 2-tuple, optional
        Optional compression level for the data. 0 or False is no compression.
        Higher value means more compression, but also slower read and
        write times. Using a value of 3 is often a good compromise.
        See the notes for more details.
        If compress is True, the compression level used is 3.
        If compress is a 2-tuple, the first element must correspond to a string
        between supported compressors (e.g 'zlib', 'gzip', 'bz2', 'lzma'
        'xz'), the second element must be an integer from 0 to 9, corresponding
        to the compression level.
    protocol: int, optional
        Pickle protocol, see pickle.dump documentation for more details.

    Returns
    -------
    filenames: list of strings
        The list of file names in which the data is stored. If
        compress is false, each array is stored in a different file.

    See Also
    --------
    joblib.load : corresponding loader

    Notes
    -----
    Memmapping on load cannot be used for compressed files. Thus
    using compression can significantly slow down loading. In
    addition, compressed files take up extra memory during
    dump and load.

    """
    # Accept pathlib.Path transparently. (The former `Path is not None`
    # guard was dead code: `Path` is imported unconditionally at the top of
    # this module, so it can never be None.)
    if isinstance(filename, Path):
        filename = str(filename)

    is_filename = isinstance(filename, str)
    is_fileobj = hasattr(filename, "write")

    # Normalize the `compress` argument into (compress_method, compress_level).
    compress_method = "zlib"  # zlib is the default compression method.
    if compress is True:
        # By default, if compress is enabled, we want the default compress
        # level of the compressor.
        compress_level = None
    elif isinstance(compress, tuple):
        # a 2-tuple was set in compress
        if len(compress) != 2:
            raise ValueError(
                "Compress argument tuple should contain exactly 2 elements: "
                "(compress method, compress level), you passed {}".format(compress)
            )
        compress_method, compress_level = compress
    elif isinstance(compress, str):
        compress_method = compress
        compress_level = None  # Use default compress level
        # Re-wrap as a tuple so that extension-based detection below is
        # skipped: the method was requested explicitly.
        compress = (compress_method, compress_level)
    else:
        compress_level = compress

    if compress_method == "lz4" and lz4 is None:
        raise ValueError(LZ4_NOT_INSTALLED_ERROR)

    if (
        compress_level is not None
        and compress_level is not False
        and compress_level not in range(10)
    ):
        # Raising an error if a non valid compress level is given.
        raise ValueError(
            'Non valid compress level given: "{}". Possible values are {}.'.format(
                compress_level, list(range(10))
            )
        )

    if compress_method not in _COMPRESSORS:
        # Raising an error if an unsupported compression method is given.
        raise ValueError(
            'Non valid compression method given: "{}". Possible values are {}.'.format(
                compress_method, _COMPRESSORS
            )
        )

    if not is_filename and not is_fileobj:
        # People keep inverting arguments, and the resulting error is
        # incomprehensible
        raise ValueError(
            "Second argument should be a filename or a file-like object, "
            "%s (type %s) was given." % (filename, type(filename))
        )

    if is_filename and not isinstance(compress, tuple):
        # In case no explicit compression was requested using both compression
        # method and level in a tuple and the filename has an explicit
        # extension, we select the corresponding compressor.

        # unset the variable to be sure no compression level is set afterwards.
        compress_method = None
        for name, compressor in _COMPRESSORS.items():
            if filename.endswith(compressor.extension):
                compress_method = name

        if compress_method in _COMPRESSORS and compress_level == 0:
            # we choose the default compress_level in case it was not given
            # as an argument (using compress).
            compress_level = None

    # compress_level can be None here (meaning "compressor default"), which
    # also compares != 0, so both explicit levels and defaults take the
    # compressed path; 0 and False fall through to the plain writers.
    if compress_level != 0:
        with _write_fileobject(
            filename, compress=(compress_method, compress_level)
        ) as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    elif is_filename:
        with open(filename, "wb") as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    else:
        NumpyPickler(filename, protocol=protocol).dump(value)

    # If the target container is a file object, nothing is returned.
    if is_fileobj:
        return

    # For compatibility, the list of created filenames (e.g with one element
    # after 0.10.0) is returned by default.
    return [filename]

611 

612 

def _unpickle(fobj, ensure_native_byte_order, filename="", mmap_mode=None):
    """Internal unpickling function."""
    # We are careful to open the file handle early and keep it open to
    # avoid race-conditions on renames.
    # That said, if data is stored in companion files, which can be
    # the case with the old persistence format, moving the directory
    # will create a race when joblib tries to access the companion
    # files.
    unpickler = NumpyUnpickler(
        filename, fobj, ensure_native_byte_order, mmap_mode=mmap_mode
    )
    try:
        result = unpickler.load()
    except UnicodeDecodeError as exc:
        # More user-friendly error message
        raise ValueError(
            "You may be trying to read with "
            "python 3 a joblib pickle generated with python 2. "
            "This feature is not supported by joblib."
        ) from exc

    if unpickler.compat_mode:
        # A joblib <= 0.9 NDArrayWrapper was seen during unpickling.
        warnings.warn(
            "The file '%s' has been generated with a "
            "joblib version less than 0.10. "
            "Please regenerate this pickle file." % filename,
            DeprecationWarning,
            stacklevel=3,
        )
    return result

645 

646 

def load_temporary_memmap(filename, mmap_mode, unlink_on_gc_collect):
    """Load a memmap-backed pickle and register it for later cleanup.

    The object is unpickled with byte order preserved, its backing file is
    recorded in JOBLIB_MMAPS and, when requested, a finalizer is attached so
    the file may be unlinked once the object is garbage collected.
    """
    from ._memmapping_reducer import JOBLIB_MMAPS, add_maybe_unlink_finalizer

    with open(filename, "rb") as raw_handle, _validate_fileobject_and_memmap(
        raw_handle, filename, mmap_mode
    ) as (handle, effective_mmap_mode):
        # Memmaps are used for interprocess communication, which should
        # keep the objects untouched. We pass `ensure_native_byte_order=False`
        # to remain consistent with the loading behavior of non-memmaped
        # arrays in workers, where the byte order is preserved.
        # Note that we do not implement endianness change for memmaps, as this
        # would result in inconsistent behavior.
        result = _unpickle(
            handle,
            ensure_native_byte_order=False,
            filename=filename,
            mmap_mode=effective_mmap_mode,
        )

    JOBLIB_MMAPS.add(result.filename)
    if unlink_on_gc_collect:
        add_maybe_unlink_finalizer(result)
    return result

672 

673 

def load(filename, mmap_mode=None, ensure_native_byte_order="auto"):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    Read more in the :ref:`User Guide <persistence>`.

    WARNING: joblib.load relies on the pickle module and can therefore
    execute arbitrary Python code. It should therefore never be used
    to load files from untrusted sources.

    Parameters
    ----------
    filename: str, pathlib.Path, or file object.
        The file object or path of the file from which to load the object
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, the arrays are memory-mapped from the disk. This
        mode has no effect for compressed files. Note that in this
        case the reconstructed object might no longer match exactly
        the originally pickled object.
    ensure_native_byte_order: bool, or 'auto', default=='auto'
        If True, ensures that the byte order of the loaded arrays matches the
        native byte ordering (or _endianness_) of the host system. This is not
        compatible with memory-mapped arrays and using non-null `mmap_mode`
        parameter at the same time will raise an error. The default 'auto'
        parameter is equivalent to True if `mmap_mode` is None, else False.

    Returns
    -------
    result: any Python object
        The object stored in the file.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----

    This function can load numpy array files saved separately during the
    dump. If the mmap_mode argument is given, it is passed to np.load and
    arrays are loaded as memmaps. As a consequence, the reconstructed
    object might not match the original pickled object. Note that if the
    file was saved with compression, the arrays cannot be memmapped.
    """
    # 'auto' enables byte-order coercion only for regular (non-memmapped)
    # loads, since memmaps must keep their on-disk byte order.
    if ensure_native_byte_order == "auto":
        ensure_native_byte_order = mmap_mode is None

    if ensure_native_byte_order and mmap_mode is not None:
        raise ValueError(
            "Native byte ordering can only be enforced if 'mmap_mode' parameter "
            f"is set to None, but got 'mmap_mode={mmap_mode}' instead."
        )

    # Accept pathlib.Path transparently. (The former `Path is not None`
    # guard was dead code: `Path` is imported unconditionally at the top of
    # this module, so it can never be None.)
    if isinstance(filename, Path):
        filename = str(filename)

    if hasattr(filename, "read"):
        # A file-like object was given directly; recover its name (if any)
        # for error messages and compatibility loading.
        fobj = filename
        filename = getattr(fobj, "name", "")
        with _validate_fileobject_and_memmap(fobj, filename, mmap_mode) as (fobj, _):
            obj = _unpickle(fobj, ensure_native_byte_order=ensure_native_byte_order)
    else:
        with open(filename, "rb") as f:
            with _validate_fileobject_and_memmap(f, filename, mmap_mode) as (
                fobj,
                validated_mmap_mode,
            ):
                if isinstance(fobj, str):
                    # if the returned file object is a string, this means we
                    # try to load a pickle file generated with an version of
                    # Joblib so we load it with joblib compatibility function.
                    return load_compatibility(fobj)

                # A memory-mapped array has to be mapped with the endianness
                # it has been written with. Other arrays are coerced to the
                # native endianness of the host system.
                obj = _unpickle(
                    fobj,
                    ensure_native_byte_order=ensure_native_byte_order,
                    filename=filename,
                    mmap_mode=validated_mmap_mode,
                )

    return obj