Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/joblib/numpy_pickle.py: 58%

1"""Utilities for fast persistence of big data, with optional compression.""" 

2 

3# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> 

4# Copyright (c) 2009 Gael Varoquaux 

5# License: BSD Style, 3 clauses. 

6 

7import pickle 

8import os 

9import warnings 

10import io 

11from pathlib import Path 

12 

13from .compressor import lz4, LZ4_NOT_INSTALLED_ERROR 

14from .compressor import _COMPRESSORS, register_compressor, BinaryZlibFile 

15from .compressor import (ZlibCompressorWrapper, GzipCompressorWrapper, 

16 BZ2CompressorWrapper, LZMACompressorWrapper, 

17 XZCompressorWrapper, LZ4CompressorWrapper) 

18from .numpy_pickle_utils import Unpickler, Pickler 

19from .numpy_pickle_utils import _read_fileobject, _write_fileobject 

20from .numpy_pickle_utils import _read_bytes, BUFFER_SIZE 

21from .numpy_pickle_utils import _ensure_native_byte_order 

22from .numpy_pickle_compat import load_compatibility 

23from .numpy_pickle_compat import NDArrayWrapper 

# For compatibility with old versions of joblib, we need ZNDArrayWrapper
# to be visible in the current namespace.
# Explicitly skip the next line from flake8, as it triggers an F401 warning
# which we don't care about.
from .numpy_pickle_compat import ZNDArrayWrapper  # noqa
from .backports import make_memmap

# Register the supported compressors.
register_compressor('zlib', ZlibCompressorWrapper())
register_compressor('gzip', GzipCompressorWrapper())
register_compressor('bz2', BZ2CompressorWrapper())
register_compressor('lzma', LZMACompressorWrapper())
register_compressor('xz', XZCompressorWrapper())
register_compressor('lz4', LZ4CompressorWrapper())
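
# Registering a third-party compressor follows the same pattern (an
# illustrative sketch, assuming CompressorWrapper from .compressor wraps a
# file-object class the way the wrappers above do; the 'my-gzip' name and
# '.mygz' extension are made up):
#
# >>> import gzip
# >>> from joblib.compressor import CompressorWrapper
# >>> register_compressor('my-gzip', CompressorWrapper(
# ...     obj=gzip.GzipFile, prefix=b'', extension='.mygz'))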


###############################################################################
# Utility objects for persistence.

# For convenience, 16 bytes are used to be sure to cover all the possible
# dtypes' alignments. For reference, see:
# https://numpy.org/devdocs/dev/alignment.html
NUMPY_ARRAY_ALIGNMENT_BYTES = 16
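
# Worked example (illustrative) of the alignment arithmetic used by
# NumpyArrayWrapper.write_array below: if the stream is at position 13 when
# an array is about to be written, the padding-length byte moves it to 14,
# so 16 - (14 % 16) == 2 padding bytes follow and the array data starts at
# the aligned offset 16.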


class NumpyArrayWrapper(object):
    """An object to be persisted instead of numpy arrays.

    This object is used to hack into the pickle machinery and read numpy
    array data from our custom persistence format.
    More precisely, this object is used to:
    * carry the information of the persisted array: subclass, shape, order,
      dtype. These ndarray metadata are used to correctly reconstruct the
      array with low-level numpy functions.
    * determine whether memmap is allowed on the array.
    * read the array bytes from a file.
    * read the array from the file through a memory map.
    * write the array bytes to a file.

    Attributes
    ----------
    subclass: numpy.ndarray subclass
        Determine the subclass of the wrapped array.
    shape: numpy.ndarray shape
        Determine the shape of the wrapped array.
    order: {'C', 'F'}
        Determine the order of the wrapped array data. 'C' is for C order,
        'F' is for Fortran order.
    dtype: numpy.ndarray dtype
        Determine the data type of the wrapped array.
    allow_mmap: bool
        Determine whether memory mapping is allowed on the wrapped array.
        Default: False.
    """

    def __init__(self, subclass, shape, order, dtype, allow_mmap=False,
                 numpy_array_alignment_bytes=NUMPY_ARRAY_ALIGNMENT_BYTES):
        """Constructor. Store the useful information for later."""
        self.subclass = subclass
        self.shape = shape
        self.order = order
        self.dtype = dtype
        self.allow_mmap = allow_mmap
        # We make numpy_array_alignment_bytes an instance attribute to allow
        # us to change our mind about the default alignment and still load
        # the old pickles (with the previous alignment) correctly.
        self.numpy_array_alignment_bytes = numpy_array_alignment_bytes

    def safe_get_numpy_array_alignment_bytes(self):
        # NumpyArrayWrapper instances loaded from joblib <= 1.1 pickles don't
        # have a numpy_array_alignment_bytes attribute.
        return getattr(self, 'numpy_array_alignment_bytes', None)

    def write_array(self, array, pickler):
        """Write array bytes to pickler file handle.

        This function is an adaptation of the numpy write_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        # Set buffer size to 16 MiB to hide the Python loop overhead.
        buffersize = max(16 * 1024 ** 2 // array.itemsize, 1)
        if array.dtype.hasobject:
            # We contain Python objects so we cannot write out the data
            # directly. Instead, we will pickle it out with version 2 of the
            # pickle protocol.
            pickle.dump(array, pickler.file_handle, protocol=2)
        else:
            numpy_array_alignment_bytes = \
                self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                current_pos = pickler.file_handle.tell()
                pos_after_padding_byte = current_pos + 1
                padding_length = numpy_array_alignment_bytes - (
                    pos_after_padding_byte % numpy_array_alignment_bytes)
                # A single byte is written that contains the padding length
                # in bytes.
                padding_length_byte = int.to_bytes(
                    padding_length, length=1, byteorder='little')
                pickler.file_handle.write(padding_length_byte)

                if padding_length != 0:
                    padding = b'\xff' * padding_length
                    pickler.file_handle.write(padding)

            for chunk in pickler.np.nditer(array,
                                           flags=['external_loop',
                                                  'buffered',
                                                  'zerosize_ok'],
                                           buffersize=buffersize,
                                           order=self.order):
                pickler.file_handle.write(chunk.tobytes('C'))
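
    # Illustrative sketch of the resulting on-disk layout for a non-object
    # array: the pickled wrapper is followed by one byte holding the padding
    # length, that many 0xff padding bytes, then the raw array buffer:
    #
    #   [pickled wrapper][pad len: 1 byte][0xff * pad len][raw array bytes]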

    def read_array(self, unpickler):
        """Read array from unpickler file handle.

        This function is an adaptation of the numpy read_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        if len(self.shape) == 0:
            count = 1
        else:
            # joblib issue #859: we cast the elements of self.shape to int64
            # to prevent a potential overflow when computing their product.
            shape_int64 = [unpickler.np.int64(x) for x in self.shape]
            count = unpickler.np.multiply.reduce(shape_int64)
        # Now read the actual data.
        if self.dtype.hasobject:
            # The array contained Python objects. We need to unpickle the
            # data.
            array = pickle.load(unpickler.file_handle)
        else:
            numpy_array_alignment_bytes = \
                self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                padding_byte = unpickler.file_handle.read(1)
                padding_length = int.from_bytes(
                    padding_byte, byteorder='little')
                if padding_length != 0:
                    unpickler.file_handle.read(padding_length)

            # This is not a real file. We have to read it the
            # memory-intensive way.
            # The crc32 module fails on reads greater than 2 ** 32 bytes,
            # breaking large reads from gzip streams. Chunk reads to
            # BUFFER_SIZE bytes to avoid the issue and reduce the memory
            # overhead of the read. In the non-chunked case
            # count < max_read_count, so only one read is performed.
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE,
                                                self.dtype.itemsize)

            array = unpickler.np.empty(count, dtype=self.dtype)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * self.dtype.itemsize)
                data = _read_bytes(unpickler.file_handle,
                                   read_size, "array data")
                array[i:i + read_count] = \
                    unpickler.np.frombuffer(data, dtype=self.dtype,
                                            count=read_count)
                del data

            if self.order == 'F':
                array.shape = self.shape[::-1]
                array = array.transpose()
            else:
                array.shape = self.shape

        # Detect byte order mismatch and swap as needed.
        return _ensure_native_byte_order(array)
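
    # Illustrative note on the chunk sizing above: for 8-byte items,
    # max_read_count == BUFFER_SIZE // 8, so each _read_bytes call asks for
    # at most (BUFFER_SIZE // 8) * 8 bytes, i.e. no more than BUFFER_SIZE,
    # comfortably below the 2 ** 32-byte limit mentioned in the comment.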

    def read_mmap(self, unpickler):
        """Read an array using numpy memmap."""
        current_pos = unpickler.file_handle.tell()
        offset = current_pos
        numpy_array_alignment_bytes = \
            self.safe_get_numpy_array_alignment_bytes()

        if numpy_array_alignment_bytes is not None:
            padding_byte = unpickler.file_handle.read(1)
            padding_length = int.from_bytes(padding_byte, byteorder='little')
            # + 1 is for the padding byte.
            offset += padding_length + 1

        if unpickler.mmap_mode == 'w+':
            unpickler.mmap_mode = 'r+'

        marray = make_memmap(unpickler.filename,
                             dtype=self.dtype,
                             shape=self.shape,
                             order=self.order,
                             mode=unpickler.mmap_mode,
                             offset=offset)
        # Update the offset so that it corresponds to the end of the read
        # array.
        unpickler.file_handle.seek(offset + marray.nbytes)

        if (numpy_array_alignment_bytes is None and
                current_pos % NUMPY_ARRAY_ALIGNMENT_BYTES != 0):
            message = (
                f'The memmapped array {marray} loaded from the file '
                f'{unpickler.file_handle.name} is not byte aligned. '
                'This may cause segmentation faults if this memmapped array '
                'is used in some libraries like BLAS or PyTorch. '
                'To get rid of this warning, regenerate your pickle file '
                'with joblib >= 1.2.0. '
                'See https://github.com/joblib/joblib/issues/563 '
                'for more details'
            )
            warnings.warn(message)

        return _ensure_native_byte_order(marray)
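
    # Worked example of the offset arithmetic above (illustrative): if the
    # wrapper pickle ends at file position 100, the padding byte read at
    # position 100 stores 16 - (101 % 16) == 11, so the memmap starts at
    # offset = 100 + 11 + 1 = 112, a multiple of 16.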

    def read(self, unpickler):
        """Read the array corresponding to this wrapper.

        Use the unpickler to get all information to correctly read the array.

        Parameters
        ----------
        unpickler: NumpyUnpickler

        Returns
        -------
        array: numpy.ndarray

        """
        # When requested, only use memmap mode if allowed.
        if unpickler.mmap_mode is not None and self.allow_mmap:
            array = self.read_mmap(unpickler)
        else:
            array = self.read_array(unpickler)

        # Manage the array subclass case.
        if (hasattr(array, '__array_prepare__') and
                self.subclass not in (unpickler.np.ndarray,
                                      unpickler.np.memmap)):
            # We need to reconstruct another subclass.
            new_array = unpickler.np.core.multiarray._reconstruct(
                self.subclass, (0,), 'b')
            return new_array.__array_prepare__(array)
        else:
            return array


###############################################################################
# Pickler classes


class NumpyPickler(Pickler):
    """A pickler to persist big data efficiently.

    The main features of this object are:
    * persistence of numpy arrays in a single file.
    * optional compression, with special care to avoid memory copies.

    Attributes
    ----------
    fp: file
        File object handle used for serializing the input object.
    protocol: int, optional
        Pickle protocol used. Default is pickle.DEFAULT_PROTOCOL.
    """

    dispatch = Pickler.dispatch.copy()

    def __init__(self, fp, protocol=None):
        self.file_handle = fp
        self.buffered = isinstance(self.file_handle, BinaryZlibFile)

        # By default we want a pickle protocol that only changes with
        # the major python version and not the minor one.
        if protocol is None:
            protocol = pickle.DEFAULT_PROTOCOL

        Pickler.__init__(self, self.file_handle, protocol=protocol)
        # Delayed import of numpy, to avoid tight coupling.
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _create_array_wrapper(self, array):
        """Create and return a numpy array wrapper from a numpy array."""
        order = 'F' if (array.flags.f_contiguous and
                        not array.flags.c_contiguous) else 'C'
        allow_mmap = not self.buffered and not array.dtype.hasobject

        kwargs = {}
        try:
            self.file_handle.tell()
        except io.UnsupportedOperation:
            kwargs = {'numpy_array_alignment_bytes': None}

        wrapper = NumpyArrayWrapper(type(array),
                                    array.shape, order, array.dtype,
                                    allow_mmap=allow_mmap,
                                    **kwargs)

        return wrapper
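
    # Illustrative check of the order detection above: a Fortran-ordered
    # array is f_contiguous but not c_contiguous, so it is persisted with
    # order='F'.
    #
    # >>> import numpy as np
    # >>> a = np.asfortranarray(np.ones((2, 3)))
    # >>> a.flags.f_contiguous and not a.flags.c_contiguous
    # True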

    def save(self, obj):
        """Subclass the Pickler `save` method.

        This is a total abuse of the Pickler class in order to use the numpy
        persistence function `save` instead of the default pickle
        implementation. The numpy array is replaced by a custom wrapper in
        the pickle persistence stack and the serialized array is written
        right after in the file. Warning: the file produced does not follow
        the pickle format. As such, it cannot be read with `pickle.load`.
        """
        if self.np is not None and type(obj) in (self.np.ndarray,
                                                 self.np.matrix,
                                                 self.np.memmap):
            if type(obj) is self.np.memmap:
                # Pickling doesn't work with memmapped arrays.
                obj = self.np.asanyarray(obj)

            # The array wrapper is pickled instead of the real array.
            wrapper = self._create_array_wrapper(obj)
            Pickler.save(self, wrapper)

            # A framer was introduced with pickle protocol 4 and we want to
            # ensure the wrapper object is written before the numpy array
            # buffer in the pickle file.
            # See https://www.python.org/dev/peps/pep-3154/#framing to get
            # more information on the framer behavior.
            if self.proto >= 4:
                self.framer.commit_frame(force=True)

            # And then the array bytes are written right after the wrapper.
            wrapper.write_array(obj, self)
            return

        return Pickler.save(self, obj)
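

# Minimal round-trip sketch (illustrative): a NumpyPickler stream must be
# read back with NumpyUnpickler (or ``load``), never with ``pickle.load``,
# since the raw array bytes follow the pickled wrapper in the same stream.
#
# >>> import io
# >>> import numpy as np
# >>> buf = io.BytesIO()
# >>> NumpyPickler(buf).dump(np.arange(3))
# >>> NumpyUnpickler('', io.BytesIO(buf.getvalue())).load()
# array([0, 1, 2])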


class NumpyUnpickler(Unpickler):
    """A subclass of the Unpickler to unpickle our numpy pickles.

    Attributes
    ----------
    mmap_mode: str
        The memory-map mode to use for reading numpy arrays.
    file_handle: file_like
        File object to unpickle from.
    filename: str
        Name of the file to unpickle from. It should correspond to
        file_handle. This parameter is required when using mmap_mode.
    np: module
        Reference to numpy module if numpy is installed else None.

    """

    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, mmap_mode=None):
        # The next line is for backward compatibility with pickles generated
        # with joblib versions less than 0.10.
        self._dirname = os.path.dirname(filename)

        self.mmap_mode = mmap_mode
        self.file_handle = file_handle
        # filename is required for numpy mmap mode.
        self.filename = filename
        self.compat_mode = False
        Unpickler.__init__(self, self.file_handle)
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def load_build(self):
        """Called to set the state of a newly created object.

        We capture it to replace our place-holder objects, NDArrayWrapper or
        NumpyArrayWrapper, by the array we are interested in. We replace them
        directly in the unpickler's stack.
        NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
        """
        Unpickler.load_build(self)

        # For backward compatibility, we support NDArrayWrapper objects.
        if isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
            if self.np is None:
                raise ImportError("Trying to unpickle an ndarray, "
                                  "but numpy didn't import correctly")
            array_wrapper = self.stack.pop()
            # If any NDArrayWrapper is found, we switch to compatibility
            # mode; this will be used to raise a DeprecationWarning to the
            # user at the end of the unpickling.
            if isinstance(array_wrapper, NDArrayWrapper):
                self.compat_mode = True
            self.stack.append(array_wrapper.read(self))

    # Be careful to register our new method.
    dispatch[pickle.BUILD[0]] = load_build


###############################################################################
# Utility functions

def dump(value, filename, compress=0, protocol=None, cache_size=None):
    """Persist an arbitrary Python object into one file.

    Read more in the :ref:`User Guide <persistence>`.

    Parameters
    ----------
    value: any Python object
        The object to store to disk.
    filename: str, pathlib.Path, or file object.
        The file object or path of the file in which it is to be stored.
        The compression method corresponding to one of the supported
        filename extensions ('.z', '.gz', '.bz2', '.xz' or '.lzma') will be
        used automatically.
    compress: int from 0 to 9 or bool or 2-tuple, optional
        Optional compression level for the data. 0 or False is no
        compression. Higher values mean more compression, but also slower
        read and write times. Using a value of 3 is often a good compromise.
        See the notes for more details.
        If compress is True, the compression level used is 3.
        If compress is a 2-tuple, the first element must be one of the
        supported compressors (e.g. 'zlib', 'gzip', 'bz2', 'lzma', 'xz') and
        the second element must be an integer from 0 to 9, corresponding to
        the compression level.
    protocol: int, optional
        Pickle protocol, see pickle.dump documentation for more details.
    cache_size: positive int, optional
        This option is deprecated in 0.10 and has no effect.

    Returns
    -------
    filenames: list of strings
        The list of file names in which the data is stored. If
        compress is false, each array is stored in a different file.

    See Also
    --------
    joblib.load : corresponding loader

    Notes
    -----
    Memmapping on load cannot be used for compressed files. Thus
    using compression can significantly slow down loading. In
    addition, compressed files take up extra memory during
    dump and load.

    """

    if Path is not None and isinstance(filename, Path):
        filename = str(filename)

    is_filename = isinstance(filename, str)
    is_fileobj = hasattr(filename, "write")

    compress_method = 'zlib'  # zlib is the default compression method.
    if compress is True:
        # By default, if compress is enabled, we want the default compress
        # level of the compressor.
        compress_level = None
    elif isinstance(compress, tuple):
        # A 2-tuple was passed in compress.
        if len(compress) != 2:
            raise ValueError(
                'Compress argument tuple should contain exactly 2 elements: '
                '(compress method, compress level), you passed {}'
                .format(compress))
        compress_method, compress_level = compress
    elif isinstance(compress, str):
        compress_method = compress
        compress_level = None  # Use the default compress level.
        compress = (compress_method, compress_level)
    else:
        compress_level = compress

    if compress_method == 'lz4' and lz4 is None:
        raise ValueError(LZ4_NOT_INSTALLED_ERROR)

    if (compress_level is not None and
            compress_level is not False and
            compress_level not in range(10)):
        # Raise an error if an invalid compress level is given.
        raise ValueError(
            'Non valid compress level given: "{}". Possible values are '
            '{}.'.format(compress_level, list(range(10))))

    if compress_method not in _COMPRESSORS:
        # Raise an error if an unsupported compression method is given.
        raise ValueError(
            'Non valid compression method given: "{}". Possible values are '
            '{}.'.format(compress_method, _COMPRESSORS))

    if not is_filename and not is_fileobj:
        # People keep inverting arguments, and the resulting error is
        # incomprehensible.
        raise ValueError(
            'Second argument should be a filename or a file-like object, '
            '%s (type %s) was given.'
            % (filename, type(filename))
        )

    if is_filename and not isinstance(compress, tuple):
        # If no explicit compression was requested (as a (method, level)
        # tuple) and the filename has a known extension, we select the
        # corresponding compressor.

        # Unset the variable to be sure no compression level is set
        # afterwards.
        compress_method = None
        for name, compressor in _COMPRESSORS.items():
            if filename.endswith(compressor.extension):
                compress_method = name

        if compress_method in _COMPRESSORS and compress_level == 0:
            # We choose the default compress_level in case it was not given
            # as an argument (using compress).
            compress_level = None

    if cache_size is not None:
        # Cache size is deprecated starting from version 0.10.
        warnings.warn("Please do not set 'cache_size' in joblib.dump, "
                      "this parameter has no effect and will be removed. "
                      "You used 'cache_size={}'".format(cache_size),
                      DeprecationWarning, stacklevel=2)

    if compress_level != 0:
        with _write_fileobject(filename, compress=(compress_method,
                                                   compress_level)) as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    elif is_filename:
        with open(filename, 'wb') as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    else:
        NumpyPickler(filename, protocol=protocol).dump(value)

    # If the target container is a file object, nothing is returned.
    if is_fileobj:
        return

    # For compatibility, the list of created filenames (e.g. with one
    # element after 0.10.0) is returned by default.
    return [filename]
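

# Usage sketch for ``dump`` (illustrative; the paths are placeholders): the
# compression method can be passed explicitly or inferred from the filename
# extension.
#
# >>> import numpy as np
# >>> dump({'a': np.ones(5)}, '/tmp/data.pkl.gz', compress=('gzip', 3))
# ['/tmp/data.pkl.gz']
# >>> dump(np.arange(10), '/tmp/arrays.pkl')  # uncompressed, memmappable
# ['/tmp/arrays.pkl']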


def _unpickle(fobj, filename="", mmap_mode=None):
    """Internal unpickling function."""
    # We are careful to open the file handle early and keep it open to
    # avoid race-conditions on renames.
    # That said, if data is stored in companion files, which can be
    # the case with the old persistence format, moving the directory
    # will create a race when joblib tries to access the companion
    # files.
    unpickler = NumpyUnpickler(filename, fobj, mmap_mode=mmap_mode)
    obj = None
    try:
        obj = unpickler.load()
        if unpickler.compat_mode:
            warnings.warn("The file '%s' has been generated with a "
                          "joblib version less than 0.10. "
                          "Please regenerate this pickle file."
                          % filename,
                          DeprecationWarning, stacklevel=3)
    except UnicodeDecodeError as exc:
        # More user-friendly error message.
        new_exc = ValueError(
            'You may be trying to read with '
            'python 3 a joblib pickle generated with python 2. '
            'This feature is not supported by joblib.')
        new_exc.__cause__ = exc
        raise new_exc
    return obj


def load_temporary_memmap(filename, mmap_mode, unlink_on_gc_collect):
    """Load a pickled memmap and track its backing file for later cleanup."""
    from ._memmapping_reducer import JOBLIB_MMAPS, add_maybe_unlink_finalizer
    obj = load(filename, mmap_mode)
    JOBLIB_MMAPS.add(obj.filename)
    if unlink_on_gc_collect:
        add_maybe_unlink_finalizer(obj)
    return obj


def load(filename, mmap_mode=None):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    Read more in the :ref:`User Guide <persistence>`.

    WARNING: joblib.load relies on the pickle module and can therefore
    execute arbitrary Python code. It should therefore never be used
    to load files from untrusted sources.

    Parameters
    ----------
    filename: str, pathlib.Path, or file object.
        The file object or path of the file from which to load the object.
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, the arrays are memory-mapped from the disk. This
        mode has no effect for compressed files. Note that in this
        case the reconstructed object might no longer match exactly
        the originally pickled object.

    Returns
    -------
    result: any Python object
        The object stored in the file.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----
    This function can load numpy array files saved separately during the
    dump. If the mmap_mode argument is given, it is passed to np.load and
    arrays are loaded as memmaps. As a consequence, the reconstructed
    object might not match the original pickled object. Note that if the
    file was saved with compression, the arrays cannot be memmapped.
    """
    if Path is not None and isinstance(filename, Path):
        filename = str(filename)

    if hasattr(filename, "read"):
        fobj = filename
        filename = getattr(fobj, 'name', '')
        with _read_fileobject(fobj, filename, mmap_mode) as fobj:
            obj = _unpickle(fobj)
    else:
        with open(filename, 'rb') as f:
            with _read_fileobject(f, filename, mmap_mode) as fobj:
                if isinstance(fobj, str):
                    # If the returned file object is a string, this means we
                    # are trying to load a pickle file generated with an old
                    # version of joblib, so we load it with the joblib
                    # compatibility function.
                    return load_compatibility(fobj)

                obj = _unpickle(fobj, filename, mmap_mode)
    return obj
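

# Usage sketch for ``load`` (illustrative; the paths are placeholders that
# mirror the ``dump`` example above):
#
# >>> data = load('/tmp/data.pkl.gz')               # compressed file
# >>> arr = load('/tmp/arrays.pkl', mmap_mode='r')  # memory-mapped arrays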