1"""Utilities for fast persistence of big data, with optional compression."""
2
3# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
4# Copyright (c) 2009 Gael Varoquaux
5# License: BSD Style, 3 clauses.
6
7import io
8import os
9import pickle
10import warnings
11from pathlib import Path
12
13from .backports import make_memmap
14from .compressor import (
15 _COMPRESSORS,
16 LZ4_NOT_INSTALLED_ERROR,
17 BinaryZlibFile,
18 BZ2CompressorWrapper,
19 GzipCompressorWrapper,
20 LZ4CompressorWrapper,
21 LZMACompressorWrapper,
22 XZCompressorWrapper,
23 ZlibCompressorWrapper,
24 lz4,
25 register_compressor,
26)
27
28# For compatibility with old versions of joblib, we need ZNDArrayWrapper
29# to be visible in the current namespace.
30from .numpy_pickle_compat import (
31 NDArrayWrapper,
32 ZNDArrayWrapper, # noqa: F401
33 load_compatibility,
34)
35from .numpy_pickle_utils import (
36 BUFFER_SIZE,
37 Pickler,
38 Unpickler,
39 _ensure_native_byte_order,
40 _read_bytes,
41 _reconstruct,
42 _validate_fileobject_and_memmap,
43 _write_fileobject,
44)
45
46# Register supported compressors
47register_compressor("zlib", ZlibCompressorWrapper())
48register_compressor("gzip", GzipCompressorWrapper())
49register_compressor("bz2", BZ2CompressorWrapper())
50register_compressor("lzma", LZMACompressorWrapper())
51register_compressor("xz", XZCompressorWrapper())
52register_compressor("lz4", LZ4CompressorWrapper())
53
54
55###############################################################################
56# Utility objects for persistence.
57
58# For convenience, 16 bytes are used to be sure to cover all the possible
59# dtypes' alignments. For reference, see:
60# https://numpy.org/devdocs/dev/alignment.html
61NUMPY_ARRAY_ALIGNMENT_BYTES = 16


class NumpyArrayWrapper(object):
    """An object to be persisted instead of numpy arrays.

    This object is used to hack into the pickle machinery and read numpy
    array data from our custom persistence format.
    More precisely, this object is used for:
    * carrying the information of the persisted array: subclass, shape, order,
      dtype. This ndarray metadata is used to correctly reconstruct the array
      with low-level numpy functions.
    * determining if memmap is allowed on the array.
    * reading the array bytes from a file.
    * reading the array from a file using a memory map.
    * writing the array bytes to a file.

    Attributes
    ----------
    subclass: numpy.ndarray subclass
        Determine the subclass of the wrapped array.
    shape: numpy.ndarray shape
        Determine the shape of the wrapped array.
    order: {'C', 'F'}
        Determine the order of the wrapped array data. 'C' is for C order,
        'F' is for Fortran order.
    dtype: numpy.ndarray dtype
        Determine the data type of the wrapped array.
    allow_mmap: bool
        Determine if memory mapping is allowed on the wrapped array.
        Default: False.
    """

    def __init__(
        self,
        subclass,
        shape,
        order,
        dtype,
        allow_mmap=False,
        numpy_array_alignment_bytes=NUMPY_ARRAY_ALIGNMENT_BYTES,
    ):
        """Constructor. Store the useful information for later."""
        self.subclass = subclass
        self.shape = shape
        self.order = order
        self.dtype = dtype
        self.allow_mmap = allow_mmap
        # We make numpy_array_alignment_bytes an instance attribute to allow us
        # to change our mind about the default alignment and still load the old
        # pickles (with the previous alignment) correctly
        self.numpy_array_alignment_bytes = numpy_array_alignment_bytes

    def safe_get_numpy_array_alignment_bytes(self):
        # NumpyArrayWrapper instances loaded from joblib <= 1.1 pickles don't
        # have a numpy_array_alignment_bytes attribute
        return getattr(self, "numpy_array_alignment_bytes", None)

    def write_array(self, array, pickler):
        """Write array bytes to pickler file handle.

        This function is an adaptation of the numpy write_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        # Set buffer size to 16 MiB to hide the Python loop overhead.
        buffersize = max(16 * 1024**2 // array.itemsize, 1)
        if array.dtype.hasobject:
            # The array contains Python objects, so we cannot write out the
            # data directly. Instead, we will pickle it out with version 5 of
            # the pickle protocol.
            pickle.dump(array, pickler.file_handle, protocol=5)
        else:
            numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                current_pos = pickler.file_handle.tell()
                pos_after_padding_byte = current_pos + 1
                padding_length = numpy_array_alignment_bytes - (
                    pos_after_padding_byte % numpy_array_alignment_bytes
                )
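                # Illustrative example: with 16-byte alignment and
                # current_pos == 10, pos_after_padding_byte == 11 and
                # padding_length == 16 - (11 % 16) == 5, so the array data
                # starts at the aligned offset 16.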
                # A single byte is written that contains the padding length in
                # bytes
                padding_length_byte = int.to_bytes(
                    padding_length, length=1, byteorder="little"
                )
                pickler.file_handle.write(padding_length_byte)

                if padding_length != 0:
                    padding = b"\xff" * padding_length
                    pickler.file_handle.write(padding)

            for chunk in pickler.np.nditer(
                array,
                flags=["external_loop", "buffered", "zerosize_ok"],
                buffersize=buffersize,
                order=self.order,
            ):
                pickler.file_handle.write(chunk.tobytes("C"))

    def read_array(self, unpickler, ensure_native_byte_order):
        """Read array from unpickler file handle.

        This function is an adaptation of the numpy read_array function
        available in version 1.10.1 in numpy/lib/format.py.
        """
        if len(self.shape) == 0:
            count = 1
        else:
            # joblib issue #859: we cast the elements of self.shape to int64 to
            # prevent a potential overflow when computing their product.
            shape_int64 = [unpickler.np.int64(x) for x in self.shape]
            count = unpickler.np.multiply.reduce(shape_int64)
        # Now read the actual data.
        if self.dtype.hasobject:
            # The array contained Python objects. We need to unpickle the data.
            array = pickle.load(unpickler.file_handle)
        else:
            numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()
            if numpy_array_alignment_bytes is not None:
                padding_byte = unpickler.file_handle.read(1)
                padding_length = int.from_bytes(padding_byte, byteorder="little")
                if padding_length != 0:
                    unpickler.file_handle.read(padding_length)

            # The file handle may not be a real on-disk file, so we have to
            # read the bytes the memory-intensive way.
            # The crc32 module fails on reads greater than 2 ** 32 bytes,
            # breaking large reads from gzip streams. Chunk reads to
            # BUFFER_SIZE bytes to avoid this issue and reduce the memory
            # overhead of the read. In the non-chunked case
            # count < max_read_count, so only one read is performed.
            max_read_count = BUFFER_SIZE // min(BUFFER_SIZE, self.dtype.itemsize)
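            # For instance, with an 8-byte dtype each chunk covers
            # BUFFER_SIZE // 8 elements; the min() above guards against
            # dtypes whose itemsize exceeds BUFFER_SIZE.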

            array = unpickler.np.empty(count, dtype=self.dtype)
            for i in range(0, count, max_read_count):
                read_count = min(max_read_count, count - i)
                read_size = int(read_count * self.dtype.itemsize)
                data = _read_bytes(unpickler.file_handle, read_size, "array data")
                array[i : i + read_count] = unpickler.np.frombuffer(
                    data, dtype=self.dtype, count=read_count
                )
                del data

            if self.order == "F":
                array.shape = self.shape[::-1]
                array = array.transpose()
            else:
                array.shape = self.shape

        if ensure_native_byte_order:
            # Detect byte order mismatch and swap as needed.
            array = _ensure_native_byte_order(array)

        return array

    def read_mmap(self, unpickler):
        """Read an array using numpy memmap."""
        current_pos = unpickler.file_handle.tell()
        offset = current_pos
        numpy_array_alignment_bytes = self.safe_get_numpy_array_alignment_bytes()

        if numpy_array_alignment_bytes is not None:
            padding_byte = unpickler.file_handle.read(1)
            padding_length = int.from_bytes(padding_byte, byteorder="little")
            # + 1 is for the padding byte
            offset += padding_length + 1

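        # Opening the memmap with "w+" would overwrite the data that was just
        # written, so downgrade to "r+" to map the existing bytes read-write.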
        if unpickler.mmap_mode == "w+":
            unpickler.mmap_mode = "r+"

        marray = make_memmap(
            unpickler.filename,
            dtype=self.dtype,
            shape=self.shape,
            order=self.order,
            mode=unpickler.mmap_mode,
            offset=offset,
        )
        # update the offset so that it corresponds to the end of the read array
        unpickler.file_handle.seek(offset + marray.nbytes)

        if (
            numpy_array_alignment_bytes is None
            and current_pos % NUMPY_ARRAY_ALIGNMENT_BYTES != 0
        ):
            message = (
                f"The memmapped array {marray} loaded from the file "
                f"{unpickler.file_handle.name} is not byte aligned. "
                "This may cause segmentation faults if this memmapped array "
                "is used in some libraries like BLAS or PyTorch. "
                "To get rid of this warning, regenerate your pickle file "
                "with joblib >= 1.2.0. "
                "See https://github.com/joblib/joblib/issues/563 "
                "for more details"
            )
            warnings.warn(message)

        return marray

    def read(self, unpickler, ensure_native_byte_order):
        """Read the array corresponding to this wrapper.

        Use the unpickler to get all information to correctly read the array.

        Parameters
        ----------
        unpickler: NumpyUnpickler
        ensure_native_byte_order: bool
            If True, coerce the array to use the native endianness of the
            host system.

        Returns
        -------
        array: numpy.ndarray

        """
        # When requested, only use memmap mode if allowed.
        if unpickler.mmap_mode is not None and self.allow_mmap:
            assert not ensure_native_byte_order, (
                "Memmaps cannot be coerced to a given byte order, "
                "this code path is impossible."
            )
            array = self.read_mmap(unpickler)
        else:
            array = self.read_array(unpickler, ensure_native_byte_order)

        # Manage the array subclass case
        if hasattr(array, "__array_prepare__") and self.subclass not in (
            unpickler.np.ndarray,
            unpickler.np.memmap,
        ):
            # We need to reconstruct another subclass
            new_array = _reconstruct(self.subclass, (0,), "b")
            return new_array.__array_prepare__(array)
        else:
            return array


###############################################################################
# Pickler classes


class NumpyPickler(Pickler):
    """A pickler to persist big data efficiently.

    The main features of this object are:
    * persistence of numpy arrays in a single file.
    * optional compression with special care taken to avoid memory copies.

    Attributes
    ----------
    fp: file
        File object handle used for serializing the input object.
    protocol: int, optional
        Pickle protocol used. Default is pickle.DEFAULT_PROTOCOL.
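
    Examples
    --------
    A minimal sketch of direct use; normally this class is driven by
    ``joblib.dump`` rather than instantiated by hand:

    >>> import io
    >>> buffer = io.BytesIO()
    >>> NumpyPickler(buffer).dump({"key": "value"})  # doctest: +SKIP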
    """

    dispatch = Pickler.dispatch.copy()

    def __init__(self, fp, protocol=None):
        self.file_handle = fp
        self.buffered = isinstance(self.file_handle, BinaryZlibFile)

        # By default we want a pickle protocol that only changes with
        # the major python version and not the minor one
        if protocol is None:
            protocol = pickle.DEFAULT_PROTOCOL

        Pickler.__init__(self, self.file_handle, protocol=protocol)
        # delayed import of numpy, to avoid tight coupling
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _create_array_wrapper(self, array):
        """Create and return a numpy array wrapper from a numpy array."""
        order = (
            "F" if (array.flags.f_contiguous and not array.flags.c_contiguous) else "C"
        )
        allow_mmap = not self.buffered and not array.dtype.hasobject

        kwargs = {}
        try:
            self.file_handle.tell()
        except io.UnsupportedOperation:
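            # The file handle is not seekable (e.g. a non-seekable stream), so
            # positions cannot be computed and alignment padding is disabled.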
            kwargs = {"numpy_array_alignment_bytes": None}

        wrapper = NumpyArrayWrapper(
            type(array),
            array.shape,
            order,
            array.dtype,
            allow_mmap=allow_mmap,
            **kwargs,
        )

        return wrapper

    def save(self, obj):
        """Subclass the Pickler `save` method.

        This is a total abuse of the Pickler class in order to use the numpy
        persistence function `save` instead of the default pickle
        implementation. The numpy array is replaced by a custom wrapper in the
        pickle persistence stack and the serialized array is written right
        after in the file. Warning: the file produced does not follow the
        pickle format. As such it cannot be read with `pickle.load`.
        """
        if self.np is not None and type(obj) in (
            self.np.ndarray,
            self.np.matrix,
            self.np.memmap,
        ):
            if type(obj) is self.np.memmap:
                # Pickling doesn't work with memmapped arrays
                obj = self.np.asanyarray(obj)

            # The array wrapper is pickled instead of the real array.
            wrapper = self._create_array_wrapper(obj)
            Pickler.save(self, wrapper)

            # A framer was introduced with pickle protocol 4 and we want to
            # ensure the wrapper object is written before the numpy array
            # buffer in the pickle file.
            # See https://www.python.org/dev/peps/pep-3154/#framing to get
            # more information on the framer behavior.
            if self.proto >= 4:
                self.framer.commit_frame(force=True)

            # And then array bytes are written right after the wrapper.
            wrapper.write_array(obj, self)
            return

        return Pickler.save(self, obj)


class NumpyUnpickler(Unpickler):
    """A subclass of the Unpickler to unpickle our numpy pickles.

    Attributes
    ----------
    mmap_mode: str
        The memory map mode to use for reading numpy arrays.
    file_handle: file_like
        File object to unpickle from.
    ensure_native_byte_order: bool
        If True, coerce the array to use the native endianness of the
        host system.
    filename: str
        Name of the file to unpickle from. It should correspond to file_handle.
        This parameter is required when using mmap_mode.
    np: module
        Reference to numpy module if numpy is installed else None.

    """

    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, ensure_native_byte_order, mmap_mode=None):
        # The next line is for backward compatibility with pickles generated
        # with joblib versions less than 0.10.
        self._dirname = os.path.dirname(filename)

        self.mmap_mode = mmap_mode
        self.file_handle = file_handle
        # filename is required for numpy mmap mode.
        self.filename = filename
        self.compat_mode = False
        self.ensure_native_byte_order = ensure_native_byte_order
        Unpickler.__init__(self, self.file_handle)
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def load_build(self):
        """Called to set the state of a newly created object.

        We capture it to replace our place-holder objects, NDArrayWrapper or
        NumpyArrayWrapper, by the array we are interested in. We
        replace them directly in the stack of the pickler.
        NDArrayWrapper is used for backward compatibility with joblib <= 0.9.
        """
        Unpickler.load_build(self)

        # For backward compatibility, we support NDArrayWrapper objects.
        if isinstance(self.stack[-1], (NDArrayWrapper, NumpyArrayWrapper)):
            if self.np is None:
                raise ImportError(
                    "Trying to unpickle an ndarray, but numpy didn't import correctly"
                )
            array_wrapper = self.stack.pop()
            # If any NDArrayWrapper is found, we switch to compatibility mode,
            # this will be used to raise a DeprecationWarning to the user at
            # the end of the unpickling.
            if isinstance(array_wrapper, NDArrayWrapper):
                self.compat_mode = True
                _array_payload = array_wrapper.read(self)
            else:
                _array_payload = array_wrapper.read(self, self.ensure_native_byte_order)

            self.stack.append(_array_payload)

    # Register our overridden method for the BUILD opcode; the dispatch table
    # is keyed by the integer value of the opcode byte.
    dispatch[pickle.BUILD[0]] = load_build


###############################################################################
# Utility functions


def dump(value, filename, compress=0, protocol=None):
    """Persist an arbitrary Python object into one file.

    Read more in the :ref:`User Guide <persistence>`.

    Parameters
    ----------
    value: any Python object
        The object to store to disk.
    filename: str, pathlib.Path, or file object.
        The file object or path of the file in which it is to be stored.
        The compression method corresponding to one of the supported filename
        extensions ('.z', '.gz', '.bz2', '.xz' or '.lzma') will be used
        automatically.
    compress: int from 0 to 9 or bool or 2-tuple, optional
        Optional compression level for the data. 0 or False is no compression.
        Higher values mean more compression, but also slower read and
        write times. Using a value of 3 is often a good compromise.
        See the notes for more details.
        If compress is True, the compression level used is 3.
        If compress is a 2-tuple, the first element must be one of the
        supported compressors (e.g. 'zlib', 'gzip', 'bz2', 'lzma', 'xz'),
        and the second element must be an integer from 0 to 9, corresponding
        to the compression level.
    protocol: int, optional
        Pickle protocol, see the pickle.dump documentation for more details.

    Returns
    -------
    filenames: list of strings
        The list of file names in which the data is stored. Since joblib
        0.10, all the data is stored in a single file, so this list
        contains a single element.

    See Also
    --------
    joblib.load : corresponding loader

    Notes
    -----
    Memmapping on load cannot be used for compressed files. Thus
    using compression can significantly slow down loading. In
    addition, compressed files take up extra memory during
    dump and load.
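
    Examples
    --------
    Illustrative usage; the paths below are placeholders:

    >>> from joblib import dump
    >>> data = {"a": [1, 2, 3]}
    >>> dump(data, "/tmp/data.joblib")  # doctest: +SKIP
    ['/tmp/data.joblib']
    >>> dump(data, "/tmp/data.joblib.gz", compress=("gzip", 3))  # doctest: +SKIP
    ['/tmp/data.joblib.gz']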

    """

    if isinstance(filename, Path):
        filename = str(filename)

    is_filename = isinstance(filename, str)
    is_fileobj = hasattr(filename, "write")

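    # Normalize the compress argument to a (method, level) pair. Illustrative
    # mappings: True -> ('zlib', None), 3 -> ('zlib', 3),
    # 'lz4' -> ('lz4', None), ('gzip', 3) -> ('gzip', 3).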
    compress_method = "zlib"  # zlib is the default compression method.
    if compress is True:
        # By default, if compress is enabled, we want the default compress
        # level of the compressor.
        compress_level = None
    elif isinstance(compress, tuple):
        # a 2-tuple was set in compress
        if len(compress) != 2:
            raise ValueError(
                "Compress argument tuple should contain exactly 2 elements: "
                "(compress method, compress level), you passed {}".format(compress)
            )
        compress_method, compress_level = compress
    elif isinstance(compress, str):
        compress_method = compress
        compress_level = None  # Use default compress level
        compress = (compress_method, compress_level)
    else:
        compress_level = compress

    if compress_method == "lz4" and lz4 is None:
        raise ValueError(LZ4_NOT_INSTALLED_ERROR)

    if (
        compress_level is not None
        and compress_level is not False
        and compress_level not in range(10)
    ):
        # Raise an error if an invalid compress level is given.
        raise ValueError(
            'Invalid compress level given: "{}". Possible values are {}.'.format(
                compress_level, list(range(10))
            )
        )

    if compress_method not in _COMPRESSORS:
        # Raise an error if an unsupported compression method is given.
        raise ValueError(
            'Invalid compression method given: "{}". Possible values are {}.'.format(
                compress_method, _COMPRESSORS
            )
        )

    if not is_filename and not is_fileobj:
        # People keep inverting arguments, and the resulting error is
        # incomprehensible
        raise ValueError(
            "Second argument should be a filename or a file-like object, "
            "%s (type %s) was given." % (filename, type(filename))
        )

    if is_filename and not isinstance(compress, tuple):
        # In case no explicit compression was requested using both compression
        # method and level in a tuple and the filename has an explicit
        # extension, we select the corresponding compressor.

        # unset the variable to be sure no compression level is set afterwards.
        compress_method = None
        for name, compressor in _COMPRESSORS.items():
            if filename.endswith(compressor.extension):
                compress_method = name

        if compress_method in _COMPRESSORS and compress_level == 0:
            # we choose the default compress_level in case it was not given
            # as an argument (using compress).
            compress_level = None

    if compress_level != 0:
        with _write_fileobject(
            filename, compress=(compress_method, compress_level)
        ) as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    elif is_filename:
        with open(filename, "wb") as f:
            NumpyPickler(f, protocol=protocol).dump(value)
    else:
        NumpyPickler(filename, protocol=protocol).dump(value)

    # If the target container is a file object, nothing is returned.
    if is_fileobj:
        return

    # For compatibility, the list of created filenames (e.g. with one element
    # after 0.10.0) is returned by default.
    return [filename]


def _unpickle(fobj, ensure_native_byte_order, filename="", mmap_mode=None):
    """Internal unpickling function."""
    # We are careful to open the file handle early and keep it open to
    # avoid race-conditions on renames.
    # That said, if data is stored in companion files, which can be
    # the case with the old persistence format, moving the directory
    # will create a race when joblib tries to access the companion
    # files.
    unpickler = NumpyUnpickler(
        filename, fobj, ensure_native_byte_order, mmap_mode=mmap_mode
    )
    obj = None
    try:
        obj = unpickler.load()
        if unpickler.compat_mode:
            warnings.warn(
                "The file '%s' has been generated with a "
                "joblib version less than 0.10. "
                "Please regenerate this pickle file." % filename,
                DeprecationWarning,
                stacklevel=3,
            )
    except UnicodeDecodeError as exc:
        # More user-friendly error message
        new_exc = ValueError(
            "You may be trying to read with "
            "python 3 a joblib pickle generated with python 2. "
            "This feature is not supported by joblib."
        )
        new_exc.__cause__ = exc
        raise new_exc
    return obj


def load_temporary_memmap(filename, mmap_mode, unlink_on_gc_collect):
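    """Load a pickle file as a memory-mapped object for use across processes.

    The backing file is registered in JOBLIB_MMAPS and, when
    unlink_on_gc_collect is True, a finalizer is attached so that the file
    can be deleted once the loaded object is garbage collected.
    """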
    from ._memmapping_reducer import JOBLIB_MMAPS, add_maybe_unlink_finalizer

    with open(filename, "rb") as f:
        with _validate_fileobject_and_memmap(f, filename, mmap_mode) as (
            fobj,
            validated_mmap_mode,
        ):
            # Memmaps are used for interprocess communication, which should
            # keep the objects untouched. We pass `ensure_native_byte_order=False`
            # to remain consistent with the loading behavior of non-memmapped
            # arrays in workers, where the byte order is preserved.
            # Note that we do not implement endianness changes for memmaps, as
            # this would result in inconsistent behavior.
            obj = _unpickle(
                fobj,
                ensure_native_byte_order=False,
                filename=filename,
                mmap_mode=validated_mmap_mode,
            )

    JOBLIB_MMAPS.add(obj.filename)
    if unlink_on_gc_collect:
        add_maybe_unlink_finalizer(obj)
    return obj


def load(filename, mmap_mode=None, ensure_native_byte_order="auto"):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    Read more in the :ref:`User Guide <persistence>`.

    WARNING: joblib.load relies on the pickle module and can therefore
    execute arbitrary Python code. It should therefore never be used
    to load files from untrusted sources.

    Parameters
    ----------
    filename: str, pathlib.Path, or file object.
        The file object or path of the file from which to load the object
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, the arrays are memory-mapped from the disk. This
        mode has no effect for compressed files. Note that in this
        case the reconstructed object might no longer match exactly
        the originally pickled object.
    ensure_native_byte_order: bool, or 'auto', default='auto'
        If True, ensures that the byte order of the loaded arrays matches the
        native byte ordering (or endianness) of the host system. This is not
        compatible with memory-mapped arrays, and using a non-None `mmap_mode`
        parameter at the same time will raise an error. The default 'auto'
        parameter is equivalent to True if `mmap_mode` is None, else False.

    Returns
    -------
    result: any Python object
        The object stored in the file.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----

    This function can load numpy array files saved separately during the
    dump. If the mmap_mode argument is given, it is passed to np.load and
    arrays are loaded as memmaps. As a consequence, the reconstructed
    object might not match the original pickled object. Note that if the
    file was saved with compression, the arrays cannot be memmapped.
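
    Examples
    --------
    Illustrative usage; the path below is a placeholder:

    >>> import numpy as np
    >>> from joblib import dump, load
    >>> dump(np.arange(4), "/tmp/arr.joblib")  # doctest: +SKIP
    ['/tmp/arr.joblib']
    >>> load("/tmp/arr.joblib")  # doctest: +SKIP
    array([0, 1, 2, 3])
    >>> load("/tmp/arr.joblib", mmap_mode="r")  # doctest: +SKIP
    memmap([0, 1, 2, 3])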
    """
    if ensure_native_byte_order == "auto":
        ensure_native_byte_order = mmap_mode is None

    if ensure_native_byte_order and mmap_mode is not None:
        raise ValueError(
            "Native byte ordering can only be enforced if 'mmap_mode' parameter "
            f"is set to None, but got 'mmap_mode={mmap_mode}' instead."
        )

    if isinstance(filename, Path):
        filename = str(filename)

    if hasattr(filename, "read"):
        fobj = filename
        filename = getattr(fobj, "name", "")
        with _validate_fileobject_and_memmap(fobj, filename, mmap_mode) as (fobj, _):
            obj = _unpickle(fobj, ensure_native_byte_order=ensure_native_byte_order)
    else:
        with open(filename, "rb") as f:
            with _validate_fileobject_and_memmap(f, filename, mmap_mode) as (
                fobj,
                validated_mmap_mode,
            ):
                if isinstance(fobj, str):
                    # if the returned file object is a string, this means we
                    # are trying to load a pickle file generated with an old
                    # version of joblib, so we load it with the joblib
                    # compatibility function.
                    return load_compatibility(fobj)

                # A memory-mapped array has to be mapped with the endianness
                # it has been written with. Other arrays are coerced to the
                # native endianness of the host system.
                obj = _unpickle(
                    fobj,
                    ensure_native_byte_order=ensure_native_byte_order,
                    filename=filename,
                    mmap_mode=validated_mmap_mode,
                )

    return obj