1"""Pickler class to extend the standard pickle.Pickler functionality
2
3The main objective is to make it natural to perform distributed computing on
4clusters (such as PySpark, Dask, Ray...) with interactively defined code
5(functions, classes, ...) written in notebooks or console.
6
7In particular this pickler adds the following features:
8- serialize interactively-defined or locally-defined functions, classes,
9 enums, typevars, lambdas and nested functions to compiled byte code;
10- deal with some other non-serializable objects in an ad-hoc manner where
11 applicable.
12
13This pickler is therefore meant to be used for the communication between short
14lived Python processes running the same version of Python and libraries. In
15particular, it is not meant to be used for long term storage of Python objects.
16
17It does not include an unpickler, as standard Python unpickling suffices.
18
19This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
20<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
21
22Copyright (c) 2012-now, CloudPickle developers and contributors.
23Copyright (c) 2012, Regents of the University of California.
24Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
25All rights reserved.
26
27Redistribution and use in source and binary forms, with or without
28modification, are permitted provided that the following conditions
29are met:
30 * Redistributions of source code must retain the above copyright
31 notice, this list of conditions and the following disclaimer.
32 * Redistributions in binary form must reproduce the above copyright
33 notice, this list of conditions and the following disclaimer in the
34 documentation and/or other materials provided with the distribution.
35 * Neither the name of the University of California, Berkeley nor the
36 names of its contributors may be used to endorse or promote
37 products derived from this software without specific prior written
38 permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
47PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
48LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
49NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
50SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51"""

import _collections_abc
from collections import ChainMap, OrderedDict
import abc
import builtins
import copyreg
import dataclasses
import dis
from enum import Enum
import io
import itertools
import logging
import opcode
import pickle
from pickle import _getattribute as _pickle_getattribute
import platform
import struct
import sys
import threading
import types
import typing
import uuid
import warnings
import weakref

# The following import is required to be present in the cloudpickle
# namespace to be able to load pickle files generated with older versions of
# cloudpickle. See: tests/test_backward_compat.py
from types import CellType  # noqa: F401


# cloudpickle is meant for inter-process communication: we expect all
# communicating processes to run the same Python version hence we favor
# communication speed over compatibility:
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Names of modules whose resources should be treated as dynamic.
_PICKLE_BY_VALUE_MODULES = set()

# Track the provenance of reconstructed dynamic classes to make it possible to
# reconstruct instances from the matching singleton class definition when
# appropriate and preserve the usual "isinstance" semantics of Python objects.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()

PYPY = platform.python_implementation() == "PyPy"

builtin_code_type = None
if PYPY:
    # builtin-code objects only exist in pypy
    builtin_code_type = type(float.__new__.__code__)

_extract_code_globals_cache = weakref.WeakKeyDictionary()


def _get_or_create_tracker_id(class_def):
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if class_tracker_id is None:
            class_tracker_id = uuid.uuid4().hex
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
            _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def
    return class_tracker_id


def _lookup_class_or_track(class_tracker_id, class_def):
    if class_tracker_id is not None:
        with _DYNAMIC_CLASS_TRACKER_LOCK:
            class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
                class_tracker_id, class_def
            )
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
    return class_def
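
# A minimal illustrative sketch (not part of the library) of what the class
# tracker above buys us: unpickling the same dynamically defined class twice
# in one process yields the same class object, so isinstance checks keep
# working across payloads.
#
#   import cloudpickle, pickle
#
#   class C:  # defined in __main__, hence pickled by value
#       pass
#
#   C1 = pickle.loads(cloudpickle.dumps(C))
#   C2 = pickle.loads(cloudpickle.dumps(C))
#   assert C1 is C2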


def register_pickle_by_value(module):
    """Register a module to make its functions and classes picklable by value.

    By default, functions and classes that are attributes of an importable
    module are pickled by reference, that is, by relying on re-importing
    the attribute from the module at load time.

    If `register_pickle_by_value(module)` is called, all its functions and
    classes are subsequently pickled by value, meaning that they can
    be loaded in Python processes where the module is not importable.

    This is especially useful when developing a module in a distributed
    execution environment: restarting the client Python process with the new
    source code is enough: there is no need to re-install the new version
    of the module on all the worker nodes nor to restart the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    # In the future, cloudpickle may need a way to access any module registered
    # for pickling by value in order to introspect relative imports inside
    # functions pickled by value. (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
    # This access can be ensured by checking that module is present in
    # sys.modules at registering time and assuming that it will still be
    # there when accessed during pickling. Another alternative would be to
    # store a weakref to the module. Even though cloudpickle does not implement
    # this introspection yet, in order to avoid a possible breaking change
    # later, we still enforce the presence of module inside sys.modules.
    if module.__name__ not in sys.modules:
        raise ValueError(
            f"{module} was not imported correctly, have you used an "
            "`import` statement to access it?"
        )
    _PICKLE_BY_VALUE_MODULES.add(module.__name__)
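
# A minimal usage sketch of the registration API above (``mymodule`` is an
# illustrative name, not a real package):
#
#   import cloudpickle
#   import mymodule
#
#   cloudpickle.register_pickle_by_value(mymodule)
#   payload = cloudpickle.dumps(mymodule.some_function)    # pickled by value
#   cloudpickle.unregister_pickle_by_value(mymodule)
#   payload = cloudpickle.dumps(mymodule.some_function)    # pickled by reference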


def unregister_pickle_by_value(module):
    """Unregister that the input module should be pickled by value."""
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    if module.__name__ not in _PICKLE_BY_VALUE_MODULES:
        raise ValueError(f"{module} is not registered for pickle by value")
    else:
        _PICKLE_BY_VALUE_MODULES.remove(module.__name__)


def list_registry_pickle_by_value():
    return _PICKLE_BY_VALUE_MODULES.copy()


def _is_registered_pickle_by_value(module):
    module_name = module.__name__
    if module_name in _PICKLE_BY_VALUE_MODULES:
        return True
    while True:
        parent_name = module_name.rsplit(".", 1)[0]
        if parent_name == module_name:
            break
        if parent_name in _PICKLE_BY_VALUE_MODULES:
            return True
        module_name = parent_name
    return False


if sys.version_info >= (3, 14):
    def _getattribute(obj, name):
        return _pickle_getattribute(obj, name.split("."))
else:
    def _getattribute(obj, name):
        return _pickle_getattribute(obj, name)[0]


def _whichmodule(obj, name):
    """Find the module an object belongs to.

    This function differs from ``pickle.whichmodule`` in two ways:
    - it does not mangle the cases where obj's module is __main__ and obj was
      not found in any module.
    - Errors arising during module introspection are ignored, as those errors
      are considered unwanted side effects.
    """
    module_name = getattr(obj, "__module__", None)

    if module_name is not None:
        return module_name
    # Protect the iteration by using a copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr or
    # other threads importing at the same time.
    for module_name, module in sys.modules.copy().items():
        # Some modules such as coverage can inject non-module objects inside
        # sys.modules
        if (
            module_name == "__main__"
            or module_name == "__mp_main__"
            or module is None
            or not isinstance(module, types.ModuleType)
        ):
            continue
        try:
            if _getattribute(module, name) is obj:
                return module_name
        except Exception:
            pass
    return None


def _should_pickle_by_reference(obj, name=None):
    """Test whether a function or a class should be pickled by reference.

    Pickling by reference means that the object (typically a function or a
    class) is an attribute of a module that is assumed to be importable in the
    target Python environment. Loading will therefore rely on importing the
    module and then calling `getattr` on it to access the function or class.

    Pickling by reference is the only option to pickle functions and classes
    in the standard library. In cloudpickle the alternative option is to
    pickle by value (for instance for interactively or locally defined
    functions and classes or for attributes of modules that have been
    explicitly registered to be pickled by value).
    """
    if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
        module_and_name = _lookup_module_and_qualname(obj, name=name)
        if module_and_name is None:
            return False
        module, name = module_and_name
        return not _is_registered_pickle_by_value(module)

    elif isinstance(obj, types.ModuleType):
        # We assume that sys.modules is primarily used as a cache mechanism for
        # the Python import machinery. Checking if a module has been added to
        # sys.modules is therefore a cheap and simple heuristic to tell us
        # whether we can assume that a given module could be imported by name
        # in another Python process.
        if _is_registered_pickle_by_value(obj):
            return False
        return obj.__name__ in sys.modules
    else:
        raise TypeError(
            "cannot check importability of {} instances".format(type(obj).__name__)
        )
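
# Illustrative sketch of the heuristic above: importable attributes are
# pickled by reference, interactively defined objects by value.
#
#   import os
#   _should_pickle_by_reference(os.path.join)   # True: importable by name
#   _should_pickle_by_reference(lambda: None)   # False: pickled by value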


def _lookup_module_and_qualname(obj, name=None):
    if name is None:
        name = getattr(obj, "__qualname__", None)
    if name is None:  # pragma: no cover
        # This used to be needed for Python 2.7 support but is probably not
        # needed anymore. However we keep the __name__ introspection in case
        # users of cloudpickle rely on this old behavior for unknown reasons.
        name = getattr(obj, "__name__", None)

    module_name = _whichmodule(obj, name)

    if module_name is None:
        # In this case, obj.__module__ is None AND obj was not found in any
        # imported module. obj is thus treated as dynamic.
        return None

    if module_name == "__main__":
        return None

    # Note: if module_name is in sys.modules, the corresponding module is
    # assumed importable at unpickling time. See #357
    module = sys.modules.get(module_name, None)
    if module is None:
        # The main reason why obj's module would not be imported is that this
        # module has been dynamically created, using for example
        # types.ModuleType. The other possibility is that module was removed
        # from sys.modules after obj was created/imported. But this case is not
        # supported, as the standard pickle does not support it either.
        return None

    try:
        obj2 = _getattribute(module, name)
    except AttributeError:
        # obj was not found inside the module it points to
        return None
    if obj2 is not obj:
        return None
    return module, name


def _extract_code_globals(co):
    """Find all global names read or written to by the code block co."""
    out_names = _extract_code_globals_cache.get(co)
    if out_names is None:
        # We use a dict with None values instead of a set to get a
        # deterministic order and avoid introducing non-deterministic pickle
        # bytes as a result.
        out_names = {name: None for name in _walk_global_ops(co)}

        # Declaring a function inside another one using the "def ..." syntax
        # generates a constant code object corresponding to the nested
        # function's code. As the nested function may itself need global
        # variables, we need to introspect its code, extract its globals (by
        # looking for code objects in its co_consts attribute) and add the
        # result to out_names.
        if co.co_consts:
            for const in co.co_consts:
                if isinstance(const, types.CodeType):
                    out_names.update(_extract_code_globals(const))

        _extract_code_globals_cache[co] = out_names

    return out_names
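
# Illustrative sketch: globals referenced by nested functions are collected
# recursively. Assuming a global name ``SCALE`` exists at pickling time:
#
#   def outer():
#       def inner(x):
#           return x * SCALE
#       return inner
#
#   list(_extract_code_globals(outer.__code__))   # ['SCALE']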


def _find_imported_submodules(code, top_level_dependencies):
    """Find currently imported submodules used by a function.

    Submodules used by a function need to be detected and referenced for the
    function to work correctly at depickling time. Because submodules can be
    referenced as attributes of their parent package (``package.submodule``),
    we need a special introspection technique that does not rely on
    GLOBAL-related opcodes to find references to them in a code object.

    Example:
    ```
    import concurrent.futures
    import cloudpickle


    def func():
        x = concurrent.futures.ThreadPoolExecutor


    if __name__ == "__main__":
        cloudpickle.dumps(func)
    ```
    The globals extracted by cloudpickle in the function's state include the
    concurrent package, but not its submodule (here, concurrent.futures), which
    is the module used by func. ``_find_imported_submodules`` will detect the
    usage of concurrent.futures. Saving this module alongside func will ensure
    that calling func once depickled does not fail due to concurrent.futures
    not being imported.
    """
    subimports = []
    # check if any known dependency is an imported package
    for x in top_level_dependencies:
        if (
            isinstance(x, types.ModuleType)
            and hasattr(x, "__package__")
            and x.__package__
        ):
            # check if the package has any currently loaded sub-imports
            prefix = x.__name__ + "."
            # A concurrent thread could mutate sys.modules,
            # make sure we iterate over a copy to avoid exceptions
            for name in list(sys.modules):
                # Older versions of pytest will add a "None" module to
                # sys.modules.
                if name is not None and name.startswith(prefix):
                    # check whether the function can address the sub-module
                    tokens = set(name[len(prefix) :].split("."))
                    if not tokens - set(code.co_names):
                        subimports.append(sys.modules[name])
    return subimports


# relevant opcodes
STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
HAVE_ARGUMENT = dis.HAVE_ARGUMENT
EXTENDED_ARG = dis.EXTENDED_ARG


_BUILTIN_TYPE_NAMES = {}
for k, v in types.__dict__.items():
    if type(v) is type:
        _BUILTIN_TYPE_NAMES[v] = k


def _builtin_type(name):
    if name == "ClassType":  # pragma: no cover
        # Backward compat to load pickle files generated with cloudpickle
        # < 1.3 even if loading pickle files from older versions is not
        # officially supported.
        return type
    return getattr(types, name)


def _walk_global_ops(code):
    """Yield referenced name for global-referencing instructions in code."""
    for instr in dis.get_instructions(code):
        op = instr.opcode
        if op in GLOBAL_OPS:
            yield instr.argval


def _extract_class_dict(cls):
    """Retrieve a copy of the dict of a class without its inherited methods."""
    # Hack to circumvent non-predictable memoization caused by string interning.
    # See the inline comment in _class_setstate for details.
    clsdict = {"".join(k): cls.__dict__[k] for k in sorted(cls.__dict__)}

    if len(cls.__bases__) == 1:
        inherited_dict = cls.__bases__[0].__dict__
    else:
        inherited_dict = {}
        for base in reversed(cls.__bases__):
            inherited_dict.update(base.__dict__)
    to_remove = []
    for name, value in clsdict.items():
        try:
            base_value = inherited_dict[name]
            if value is base_value:
                to_remove.append(name)
        except KeyError:
            pass
    for name in to_remove:
        clsdict.pop(name)
    return clsdict


def is_tornado_coroutine(func):
    """Return whether `func` is a Tornado coroutine function.

    Running coroutines are not supported.
    """
    warnings.warn(
        "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
        "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
        "directly instead.",
        category=DeprecationWarning,
    )
    if "tornado.gen" not in sys.modules:
        return False
    gen = sys.modules["tornado.gen"]
    if not hasattr(gen, "is_coroutine_function"):
        # Tornado version is too old
        return False
    return gen.is_coroutine_function(func)


def subimport(name):
    # We cannot simply do `return __import__(name)`: indeed, if ``name`` is
    # the name of a submodule, __import__ will return the top-level root module
    # of this submodule. For instance, __import__('os.path') returns the `os`
    # module.
    __import__(name)
    return sys.modules[name]
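
# Illustrative sketch of the difference described above:
#
#   __import__("os.path")    # returns the top-level `os` module
#   subimport("os.path")     # returns the `os.path` submodule itself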


def dynamic_subimport(name, vars):
    mod = types.ModuleType(name)
    mod.__dict__.update(vars)
    mod.__dict__["__builtins__"] = builtins.__dict__
    return mod


def _get_cell_contents(cell):
    try:
        return cell.cell_contents
    except ValueError:
        # Handle empty cells explicitly with a sentinel value.
        return _empty_cell_value


def instance(cls):
    """Create a new instance of a class.

    Parameters
    ----------
    cls : type
        The class to create an instance of.

    Returns
    -------
    instance : cls
        A new instance of ``cls``.
    """
    return cls()


@instance
class _empty_cell_value:
    """Sentinel for empty closures."""

    @classmethod
    def __reduce__(cls):
        return cls.__name__


def _make_function(code, globals, name, argdefs, closure):
    # Setting __builtins__ in globals is needed for nogil CPython.
    globals["__builtins__"] = __builtins__
    return types.FunctionType(code, globals, name, argdefs, closure)


def _make_empty_cell():
    if False:
        # trick the compiler into creating an empty cell in our lambda
        cell = None
        raise AssertionError("this route should not be executed")

    return (lambda: cell).__closure__[0]


def _make_cell(value=_empty_cell_value):
    cell = _make_empty_cell()
    if value is not _empty_cell_value:
        cell.cell_contents = value
    return cell
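
# Illustrative sketch of the cell helpers above: an empty cell raises
# ValueError on access until its contents are set.
#
#   cell = _make_empty_cell()
#   # cell.cell_contents  -> raises ValueError (cell is empty)
#   cell.cell_contents = 42
#   assert cell.cell_contents == 42
#   assert _make_cell(42).cell_contents == 42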


def _make_skeleton_class(
    type_constructor, name, bases, type_kwargs, class_tracker_id, extra
):
    """Build dynamic class with an empty __dict__ to be filled once memoized

    If class_tracker_id is not None, try to look up an existing class
    definition matching that id. If none is found, track a newly reconstructed
    class definition under that id so that other instances stemming from the
    same class id will also reuse this class definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise.
    """
    # We need to intern the keys of the type_kwargs dict to avoid having
    # different pickles for the same dynamic class depending on whether it was
    # dynamically created or reconstructed from a pickled stream.
    type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()}

    skeleton_class = types.new_class(
        name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs)
    )

    return _lookup_class_or_track(class_tracker_id, skeleton_class)


def _make_skeleton_enum(
    bases, name, qualname, members, module, class_tracker_id, extra
):
    """Build dynamic enum with an empty __dict__ to be filled once memoized

    The creation of the enum class is inspired by the code of
    EnumMeta._create_.

    If class_tracker_id is not None, try to look up an existing enum
    definition matching that id. If none is found, track a newly reconstructed
    enum definition under that id so that other instances stemming from the
    same class id will also reuse this enum definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise.
    """
    # enums always inherit from their base Enum class at the last position in
    # the list of base classes:
    enum_base = bases[-1]
    metacls = enum_base.__class__
    classdict = metacls.__prepare__(name, bases)

    for member_name, member_value in members.items():
        classdict[member_name] = member_value
    enum_class = metacls.__new__(metacls, name, bases, classdict)
    enum_class.__module__ = module
    enum_class.__qualname__ = qualname

    return _lookup_class_or_track(class_tracker_id, enum_class)


def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
    tv = typing.TypeVar(
        name,
        *constraints,
        bound=bound,
        covariant=covariant,
        contravariant=contravariant,
    )
    return _lookup_class_or_track(class_tracker_id, tv)


def _decompose_typevar(obj):
    return (
        obj.__name__,
        obj.__bound__,
        obj.__constraints__,
        obj.__covariant__,
        obj.__contravariant__,
        _get_or_create_tracker_id(obj),
    )


def _typevar_reduce(obj):
    # TypeVar instances require module information, which is why we do not
    # use _should_pickle_by_reference directly.
    module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)

    if module_and_name is None:
        return (_make_typevar, _decompose_typevar(obj))
    elif _is_registered_pickle_by_value(module_and_name[0]):
        return (_make_typevar, _decompose_typevar(obj))

    return (getattr, module_and_name)


def _get_bases(typ):
    if "__orig_bases__" in getattr(typ, "__dict__", {}):
        # For generic types (see PEP 560)
        # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
        # correct. Subclasses of a fully-parameterized generic class do not
        # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
        # will return True because it's defined in the base class.
        bases_attr = "__orig_bases__"
    else:
        # For regular class objects
        bases_attr = "__bases__"
    return getattr(typ, bases_attr)


def _make_dict_keys(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict.fromkeys(obj).keys()
    else:
        return dict.fromkeys(obj).keys()


def _make_dict_values(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict((i, _) for i, _ in enumerate(obj)).values()
    else:
        return {i: _ for i, _ in enumerate(obj)}.values()


def _make_dict_items(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict(obj).items()
    else:
        return obj.items()
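
# Illustrative sketch of the dict view constructors above: the reducers below
# ship only the listed keys/values (never the full backing dict) and rebuild
# a fresh view at load time.
#
#   keys = _make_dict_keys(["a", "b"])     # dict_keys(['a', 'b'])
#   values = _make_dict_values([1, 2])     # dict_values([1, 2])
#   items = _make_dict_items({"a": 1})     # dict_items([('a', 1)])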


# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
# -------------------------------------------------


def _class_getnewargs(obj):
    type_kwargs = {}
    if "__module__" in obj.__dict__:
        type_kwargs["__module__"] = obj.__module__

    __dict__ = obj.__dict__.get("__dict__", None)
    if isinstance(__dict__, property):
        type_kwargs["__dict__"] = __dict__

    return (
        type(obj),
        obj.__name__,
        _get_bases(obj),
        type_kwargs,
        _get_or_create_tracker_id(obj),
        None,
    )


def _enum_getnewargs(obj):
    members = {e.name: e.value for e in obj}
    return (
        obj.__bases__,
        obj.__name__,
        obj.__qualname__,
        members,
        obj.__module__,
        _get_or_create_tracker_id(obj),
        None,
    )


# COLLECTION OF OBJECTS RECONSTRUCTORS
# ------------------------------------
def _file_reconstructor(retval):
    return retval


# COLLECTION OF OBJECTS STATE GETTERS
# -----------------------------------


def _function_getstate(func):
    # - Put func's dynamic attributes (stored in func.__dict__) in state. These
    #   attributes will be restored at unpickling time using
    #   f.__dict__.update(state)
    # - Put func's members into slotstate. Such attributes will be restored at
    #   unpickling time by iterating over slotstate and calling setattr(func,
    #   slotname, slotvalue)
    slotstate = {
        # Hack to circumvent non-predictable memoization caused by string interning.
        # See the inline comment in _class_setstate for details.
        "__name__": "".join(func.__name__),
        "__qualname__": "".join(func.__qualname__),
        "__annotations__": func.__annotations__,
        "__kwdefaults__": func.__kwdefaults__,
        "__defaults__": func.__defaults__,
        "__module__": func.__module__,
        "__doc__": func.__doc__,
        "__closure__": func.__closure__,
    }

    f_globals_ref = _extract_code_globals(func.__code__)
    f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__}

    if func.__closure__ is not None:
        closure_values = list(map(_get_cell_contents, func.__closure__))
    else:
        closure_values = ()

    # Extract currently-imported submodules used by func. Storing these modules
    # in the _cloudpickle_submodules entry of the object's state will trigger
    # the side effect of importing these modules at unpickling time
    # (which is necessary for func to work correctly once depickled).
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        func.__code__, itertools.chain(f_globals.values(), closure_values)
    )
    slotstate["__globals__"] = f_globals

    # Hack to circumvent non-predictable memoization caused by string interning.
    # See the inline comment in _class_setstate for details.
    state = {"".join(k): v for k, v in func.__dict__.items()}
    return state, slotstate


def _class_getstate(obj):
    clsdict = _extract_class_dict(obj)
    clsdict.pop("__weakref__", None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        clsdict.pop("_abc_cache", None)
        clsdict.pop("_abc_negative_cache", None)
        clsdict.pop("_abc_negative_cache_version", None)
        registry = clsdict.pop("_abc_registry", None)
        if registry is None:
            # The abc caches and registered subclasses of a
            # class are bundled into the single _abc_impl attribute
            clsdict.pop("_abc_impl", None)
            (registry, _, _, _) = abc._get_dump(obj)

            clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry]
        else:
            # In the above if clause, registry is a set of weakrefs -- in
            # this case, registry is a WeakSet
            clsdict["_abc_impl"] = [type_ for type_ in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute, no need to
        # save them in obj's state
        if isinstance(obj.__slots__, str):
            clsdict.pop(obj.__slots__)
        else:
            for k in obj.__slots__:
                clsdict.pop(k, None)

    clsdict.pop("__dict__", None)  # unpicklable property object

    return (clsdict, {})


def _enum_getstate(obj):
    clsdict, slotstate = _class_getstate(obj)

    members = {e.name: e.value for e in obj}
    # Cleanup the clsdict that will be passed to _make_skeleton_enum:
    # Those attributes are already handled by the metaclass.
    for attrname in [
        "_generate_next_value_",
        "_member_names_",
        "_member_map_",
        "_member_type_",
        "_value2member_map_",
    ]:
        clsdict.pop(attrname, None)
    for member in members:
        clsdict.pop(member)
        # Special handling of Enum subclasses
    return clsdict, slotstate


# COLLECTIONS OF OBJECTS REDUCERS
# -------------------------------
# A reducer is a function taking a single argument (obj), and that returns a
# tuple with all the necessary data to re-construct obj. Apart from a few
# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
# correctly pickle an object.
# While many built-in objects (exception objects, instances of the "object"
# class, etc.) are shipped with their own built-in reducer (invoked using
# obj.__reduce__), some do not. The following methods were created to "fill
# these holes".


def _code_reduce(obj):
    """code object reducer."""
    # If you are not sure about the order of arguments, take a look at help
    # of the specific type from types, for example:
    # >>> from types import CodeType
    # >>> help(CodeType)

    # Hack to circumvent non-predictable memoization caused by string interning.
    # See the inline comment in _class_setstate for details.
    co_name = "".join(obj.co_name)

    # Create shallow copies of these tuples to make the cloudpickle payload
    # deterministic. When creating a code object during load, copies of these
    # four tuples are created, while in the main process, these tuples can be
    # shared. By always creating copies, we make sure the resulting payload is
    # deterministic.
    co_names = tuple(name for name in obj.co_names)
    co_varnames = tuple(name for name in obj.co_varnames)
    co_freevars = tuple(name for name in obj.co_freevars)
    co_cellvars = tuple(name for name in obj.co_cellvars)
    if hasattr(obj, "co_exceptiontable"):
        # Python 3.11 and later: there are some new attributes
        # related to the enhanced exceptions.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_names,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_qualname,
            obj.co_firstlineno,
            obj.co_linetable,
            obj.co_exceptiontable,
            co_freevars,
            co_cellvars,
        )
    elif hasattr(obj, "co_linetable"):
        # Python 3.10: obj.co_lnotab is deprecated and the constructor
        # expects obj.co_linetable instead.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_names,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_firstlineno,
            obj.co_linetable,
            co_freevars,
            co_cellvars,
        )
    elif hasattr(obj, "co_nmeta"):  # pragma: no cover
        # "nogil" Python: modified attributes from 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_framesize,
            obj.co_ndefaultargs,
            obj.co_nmeta,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            obj.co_exc_handlers,
            obj.co_jump_table,
            co_freevars,
            co_cellvars,
            obj.co_free2reg,
            obj.co_cell2reg,
        )
    else:
        # Backward compat for 3.8 and 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_names,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            co_freevars,
            co_cellvars,
        )
    return types.CodeType, args


def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer."""
    try:
        obj.cell_contents
    except ValueError:  # cell is empty
        return _make_empty_cell, ()
    else:
        return _make_cell, (obj.cell_contents,)


def _classmethod_reduce(obj):
    orig_func = obj.__func__
    return type(obj), (orig_func,)


def _file_reduce(obj):
    """Save a file."""
    import io

    if not hasattr(obj, "name") or not hasattr(obj, "mode"):
        raise pickle.PicklingError(
            "Cannot pickle files that do not map to an actual file"
        )
    if obj is sys.stdout:
        return getattr, (sys, "stdout")
    if obj is sys.stderr:
        return getattr, (sys, "stderr")
    if obj is sys.stdin:
        raise pickle.PicklingError("Cannot pickle standard input")
    if obj.closed:
        raise pickle.PicklingError("Cannot pickle closed files")
    if hasattr(obj, "isatty") and obj.isatty():
        raise pickle.PicklingError("Cannot pickle files that map to tty objects")
    if "r" not in obj.mode and "+" not in obj.mode:
        raise pickle.PicklingError(
            "Cannot pickle files that are not opened for reading: %s" % obj.mode
        )

    name = obj.name

    retval = io.StringIO()

    try:
        # Read the whole file
        curloc = obj.tell()
        obj.seek(0)
        contents = obj.read()
        obj.seek(curloc)
    except OSError as e:
        raise pickle.PicklingError(
            "Cannot pickle file %s as it cannot be read" % name
        ) from e
    retval.write(contents)
    retval.seek(curloc)

    retval.name = name
    return _file_reconstructor, (retval,)
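
# Illustrative sketch of the file reducer above: a readable text file is
# pickled by value as an in-memory snapshot, and unpickling yields an
# io.StringIO rather than a reopened file handle ("data.txt" is a
# hypothetical path):
#
#   import cloudpickle, pickle
#   with open("data.txt") as f:
#       payload = cloudpickle.dumps(f)
#   snapshot = pickle.loads(payload)   # io.StringIO with the file contents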


def _getset_descriptor_reduce(obj):
    return getattr, (obj.__objclass__, obj.__name__)


def _mappingproxy_reduce(obj):
    return types.MappingProxyType, (dict(obj),)


def _memoryview_reduce(obj):
    return bytes, (obj.tobytes(),)


def _module_reduce(obj):
    if _should_pickle_by_reference(obj):
        return subimport, (obj.__name__,)
    else:
        # Some external libraries can populate the "__builtins__" entry of a
        # module's `__dict__` with unpicklable objects (see #316). For that
        # reason, we do not attempt to pickle the "__builtins__" entry, and
        # restore a default value for it at unpickling time.
        state = obj.__dict__.copy()
        state.pop("__builtins__", None)
        return dynamic_subimport, (obj.__name__, state)


def _method_reduce(obj):
    return (types.MethodType, (obj.__func__, obj.__self__))


def _logger_reduce(obj):
    return logging.getLogger, (obj.name,)


def _root_logger_reduce(obj):
    return logging.getLogger, ()


def _property_reduce(obj):
    return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)


def _weakset_reduce(obj):
    return weakref.WeakSet, (list(obj),)


def _dynamic_class_reduce(obj):
    """Save a class that can't be referenced as a module attribute.

    This method is used to serialize classes that are defined inside
    functions, or that otherwise can't be serialized as attribute lookups
    from importable modules.
    """
    if Enum is not None and issubclass(obj, Enum):
        return (
            _make_skeleton_enum,
            _enum_getnewargs(obj),
            _enum_getstate(obj),
            None,
            None,
            _class_setstate,
        )
    else:
        return (
            _make_skeleton_class,
            _class_getnewargs(obj),
            _class_getstate(obj),
            None,
            None,
            _class_setstate,
        )


def _class_reduce(obj):
    """Select the reducer depending on the dynamic nature of the class obj."""
    if obj is type(None):  # noqa
        return type, (None,)
    elif obj is type(Ellipsis):
        return type, (Ellipsis,)
    elif obj is type(NotImplemented):
        return type, (NotImplemented,)
    elif obj in _BUILTIN_TYPE_NAMES:
        return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
    elif not _should_pickle_by_reference(obj):
        return _dynamic_class_reduce(obj)
    return NotImplemented


def _dict_keys_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_keys, (list(obj),)


def _dict_values_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_values, (list(obj),)


def _dict_items_reduce(obj):
    return _make_dict_items, (dict(obj),)


def _odict_keys_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_keys, (list(obj), True)


def _odict_values_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_values, (list(obj), True)


def _odict_items_reduce(obj):
    return _make_dict_items, (dict(obj), True)


def _dataclass_field_base_reduce(obj):
    return _get_dataclass_field_type_sentinel, (obj.name,)


# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# state setters are called at unpickling time, once the object is created and
# it has to be updated to how it was at pickling time.


def _function_setstate(obj, state):
    """Update the state of a dynamic function.

    As __closure__ and __globals__ are readonly attributes of a function, we
    cannot rely on the native setstate routine of pickle.load_build, which
    calls setattr on items of the slotstate. Instead, we have to modify them
    inplace.
    """
    state, slotstate = state
    obj.__dict__.update(state)

    obj_globals = slotstate.pop("__globals__")
    obj_closure = slotstate.pop("__closure__")
    # _cloudpickle_submodules is a list of submodules that must be loaded for
    # the pickled function to work correctly at unpickling time. Now that these
    # submodules are depickled (hence imported), they can be removed from the
    # object's state (the object state only served as a reference holder to
    # these submodules)
    slotstate.pop("_cloudpickle_submodules")

    obj.__globals__.update(obj_globals)
    obj.__globals__["__builtins__"] = __builtins__

    if obj_closure is not None:
        for i, cell in enumerate(obj_closure):
            try:
                value = cell.cell_contents
            except ValueError:  # cell is empty
                continue
            obj.__closure__[i].cell_contents = value

    for k, v in slotstate.items():
        setattr(obj, k, v)


def _class_setstate(obj, state):
    state, slotstate = state
    registry = None
    for attrname, attr in state.items():
        if attrname == "_abc_impl":
            registry = attr
        else:
            # Note: setting attribute names on a class automatically triggers their
            # interning in CPython:
            # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957
            #
            # This means that to get deterministic pickling for a dynamic class that
            # was initially defined in a different Python process, the pickler
            # needs to ensure that dynamic class and function attribute names are
            # systematically copied into a non-interned version to avoid
            # unpredictable pickle payloads.
            #
            # Indeed the Pickler's memoizer relies on physical object identity to break
            # cycles in the reference graph of the object being serialized.
            setattr(obj, attrname, attr)

    if sys.version_info >= (3, 13) and "__firstlineno__" in state:
        # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it
        # will be automatically deleted by the `setattr(obj, attrname, attr)` call
        # above when `attrname` is "__firstlineno__". We assume that preserving this
        # information might be important for some users and that it is not stale in
        # the context of cloudpickle usage, hence legitimate to propagate. Furthermore
        # it is necessary to do so to keep deterministic chained pickling as tested in
        # test_deterministic_str_interning_for_chained_dynamic_class_pickling.
        obj.__firstlineno__ = state["__firstlineno__"]

    if registry is not None:
        for subclass in registry:
            obj.register(subclass)

    return obj


# COLLECTION OF DATACLASS UTILITIES
# ---------------------------------
# There are some internal sentinel values whose identity must be preserved when
# unpickling dataclass fields. Each sentinel value has a unique name that we can
# use to retrieve its identity at unpickling time.


_DATACLASS_FIELD_TYPE_SENTINELS = {
    dataclasses._FIELD.name: dataclasses._FIELD,
    dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR,
    dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR,
}


def _get_dataclass_field_type_sentinel(name):
    return _DATACLASS_FIELD_TYPE_SENTINELS[name]


class Pickler(pickle.Pickler):
    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
    _dispatch_table[io.TextIOWrapper] = _file_reduce
    _dispatch_table[logging.Logger] = _logger_reduce
    _dispatch_table[logging.RootLogger] = _root_logger_reduce
    _dispatch_table[memoryview] = _memoryview_reduce
    _dispatch_table[property] = _property_reduce
    _dispatch_table[staticmethod] = _classmethod_reduce
    _dispatch_table[CellType] = _cell_reduce
    _dispatch_table[types.CodeType] = _code_reduce
    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
    _dispatch_table[types.ModuleType] = _module_reduce
    _dispatch_table[types.MethodType] = _method_reduce
    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
    _dispatch_table[weakref.WeakSet] = _weakset_reduce
    _dispatch_table[typing.TypeVar] = _typevar_reduce
    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
    _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
    _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
    _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
    _dispatch_table[abc.abstractmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractproperty] = _property_reduce
    _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

    # function reducers are defined as instance methods of cloudpickle.Pickler
    # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (_make_function, newargs, state, None, None, _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this reducer
        returns NotImplemented, making the cloudpickle.Pickler fall back to
        traditional pickle.Pickler routines to save obj. Otherwise, it reduces
        obj using a custom cloudpickle reducer designed specifically to handle
        dynamic functions.
        """
        if _should_pickle_by_reference(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # cloudpickle.Pickler.globals_ref allows functions sharing the same
        # globals at pickling time to also share them once unpickled, on one
        # condition: since globals_ref is an attribute of a cloudpickle.Pickler
        # instance, and a new cloudpickle.Pickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
        # to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
        # cloudpickle.Pickler.dump, as long as the multiple invocations are
        # bound to the same cloudpickle.Pickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if base_globals == {}:
            # Add module attributes used to resolve relative imports
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure
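
    # A minimal sketch of the globals-sharing behavior described above: two
    # functions pickled in the same dumps() call share their reconstructed
    # global namespace at load time.
    #
    #   counter = {"n": 0}
    #   def bump():
    #       counter["n"] += 1
    #   def read():
    #       return counter["n"]
    #   g, h = pickle.loads(cloudpickle.dumps([bump, read]))
    #   g(); g()
    #   assert h() == 2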

    def dump(self, obj):
        try:
            return super().dump(obj)
        except RuntimeError as e:
            if len(e.args) > 0 and "recursion" in e.args[0]:
                msg = "Could not pickle object as excessively deep recursion required."
                raise pickle.PicklingError(msg) from e
            else:
                raise

    def __init__(self, file, protocol=None, buffer_callback=None):
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map of function __globals__ attribute ids, to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)

    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on the reducer_override method to customize the
        # pickler behavior.

        # `cloudpickle.Pickler.dispatch` is only left for backward
        # compatibility - note that when using protocol 5,
        # `cloudpickle.Pickler.dispatch` is not an extension of
        # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
        # subclasses the C-implemented `pickle.Pickler`, which does not expose
        # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
        # used `cloudpickle.Pickler.dispatch` as a class-level attribute
        # storing all reducers implemented by cloudpickle, but the attribute
        # name was not a great choice because it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
        dispatch = dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
        # the C-implemented `pickle.Pickler`.
        # TODO: decorrelate reducer_override (which is tied to CPython's
        # implementation; would it make sense to backport it to PyPy?) and
        # pickle's protocol 5, which is implementation agnostic. Currently, the
        # availability of both notions coincides on CPython's pickle, but it
        # may not be the case anymore when PyPy implements protocol 5.

        def reducer_override(self, obj):
            """Type-agnostic reducing callback for function and classes.

            For performance reasons, subclasses of the C `pickle.Pickler` class
            cannot register custom reducers for functions and classes in the
            dispatch_table attribute. Reducers for such types must instead be
            implemented via the special `reducer_override` method.

            Note that this method will be called for any object except a few
            builtin-types (int, lists, dicts etc.), which differs from reducers
            in the Pickler's dispatch_table, each of them being invoked for
            objects of a specific type only.

            This property comes in handy for classes: although most classes are
            instances of the ``type`` metaclass, some of them can be instances
            of other custom metaclasses (such as enum.EnumMeta for example). In
            particular, the metaclass will likely not be known in advance, and
            thus cannot be special-cased using an entry in the dispatch_table.
            reducer_override, among other things, allows us to register a
            reducer that will be called for any class, independently of its
            type.

            Notes:

            * reducer_override has the priority over dispatch_table-registered
              reducers.
            * reducer_override can be used to fix other limitations of
              cloudpickle for other types that suffered from type-specific
              reducers, such as Exceptions. See
              https://github.com/cloudpipe/cloudpickle/issues/248
            """
            t = type(obj)
            try:
                is_anyclass = issubclass(t, type)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                is_anyclass = False

            if is_anyclass:
                return _class_reduce(obj)
            elif isinstance(obj, types.FunctionType):
                return self._function_reduce(obj)
            else:
                # fallback to save_global, including the Pickler's
                # dispatch_table
                return NotImplemented

    else:
        # When reducer_override is not available, hack the pure-Python
        # Pickler's types.FunctionType and type savers. Note: the type saver
        # must override Pickler.save_global, because pickle.py contains a
        # hard-coded call to save_global when pickling meta-classes.
        dispatch = pickle.Pickler.dispatch.copy()

        def _save_reduce_pickle5(
            self,
            func,
            args,
            state=None,
            listitems=None,
            dictitems=None,
            state_setter=None,
            obj=None,
        ):
            save = self.save
            write = self.write
            self.save_reduce(
                func,
                args,
                state=None,
                listitems=listitems,
                dictitems=dictitems,
                obj=obj,
            )
            # backport of the Python 3.8 state_setter pickle operations
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(pickle.TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(pickle.REDUCE)
            # The purpose of state_setter is to carry out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(pickle.POP)

        def save_global(self, obj, name=None, pack=struct.pack):
            """Main dispatch method.

            The name of this method is somewhat misleading: all types get
            dispatched here.
            """
            if obj is type(None):  # noqa
                return self.save_reduce(type, (None,), obj=obj)
            elif obj is type(Ellipsis):
                return self.save_reduce(type, (Ellipsis,), obj=obj)
            elif obj is type(NotImplemented):
                return self.save_reduce(type, (NotImplemented,), obj=obj)
            elif obj in _BUILTIN_TYPE_NAMES:
                return self.save_reduce(
                    _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                )

            if name is not None:
                super().save_global(obj, name=name)
            elif not _should_pickle_by_reference(obj, name=name):
                self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
            else:
                super().save_global(obj, name=name)

        dispatch[type] = save_global

        def save_function(self, obj, name=None):
            """Registered with the dispatch to handle all function types.

            Determines what kind of function obj is (e.g. lambda, defined at
            interactive prompt, etc) and handles the pickling appropriately.
            """
            if _should_pickle_by_reference(obj, name=name):
                return super().save_global(obj, name=name)
            elif PYPY and isinstance(obj.__code__, builtin_code_type):
                return self.save_pypy_builtin_func(obj)
            else:
                return self._save_reduce_pickle5(
                    *self._dynamic_function_reduce(obj), obj=obj
                )

        def save_pypy_builtin_func(self, obj):
            """Save pypy equivalent of builtin functions.

            PyPy does not have the concept of builtin-functions. Instead,
            builtin-functions are simple function instances, but with a
            builtin-code attribute.
            Most of the time, builtin functions should be pickled by attribute.
            But PyPy has flaky support for __qualname__, so some builtin
            functions such as float.__new__ will be classified as dynamic. For
            this reason only, we created this special routine. Because
            builtin-functions are not expected to have closures or globals,
            there is no additional hack (compared to the one already
            implemented in pickle) to protect ourselves from reference cycles.
            A simple (reconstructor, newargs, obj.__dict__) tuple is passed to
            save_reduce. Note also that PyPy improved their support for
            __qualname__ in v3.6, so this routine should be removed when
            cloudpickle supports only PyPy 3.6 and later.
            """
            rv = (
                types.FunctionType,
                (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                obj.__dict__,
            )
            self.save_reduce(*rv, obj=obj)

        dispatch[types.FunctionType] = save_function


# Shorthands similar to pickle.dump/pickle.dumps


def dump(obj, file, protocol=None, buffer_callback=None):
    """Serialize obj as bytes streamed into file

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
    """
    Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj)


def dumps(obj, protocol=None, buffer_callback=None):
    """Serialize obj as a string of bytes allocated in memory

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
    """
    with io.BytesIO() as file:
        cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
        cp.dump(obj)
        return file.getvalue()
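
# Typical round trip, as a sketch: serialize with cloudpickle, load with the
# standard library (no cloudpickle required on the reading side):
#
#   import pickle
#   payload = dumps(lambda x: x ** 2)
#   square = pickle.loads(payload)
#   assert square(3) == 9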


# Include pickle's loading functions in this namespace for convenience.
load, loads = pickle.load, pickle.loads

# Backward compat alias.
CloudPickler = Pickler