1"""Pickler class to extend the standard pickle.Pickler functionality
2
3The main objective is to make it natural to perform distributed computing on
4clusters (such as PySpark, Dask, Ray...) with interactively defined code
5(functions, classes, ...) written in notebooks or console.
6
7In particular this pickler adds the following features:
8- serialize interactively-defined or locally-defined functions, classes,
9 enums, typevars, lambdas and nested functions to compiled byte code;
10- deal with some other non-serializable objects in an ad-hoc manner where
11 applicable.
12
13This pickler is therefore meant to be used for the communication between short
14lived Python processes running the same version of Python and libraries. In
15particular, it is not meant to be used for long term storage of Python objects.
16
17It does not include an unpickler, as standard Python unpickling suffices.
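
A minimal usage sketch (relying only on the public ``dumps``/``loads``
helpers defined at the end of this module; the standard ``pickle.loads``
is sufficient on the receiving side)::

    import pickle

    import cloudpickle

    squared = lambda x: x**2  # interactively defined, not importable
    payload = cloudpickle.dumps(squared)
    assert pickle.loads(payload)(3) == 9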

This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.

Copyright (c) 2012-now, CloudPickle developers and contributors.
Copyright (c) 2012, Regents of the University of California.
Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the University of California, Berkeley nor the
      names of its contributors may be used to endorse or promote
      products derived from this software without specific prior written
      permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

import _collections_abc
from collections import ChainMap, OrderedDict
import abc
import builtins
import copyreg
import dataclasses
import dis
from enum import Enum
import io
import itertools
import logging
import opcode
import pickle
from pickle import _getattribute as _pickle_getattribute
import platform
import struct
import sys
import threading
import types
import typing
import uuid
import warnings
import weakref

# The following import is required to be imported in the cloudpickle
# namespace to be able to load pickle files generated with older versions of
# cloudpickle. See: tests/test_backward_compat.py
from types import CellType  # noqa: F401


# cloudpickle is meant for inter-process communication: we expect all
# communicating processes to run the same Python version, hence we favor
# communication speed over compatibility:
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Names of modules whose resources should be treated as dynamic.
_PICKLE_BY_VALUE_MODULES = set()

# Track the provenance of reconstructed dynamic classes to make it possible to
# reconstruct instances from the matching singleton class definition when
# appropriate and preserve the usual "isinstance" semantics of Python objects.
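# For example, unpickling two payloads produced from the same interactively
# defined class in a fresh process should yield the very same class object,
# so that instances restored from either payload pass isinstance checks
# against the other.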
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()

PYPY = platform.python_implementation() == "PyPy"

builtin_code_type = None
if PYPY:
    # builtin-code objects only exist in pypy
    builtin_code_type = type(float.__new__.__code__)

_extract_code_globals_cache = weakref.WeakKeyDictionary()


def _get_or_create_tracker_id(class_def):
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if class_tracker_id is None:
            class_tracker_id = uuid.uuid4().hex
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
            _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def
    return class_tracker_id


def _lookup_class_or_track(class_tracker_id, class_def):
    if class_tracker_id is not None:
        with _DYNAMIC_CLASS_TRACKER_LOCK:
            class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
                class_tracker_id, class_def
            )
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
    return class_def


def register_pickle_by_value(module):
    """Register a module to make its functions and classes picklable by value.

    By default, functions and classes that are attributes of an importable
    module are pickled by reference, that is, by relying on re-importing
    the attribute from the module at load time.

    If `register_pickle_by_value(module)` is called, all its functions and
    classes are subsequently pickled by value, meaning that they can
    be loaded in Python processes where the module is not importable.

    This is especially useful when developing a module in a distributed
    execution environment: restarting the client Python process with the new
    source code is enough; there is no need to re-install the new version
    of the module on all the worker nodes nor to restart the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.
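
    Example (a sketch; ``mymodule`` and ``some_function`` are hypothetical
    placeholders for any module already imported in the client process)::

        import cloudpickle
        import mymodule

        cloudpickle.register_pickle_by_value(mymodule)
        payload = cloudpickle.dumps(mymodule.some_function)  # pickled by value
        cloudpickle.unregister_pickle_by_value(mymodule)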
146 """
147 if not isinstance(module, types.ModuleType):
148 raise ValueError(f"Input should be a module object, got {str(module)} instead")
149 # In the future, cloudpickle may need a way to access any module registered
150 # for pickling by value in order to introspect relative imports inside
151 # functions pickled by value. (see
152 # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
153 # This access can be ensured by checking that module is present in
154 # sys.modules at registering time and assuming that it will still be in
155 # there when accessed during pickling. Another alternative would be to
156 # store a weakref to the module. Even though cloudpickle does not implement
157 # this introspection yet, in order to avoid a possible breaking change
158 # later, we still enforce the presence of module inside sys.modules.
159 if module.__name__ not in sys.modules:
160 raise ValueError(
161 f"{module} was not imported correctly, have you used an "
162 "`import` statement to access it?"
163 )
164 _PICKLE_BY_VALUE_MODULES.add(module.__name__)
165
166
def unregister_pickle_by_value(module):
    """Unregister that the input module should be pickled by value."""
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    if module.__name__ not in _PICKLE_BY_VALUE_MODULES:
        raise ValueError(f"{module} is not registered for pickle by value")
    _PICKLE_BY_VALUE_MODULES.remove(module.__name__)


def list_registry_pickle_by_value():
    return _PICKLE_BY_VALUE_MODULES.copy()


def _is_registered_pickle_by_value(module):
    module_name = module.__name__
    if module_name in _PICKLE_BY_VALUE_MODULES:
        return True
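    # Walk up the dotted module path: registering a package such as "pkg"
    # also covers all of its submodules ("pkg.sub", "pkg.sub.mod", ...).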
    while True:
        parent_name = module_name.rsplit(".", 1)[0]
        if parent_name == module_name:
            break
        if parent_name in _PICKLE_BY_VALUE_MODULES:
            return True
        module_name = parent_name
    return False


if sys.version_info >= (3, 14):

    def _getattribute(obj, name):
        return _pickle_getattribute(obj, name.split("."))

else:

    def _getattribute(obj, name):
        return _pickle_getattribute(obj, name)[0]


def _whichmodule(obj, name):
    """Find the module an object belongs to.

    This function differs from ``pickle.whichmodule`` in two ways:
    - it does not mangle the cases where obj's module is __main__ and obj was
      not found in any module.
    - Errors arising during module introspection are ignored, as those errors
      are considered unwanted side effects.
    """
    module_name = getattr(obj, "__module__", None)

    if module_name is not None:
        return module_name
    # Protect the iteration by using a copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr or
    # other threads importing at the same time.
    for module_name, module in sys.modules.copy().items():
        # Some modules such as coverage can inject non-module objects inside
        # sys.modules
        if (
            module_name == "__main__"
            or module_name == "__mp_main__"
            or module is None
            or not isinstance(module, types.ModuleType)
        ):
            continue
        try:
            if _getattribute(module, name) is obj:
                return module_name
        except Exception:
            pass
    return None


def _should_pickle_by_reference(obj, name=None):
    """Test whether a function or a class should be pickled by reference.

    Pickling by reference means that the object (typically a function or a
    class) is an attribute of a module that is assumed to be importable in the
    target Python environment. Loading will therefore rely on importing the
    module and then calling `getattr` on it to access the function or class.

    Pickling by reference is the only option to pickle functions and classes
    in the standard library. In cloudpickle the alternative option is to
    pickle by value (for instance for interactively or locally defined
    functions and classes or for attributes of modules that have been
    explicitly registered to be pickled by value).
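
    Example (illustrative sketch; results shown assume an interactive session
    where ``textwrap`` has been imported)::

        >>> import textwrap
        >>> _should_pickle_by_reference(textwrap.dedent)  # importable attribute
        True
        >>> _should_pickle_by_reference(lambda x: x)  # defined in __main__
        False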
250 """
251 if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
252 module_and_name = _lookup_module_and_qualname(obj, name=name)
253 if module_and_name is None:
254 return False
255 module, name = module_and_name
256 return not _is_registered_pickle_by_value(module)
257
258 elif isinstance(obj, types.ModuleType):
259 # We assume that sys.modules is primarily used as a cache mechanism for
260 # the Python import machinery. Checking if a module has been added in
261 # is sys.modules therefore a cheap and simple heuristic to tell us
262 # whether we can assume that a given module could be imported by name
263 # in another Python process.
264 if _is_registered_pickle_by_value(obj):
265 return False
266 return obj.__name__ in sys.modules
267 else:
268 raise TypeError(
269 "cannot check importability of {} instances".format(type(obj).__name__)
270 )
271
272
def _lookup_module_and_qualname(obj, name=None):
    if name is None:
        name = getattr(obj, "__qualname__", None)
    if name is None:  # pragma: no cover
        # This used to be needed for Python 2.7 support but is probably not
        # needed anymore. However we keep the __name__ introspection in case
        # users of cloudpickle rely on this old behavior for unknown reasons.
        name = getattr(obj, "__name__", None)

    module_name = _whichmodule(obj, name)

    if module_name is None:
        # In this case, obj.__module__ is None AND obj was not found in any
        # imported module. obj is thus treated as dynamic.
        return None

    if module_name == "__main__":
        return None

    # Note: if module_name is in sys.modules, the corresponding module is
    # assumed importable at unpickling time. See #357
    module = sys.modules.get(module_name, None)
    if module is None:
        # The main reason why obj's module would not be imported is that this
        # module has been dynamically created, using for example
        # types.ModuleType. The other possibility is that module was removed
        # from sys.modules after obj was created/imported. But this case is not
        # supported, as the standard pickle does not support it either.
        return None

    try:
        obj2 = _getattribute(module, name)
    except AttributeError:
        # obj was not found inside the module it points to
        return None
    if obj2 is not obj:
        return None
    return module, name


def _extract_code_globals(co):
    """Find all global names read or written to by codeblock co."""
    out_names = _extract_code_globals_cache.get(co)
    if out_names is None:
        # We use a dict with None values instead of a set to get a
        # deterministic order and avoid introducing non-deterministic pickle
        # bytes as a result.
        out_names = {name: None for name in _walk_global_ops(co)}

        # Declaring a function inside another one using the "def ..." syntax
        # generates a constant code object corresponding to the nested
        # function's code. As the nested function may itself need global
        # variables, we need to introspect its code, extract its globals (by
        # looking for code objects in its co_consts attribute) and add the
        # result to out_names.
        if co.co_consts:
            for const in co.co_consts:
                if isinstance(const, types.CodeType):
                    out_names.update(_extract_code_globals(const))

        _extract_code_globals_cache[co] = out_names

    return out_names


def _find_imported_submodules(code, top_level_dependencies):
    """Find currently imported submodules used by a function.

    Submodules used by a function need to be detected and referenced for the
    function to work correctly at depickling time. Because submodules can be
    referenced as attributes of their parent package (``package.submodule``),
    we need a special introspection technique that does not rely on
    GLOBAL-related opcodes to find references to them in a code object.

    Example:
    ```
    import concurrent.futures
    import cloudpickle
    def func():
        x = concurrent.futures.ThreadPoolExecutor
    if __name__ == '__main__':
        cloudpickle.dumps(func)
    ```
    The globals extracted by cloudpickle in the function's state include the
    concurrent package, but not its submodule (here, concurrent.futures),
    which is the module actually used by func. ``_find_imported_submodules``
    will detect the usage of concurrent.futures. Saving this module alongside
    func ensures that calling func once depickled does not fail due to
    concurrent.futures not being imported.
    """
    subimports = []
    # check if any known dependency is an imported package
    for x in top_level_dependencies:
        if (
            isinstance(x, types.ModuleType)
            and hasattr(x, "__package__")
            and x.__package__
        ):
            # check if the package has any currently loaded sub-imports
            prefix = x.__name__ + "."
            # A concurrent thread could mutate sys.modules,
            # make sure we iterate over a copy to avoid exceptions
            for name in list(sys.modules):
                # Older versions of pytest will add a "None" module to
                # sys.modules.
                if name is not None and name.startswith(prefix):
                    # check whether the function can address the sub-module
                    tokens = set(name[len(prefix) :].split("."))
                    if not tokens - set(code.co_names):
                        subimports.append(sys.modules[name])
    return subimports


# relevant opcodes
STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
HAVE_ARGUMENT = dis.HAVE_ARGUMENT
EXTENDED_ARG = dis.EXTENDED_ARG


_BUILTIN_TYPE_NAMES = {}
for k, v in types.__dict__.items():
    if type(v) is type:
        _BUILTIN_TYPE_NAMES[v] = k


def _builtin_type(name):
    if name == "ClassType":  # pragma: no cover
        # Backward compat to load pickle files generated with cloudpickle
        # < 1.3 even if loading pickle files from older versions is not
        # officially supported.
        return type
    return getattr(types, name)


def _walk_global_ops(code):
    """Yield referenced names for global-referencing instructions in code."""
    for instr in dis.get_instructions(code):
        op = instr.opcode
        if op in GLOBAL_OPS:
            yield instr.argval


def _extract_class_dict(cls):
    """Retrieve a copy of the dict of a class without the inherited methods."""
    # Hack to circumvent non-predictable memoization caused by string interning.
    # See the inline comment in _class_setstate for details.
    clsdict = {"".join(k): cls.__dict__[k] for k in sorted(cls.__dict__)}

    if len(cls.__bases__) == 1:
        inherited_dict = cls.__bases__[0].__dict__
    else:
        inherited_dict = {}
        for base in reversed(cls.__bases__):
            inherited_dict.update(base.__dict__)
    to_remove = []
    for name, value in clsdict.items():
        try:
            base_value = inherited_dict[name]
            if value is base_value:
                to_remove.append(name)
        except KeyError:
            pass
    for name in to_remove:
        clsdict.pop(name)
    return clsdict


def is_tornado_coroutine(func):
    """Return whether `func` is a Tornado coroutine function.

    Running coroutines are not supported.
    """
    warnings.warn(
        "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
        "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
        "directly instead.",
        category=DeprecationWarning,
    )
    if "tornado.gen" not in sys.modules:
        return False
    gen = sys.modules["tornado.gen"]
    if not hasattr(gen, "is_coroutine_function"):
        # Tornado version is too old
        return False
    return gen.is_coroutine_function(func)


def subimport(name):
    # We cannot simply do `return __import__(name)`: if ``name`` is the name
    # of a submodule, __import__ returns the top-level root module of this
    # submodule. For instance, __import__('os.path') returns the `os` module.
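    # In other words (a sketch): subimport("os.path") triggers the import
    # machinery via __import__("os.path") but then returns
    # sys.modules["os.path"] itself rather than the top-level `os` package.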
    __import__(name)
    return sys.modules[name]


def dynamic_subimport(name, vars):
    mod = types.ModuleType(name)
    mod.__dict__.update(vars)
    mod.__dict__["__builtins__"] = builtins.__dict__
    return mod


def _get_cell_contents(cell):
    try:
        return cell.cell_contents
    except ValueError:
        # Handle empty cells explicitly with a sentinel value.
        return _empty_cell_value


def instance(cls):
    """Create a new instance of a class.

    Parameters
    ----------
    cls : type
        The class to create an instance of.

    Returns
    -------
    instance : cls
        A new instance of ``cls``.
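
    Examples
    --------
    Used just below in this module as a decorator that replaces a class
    statement with a singleton instance of that class:

    >>> @instance
    ... class _sentinel:
    ...     pass
    ...
    >>> type(_sentinel).__name__
    '_sentinel'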
500 """
501 return cls()
502
503
504@instance
505class _empty_cell_value:
506 """Sentinel for empty closures."""
507
508 @classmethod
509 def __reduce__(cls):
510 return cls.__name__
511
512
def _make_function(code, globals, name, argdefs, closure):
    # Setting __builtins__ in globals is needed for nogil CPython.
    globals["__builtins__"] = __builtins__
    return types.FunctionType(code, globals, name, argdefs, closure)


def _make_empty_cell():
    if False:
        # trick the compiler into creating an empty cell in our lambda
        cell = None
        raise AssertionError("this route should not be executed")

    return (lambda: cell).__closure__[0]


def _make_cell(value=_empty_cell_value):
    cell = _make_empty_cell()
    if value is not _empty_cell_value:
        cell.cell_contents = value
    return cell


def _make_skeleton_class(
    type_constructor, name, bases, type_kwargs, class_tracker_id, extra
):
    """Build a dynamic class with an empty __dict__ to be filled once memoized.

    If class_tracker_id is not None, try to look up an existing class
    definition matching that id. If none is found, track a newly reconstructed
    class definition under that id so that other instances stemming from the
    same class id will also reuse this class definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise.
    """
    # We need to intern the keys of the type_kwargs dict to avoid having
    # different pickles for the same dynamic class depending on whether it was
    # dynamically created or reconstructed from a pickled stream.
    type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()}

    skeleton_class = types.new_class(
        name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs)
    )

    return _lookup_class_or_track(class_tracker_id, skeleton_class)


def _make_skeleton_enum(
    bases, name, qualname, members, module, class_tracker_id, extra
):
    """Build a dynamic enum with an empty __dict__ to be filled once memoized.

    The creation of the enum class is inspired by the code of
    EnumMeta._create_.

    If class_tracker_id is not None, try to look up an existing enum
    definition matching that id. If none is found, track a newly reconstructed
    enum definition under that id so that other instances stemming from the
    same class id will also reuse this enum definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise.
    """
    # enums always inherit from their base Enum class at the last position in
    # the list of base classes:
    enum_base = bases[-1]
    metacls = enum_base.__class__
    classdict = metacls.__prepare__(name, bases)

    for member_name, member_value in members.items():
        classdict[member_name] = member_value
    enum_class = metacls.__new__(metacls, name, bases, classdict)
    enum_class.__module__ = module
    enum_class.__qualname__ = qualname

    return _lookup_class_or_track(class_tracker_id, enum_class)


def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
    tv = typing.TypeVar(
        name,
        *constraints,
        bound=bound,
        covariant=covariant,
        contravariant=contravariant,
    )
    return _lookup_class_or_track(class_tracker_id, tv)


def _decompose_typevar(obj):
    return (
        obj.__name__,
        obj.__bound__,
        obj.__constraints__,
        obj.__covariant__,
        obj.__contravariant__,
        _get_or_create_tracker_id(obj),
    )


def _typevar_reduce(obj):
    # TypeVar instances require the module information, hence we do not use
    # _should_pickle_by_reference directly.
    module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)

    if module_and_name is None:
        return (_make_typevar, _decompose_typevar(obj))
    elif _is_registered_pickle_by_value(module_and_name[0]):
        return (_make_typevar, _decompose_typevar(obj))

    return (getattr, module_and_name)


def _get_bases(typ):
    if "__orig_bases__" in getattr(typ, "__dict__", {}):
        # For generic types (see PEP 560)
        # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
        # correct. Subclasses of a fully-parameterized generic class do not
        # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
        # will return True because it's defined in the base class.
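        # For example (a sketch): for `class A(typing.List[int])`,
        # A.__dict__["__orig_bases__"] == (typing.List[int],) while
        # A.__bases__ == (list,); a further subclass `class B(A)` does not
        # define its own `__orig_bases__` but still inherits A's.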
        bases_attr = "__orig_bases__"
    else:
        # For regular class objects
        bases_attr = "__bases__"
    return getattr(typ, bases_attr)


def _make_dict_keys(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict.fromkeys(obj).keys()
    else:
        return dict.fromkeys(obj).keys()


def _make_dict_values(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict((i, _) for i, _ in enumerate(obj)).values()
    else:
        return {i: _ for i, _ in enumerate(obj)}.values()


def _make_dict_items(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict(obj).items()
    else:
        return obj.items()


# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
# -------------------------------------------------


def _class_getnewargs(obj):
    type_kwargs = {}
    if "__module__" in obj.__dict__:
        type_kwargs["__module__"] = obj.__module__

    __dict__ = obj.__dict__.get("__dict__", None)
    if isinstance(__dict__, property):
        type_kwargs["__dict__"] = __dict__

    return (
        type(obj),
        obj.__name__,
        _get_bases(obj),
        type_kwargs,
        _get_or_create_tracker_id(obj),
        None,
    )


def _enum_getnewargs(obj):
    members = {e.name: e.value for e in obj}
    return (
        obj.__bases__,
        obj.__name__,
        obj.__qualname__,
        members,
        obj.__module__,
        _get_or_create_tracker_id(obj),
        None,
    )


# COLLECTION OF OBJECTS RECONSTRUCTORS
# ------------------------------------
def _file_reconstructor(retval):
    return retval


# COLLECTION OF OBJECTS STATE GETTERS
# -----------------------------------


def _function_getstate(func):
    # - Put func's dynamic attributes (stored in func.__dict__) in state. These
    #   attributes will be restored at unpickling time using
    #   f.__dict__.update(state)
    # - Put func's members into slotstate. Such attributes will be restored at
    #   unpickling time by iterating over slotstate and calling setattr(func,
    #   slotname, slotvalue)
    slotstate = {
        # Hack to circumvent non-predictable memoization caused by string interning.
        # See the inline comment in _class_setstate for details.
        "__name__": "".join(func.__name__),
        "__qualname__": "".join(func.__qualname__),
        "__annotations__": func.__annotations__,
        "__kwdefaults__": func.__kwdefaults__,
        "__defaults__": func.__defaults__,
        "__module__": func.__module__,
        "__doc__": func.__doc__,
        "__closure__": func.__closure__,
    }

    f_globals_ref = _extract_code_globals(func.__code__)
    f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__}

    if func.__closure__ is not None:
        closure_values = list(map(_get_cell_contents, func.__closure__))
    else:
        closure_values = ()

    # Extract currently-imported submodules used by func. Storing these
    # modules in a dedicated _cloudpickle_submodules entry of the object's
    # state will trigger the side effect of importing these modules at
    # unpickling time (which is necessary for func to work correctly once
    # depickled).
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        func.__code__, itertools.chain(f_globals.values(), closure_values)
    )
    slotstate["__globals__"] = f_globals

    # Hack to circumvent non-predictable memoization caused by string interning.
    # See the inline comment in _class_setstate for details.
    state = {"".join(k): v for k, v in func.__dict__.items()}
    return state, slotstate


def _class_getstate(obj):
    clsdict = _extract_class_dict(obj)
    clsdict.pop("__weakref__", None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        clsdict.pop("_abc_cache", None)
        clsdict.pop("_abc_negative_cache", None)
        clsdict.pop("_abc_negative_cache_version", None)
        registry = clsdict.pop("_abc_registry", None)
        if registry is None:
            # The abc caches and registered subclasses of a
            # class are bundled into the single _abc_impl attribute
            clsdict.pop("_abc_impl", None)
            (registry, _, _, _) = abc._get_dump(obj)

            clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry]
        else:
            # In the above if clause, registry is a set of weakrefs -- in
            # this case, registry is a WeakSet
            clsdict["_abc_impl"] = [type_ for type_ in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute, no need to
        # save them in obj's state
        if isinstance(obj.__slots__, str):
            clsdict.pop(obj.__slots__)
        else:
            for k in obj.__slots__:
                clsdict.pop(k, None)

    clsdict.pop("__dict__", None)  # unpicklable property object

    if sys.version_info >= (3, 14):
        # PEP-649/749: __annotate_func__ contains a closure that references the
        # class dict. We need to exclude it from pickling. Python will recreate
        # it when __annotations__ is accessed at unpickling time.
        clsdict.pop("__annotate_func__", None)

    return (clsdict, {})


def _enum_getstate(obj):
    clsdict, slotstate = _class_getstate(obj)

    members = {e.name: e.value for e in obj}
    # Cleanup the clsdict that will be passed to _make_skeleton_enum:
    # Those attributes are already handled by the metaclass.
    for attrname in [
        "_generate_next_value_",
        "_member_names_",
        "_member_map_",
        "_member_type_",
        "_value2member_map_",
    ]:
        clsdict.pop(attrname, None)
    for member in members:
        clsdict.pop(member)
        # Special handling of Enum subclasses
    return clsdict, slotstate


# COLLECTIONS OF OBJECTS REDUCERS
# -------------------------------
# A reducer is a function taking a single argument (obj), and that returns a
# tuple with all the necessary data to re-construct obj. Apart from a few
# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
# correctly pickle an object.
# While many built-in objects (exception objects, instances of the "object"
# class, etc.) are shipped with their own built-in reducer (invoked using
# obj.__reduce__), some do not. The following methods were created to fill
# these holes.


def _code_reduce(obj):
    """code object reducer."""
    # If you are not sure about the order of arguments, take a look at help
    # of the specific type from types, for example:
    # >>> from types import CodeType
    # >>> help(CodeType)

    # Hack to circumvent non-predictable memoization caused by string interning.
    # See the inline comment in _class_setstate for details.
    co_name = "".join(obj.co_name)

    # Create shallow copies of these tuples to make the cloudpickle payload
    # deterministic. When creating a code object during load, copies of these
    # four tuples are created, while in the main process, these tuples can be
    # shared. By always creating copies, we make sure the resulting payload is
    # deterministic.
    co_names = tuple(name for name in obj.co_names)
    co_varnames = tuple(name for name in obj.co_varnames)
    co_freevars = tuple(name for name in obj.co_freevars)
    co_cellvars = tuple(name for name in obj.co_cellvars)
    if hasattr(obj, "co_exceptiontable"):
        # Python 3.11 and later: there are some new attributes
        # related to the enhanced exceptions.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_names,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_qualname,
            obj.co_firstlineno,
            obj.co_linetable,
            obj.co_exceptiontable,
            co_freevars,
            co_cellvars,
        )
    elif hasattr(obj, "co_linetable"):
        # Python 3.10 and later: obj.co_lnotab is deprecated and the
        # constructor expects obj.co_linetable instead.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_names,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_firstlineno,
            obj.co_linetable,
            co_freevars,
            co_cellvars,
        )
    elif hasattr(obj, "co_nmeta"):  # pragma: no cover
        # "nogil" Python: modified attributes from 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_framesize,
            obj.co_ndefaultargs,
            obj.co_nmeta,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            obj.co_exc_handlers,
            obj.co_jump_table,
            co_freevars,
            co_cellvars,
            obj.co_free2reg,
            obj.co_cell2reg,
        )
    else:
        # Backward compat for 3.8 and 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            co_names,
            co_varnames,
            obj.co_filename,
            co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            co_freevars,
            co_cellvars,
        )
    return types.CodeType, args


def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer."""
    try:
        obj.cell_contents
    except ValueError:  # cell is empty
        return _make_empty_cell, ()
    else:
        return _make_cell, (obj.cell_contents,)


def _classmethod_reduce(obj):
    orig_func = obj.__func__
    return type(obj), (orig_func,)


def _file_reduce(obj):
    """Save a file."""
    # Note: the io module is already imported at module level; no local
    # import is needed here.
    if not hasattr(obj, "name") or not hasattr(obj, "mode"):
        raise pickle.PicklingError(
            "Cannot pickle files that do not map to an actual file"
        )
    if obj is sys.stdout:
        return getattr, (sys, "stdout")
    if obj is sys.stderr:
        return getattr, (sys, "stderr")
    if obj is sys.stdin:
        raise pickle.PicklingError("Cannot pickle standard input")
    if obj.closed:
        raise pickle.PicklingError("Cannot pickle closed files")
    if hasattr(obj, "isatty") and obj.isatty():
        raise pickle.PicklingError("Cannot pickle files that map to tty objects")
    if "r" not in obj.mode and "+" not in obj.mode:
        raise pickle.PicklingError(
            "Cannot pickle files that are not opened for reading: %s" % obj.mode
        )

    name = obj.name

    retval = io.StringIO()

    try:
        # Read the whole file
        curloc = obj.tell()
        obj.seek(0)
        contents = obj.read()
        obj.seek(curloc)
    except OSError as e:
        raise pickle.PicklingError(
            "Cannot pickle file %s as it cannot be read" % name
        ) from e
    retval.write(contents)
    retval.seek(curloc)

    retval.name = name
    return _file_reconstructor, (retval,)


def _getset_descriptor_reduce(obj):
    return getattr, (obj.__objclass__, obj.__name__)


def _mappingproxy_reduce(obj):
    return types.MappingProxyType, (dict(obj),)


def _memoryview_reduce(obj):
    return bytes, (obj.tobytes(),)


def _module_reduce(obj):
    if _should_pickle_by_reference(obj):
        return subimport, (obj.__name__,)
    else:
        # Some external libraries can populate the "__builtins__" entry of a
        # module's `__dict__` with unpicklable objects (see #316). For that
        # reason, we do not attempt to pickle the "__builtins__" entry, and
        # restore a default value for it at unpickling time.
        state = obj.__dict__.copy()
        state.pop("__builtins__", None)
        return dynamic_subimport, (obj.__name__, state)


def _method_reduce(obj):
    return (types.MethodType, (obj.__func__, obj.__self__))


def _logger_reduce(obj):
    return logging.getLogger, (obj.name,)


def _root_logger_reduce(obj):
    return logging.getLogger, ()


def _property_reduce(obj):
    return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)


def _weakset_reduce(obj):
    return weakref.WeakSet, (list(obj),)


def _dynamic_class_reduce(obj):
    """Save a class that can't be referenced as a module attribute.

    This method is used to serialize classes that are defined inside
    functions, or that otherwise can't be serialized as attribute lookups
    from importable modules.
    """
    if Enum is not None and issubclass(obj, Enum):
        return (
            _make_skeleton_enum,
            _enum_getnewargs(obj),
            _enum_getstate(obj),
            None,
            None,
            _class_setstate,
        )
    else:
        return (
            _make_skeleton_class,
            _class_getnewargs(obj),
            _class_getstate(obj),
            None,
            None,
            _class_setstate,
        )


def _class_reduce(obj):
    """Select the reducer depending on the dynamic nature of the class obj."""
    if obj is type(None):  # noqa
        return type, (None,)
    elif obj is type(Ellipsis):
        return type, (Ellipsis,)
    elif obj is type(NotImplemented):
        return type, (NotImplemented,)
    elif obj in _BUILTIN_TYPE_NAMES:
        return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
    elif not _should_pickle_by_reference(obj):
        return _dynamic_class_reduce(obj)
    return NotImplemented


def _dict_keys_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_keys, (list(obj),)


def _dict_values_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_values, (list(obj),)


def _dict_items_reduce(obj):
    return _make_dict_items, (dict(obj),)


def _odict_keys_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_keys, (list(obj), True)


def _odict_values_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_values, (list(obj), True)


def _odict_items_reduce(obj):
    return _make_dict_items, (dict(obj), True)


def _dataclass_field_base_reduce(obj):
    return _get_dataclass_field_type_sentinel, (obj.name,)


# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# State setters are called at unpickling time, once the object is created:
# they update it to how it was at pickling time.


def _function_setstate(obj, state):
    """Update the state of a dynamic function.

    As __closure__ and __globals__ are readonly attributes of a function, we
    cannot rely on the native setstate routine of pickle.load_build, which
    calls setattr on items of the slotstate. Instead, we have to modify them
    in place.
    """
    state, slotstate = state
    obj.__dict__.update(state)

    obj_globals = slotstate.pop("__globals__")
    obj_closure = slotstate.pop("__closure__")
    # _cloudpickle_submodules is a list of submodules that must be loaded for
    # the pickled function to work correctly at unpickling time. Now that these
    # submodules are depickled (hence imported), they can be removed from the
    # object's state (the object state only served as a reference holder to
    # these submodules)
    slotstate.pop("_cloudpickle_submodules")

    obj.__globals__.update(obj_globals)
    obj.__globals__["__builtins__"] = __builtins__

    if obj_closure is not None:
        for i, cell in enumerate(obj_closure):
            try:
                value = cell.cell_contents
            except ValueError:  # cell is empty
                continue
            obj.__closure__[i].cell_contents = value

    for k, v in slotstate.items():
        setattr(obj, k, v)


def _class_setstate(obj, state):
    state, slotstate = state
    registry = None
    for attrname, attr in state.items():
        if attrname == "_abc_impl":
            registry = attr
        else:
            # Note: setting attribute names on a class automatically triggers their
            # interning in CPython:
            # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957
            #
            # This means that to get deterministic pickling for a dynamic class that
            # was initially defined in a different Python process, the pickler
            # needs to ensure that dynamic class and function attribute names are
            # systematically copied into a non-interned version to avoid
            # unpredictable pickle payloads.
            #
            # Indeed the Pickler's memoizer relies on physical object identity to break
            # cycles in the reference graph of the object being serialized.
            setattr(obj, attrname, attr)

    if sys.version_info >= (3, 13) and "__firstlineno__" in state:
        # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it
        # will be automatically deleted by the `setattr(obj, attrname, attr)` call
        # above when `attrname` is "__firstlineno__". We assume that preserving
        # this information might be important for some users and that it is not
        # stale in the context of cloudpickle usage, hence legitimate to
        # propagate. Furthermore it is necessary to do so to keep deterministic
        # chained pickling as tested in
        # test_deterministic_str_interning_for_chained_dynamic_class_pickling.
        obj.__firstlineno__ = state["__firstlineno__"]

    if registry is not None:
        for subclass in registry:
            obj.register(subclass)

    # PEP-649/749: During pickling, we excluded the __annotate_func__ attribute,
    # but it will be recreated by Python. Subsequently, annotations will be
    # recreated when __annotations__ is accessed.

    return obj


# COLLECTION OF DATACLASS UTILITIES
# ---------------------------------
# There are some internal sentinel values whose identity must be preserved when
# unpickling dataclass fields. Each sentinel value has a unique name that we can
# use to retrieve its identity at unpickling time.


_DATACLASS_FIELD_TYPE_SENTINELS = {
    dataclasses._FIELD.name: dataclasses._FIELD,
    dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR,
    dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR,
}


def _get_dataclass_field_type_sentinel(name):
    return _DATACLASS_FIELD_TYPE_SENTINELS[name]


class Pickler(pickle.Pickler):
    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
    _dispatch_table[io.TextIOWrapper] = _file_reduce
    _dispatch_table[logging.Logger] = _logger_reduce
    _dispatch_table[logging.RootLogger] = _root_logger_reduce
    _dispatch_table[memoryview] = _memoryview_reduce
    _dispatch_table[property] = _property_reduce
    _dispatch_table[staticmethod] = _classmethod_reduce
    _dispatch_table[CellType] = _cell_reduce
    _dispatch_table[types.CodeType] = _code_reduce
    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
    _dispatch_table[types.ModuleType] = _module_reduce
    _dispatch_table[types.MethodType] = _method_reduce
    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
    _dispatch_table[weakref.WeakSet] = _weakset_reduce
    _dispatch_table[typing.TypeVar] = _typevar_reduce
    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
    _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
    _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
    _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
    _dispatch_table[abc.abstractmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractproperty] = _property_reduce
    _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

    # function reducers are defined as instance methods of cloudpickle.Pickler
    # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (_make_function, newargs, state, None, None, _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this reducer
        returns NotImplemented, making the cloudpickle.Pickler fall back to
        traditional pickle.Pickler routines to save obj. Otherwise, it reduces
        obj using a custom cloudpickle reducer designed specifically to handle
        dynamic functions.
        """
        if _should_pickle_by_reference(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # cloudpickle.Pickler.globals_ref allows functions sharing the same
        # globals at pickling time to also share them once unpickled, on one
        # condition: since globals_ref is an attribute of a cloudpickle.Pickler
        # instance, and a new cloudpickle.Pickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
        # to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
        # cloudpickle.Pickler.dump, as long as the multiple invocations are
        # bound to the same cloudpickle.Pickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if base_globals == {}:
            # Add module attributes used to resolve relative imports
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure

    def dump(self, obj):
        try:
            return super().dump(obj)
        except RecursionError as e:
            msg = "Could not pickle object as excessively deep recursion required."
            raise pickle.PicklingError(msg) from e

    def __init__(self, file, protocol=None, buffer_callback=None):
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map functions' __globals__ attribute ids to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)

    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on the reducer_override method to customize the
        # pickler behavior.

        # `cloudpickle.Pickler.dispatch` is only left for backward
        # compatibility - note that when using protocol 5,
        # `cloudpickle.Pickler.dispatch` is not an extension of
        # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
        # subclasses the C-implemented `pickle.Pickler`, which does not expose
        # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
        # used `cloudpickle.Pickler.dispatch` as a class-level attribute
        # storing all reducers implemented by cloudpickle, but the attribute
        # name was not a great choice given that it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
        dispatch = dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
        # the C-implemented `pickle.Pickler`.
        # TODO: decorrelate reducer_override (which is tied to CPython's
        # implementation - would it make sense to backport it to PyPy?) and
        # pickle's protocol 5 (which is implementation agnostic). Currently,
        # the availability of both notions coincides on CPython's pickle, but
        # it may not be the case anymore when PyPy implements protocol 5.

        def reducer_override(self, obj):
            """Type-agnostic reducing callback for functions and classes.

            For performance reasons, subclasses of the C `pickle.Pickler` class
            cannot register custom reducers for functions and classes in the
            dispatch_table attribute. Reducers for such types must instead be
            implemented via the special `reducer_override` method.

            Note that this method will be called for any object except a few
            builtin types (int, lists, dicts, etc.), which differs from
            reducers in the Pickler's dispatch_table, each of them being
            invoked for objects of a specific type only.

            This property comes in handy for classes: although most classes are
            instances of the ``type`` metaclass, some of them can be instances
            of other custom metaclasses (such as enum.EnumMeta for example). In
            particular, the metaclass will likely not be known in advance, and
            thus cannot be special-cased using an entry in the dispatch_table.
            reducer_override, among other things, allows us to register a
            reducer that will be called for any class, independently of its
            type.

            Notes:

            * reducer_override takes priority over dispatch_table-registered
              reducers.
            * reducer_override can be used to fix other limitations of
              cloudpickle for other types that suffered from type-specific
              reducers, such as Exceptions. See
              https://github.com/cloudpipe/cloudpickle/issues/248
            """
            t = type(obj)
            try:
                is_anyclass = issubclass(t, type)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                is_anyclass = False

            if is_anyclass:
                return _class_reduce(obj)
            elif isinstance(obj, types.FunctionType):
                return self._function_reduce(obj)
            else:
                # fallback to save_global, including the Pickler's
                # dispatch_table
                return NotImplemented

    else:
        # When reducer_override is not available, hack the pure-Python
        # Pickler's types.FunctionType and type savers. Note: the type saver
        # must override Pickler.save_global, because pickle.py contains a
        # hard-coded call to save_global when pickling meta-classes.
        dispatch = pickle.Pickler.dispatch.copy()

        def _save_reduce_pickle5(
            self,
            func,
            args,
            state=None,
            listitems=None,
            dictitems=None,
            state_setter=None,
            obj=None,
        ):
            save = self.save
            write = self.write
            self.save_reduce(
                func,
                args,
                state=None,
                listitems=listitems,
                dictitems=dictitems,
                obj=obj,
            )
            # backport of the Python 3.8 state_setter pickle operations
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(pickle.TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(pickle.REDUCE)
            # The purpose of state_setter is to carry out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(pickle.POP)

        def save_global(self, obj, name=None, pack=struct.pack):
            """Main dispatch method.

            The name of this method is somewhat misleading: all types get
            dispatched here.
            """
            if obj is type(None):  # noqa
                return self.save_reduce(type, (None,), obj=obj)
            elif obj is type(Ellipsis):
                return self.save_reduce(type, (Ellipsis,), obj=obj)
            elif obj is type(NotImplemented):
                return self.save_reduce(type, (NotImplemented,), obj=obj)
            elif obj in _BUILTIN_TYPE_NAMES:
                return self.save_reduce(
                    _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                )

            if name is not None:
                super().save_global(obj, name=name)
            elif not _should_pickle_by_reference(obj, name=name):
                self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
            else:
                super().save_global(obj, name=name)

        dispatch[type] = save_global

        def save_function(self, obj, name=None):
            """Registered with the dispatch to handle all function types.

            Determines what kind of function obj is (e.g. lambda, defined at
            interactive prompt, etc) and handles the pickling appropriately.
            """
            if _should_pickle_by_reference(obj, name=name):
                return super().save_global(obj, name=name)
            elif PYPY and isinstance(obj.__code__, builtin_code_type):
                return self.save_pypy_builtin_func(obj)
            else:
                return self._save_reduce_pickle5(
                    *self._dynamic_function_reduce(obj), obj=obj
                )

        def save_pypy_builtin_func(self, obj):
            """Save pypy equivalent of builtin functions.

            PyPy does not have the concept of builtin-functions. Instead,
            builtin-functions are simple function instances, but with a
            builtin-code attribute.
            Most of the time, builtin functions should be pickled by attribute.
            But PyPy has flaky support for __qualname__, so some builtin
            functions such as float.__new__ will be classified as dynamic. For
            this reason only, we created this special routine. Because
            builtin-functions are not expected to have closures or globals,
            there is no additional hack (compared to the one already
            implemented in pickle) to protect ourselves from reference cycles.
            A simple (reconstructor, newargs, obj.__dict__) tuple is saved via
            save_reduce. Note also that PyPy improved their support for
            __qualname__ in v3.6, so this routine should be removed when
            cloudpickle supports only PyPy 3.6 and later.
            """
            rv = (
                types.FunctionType,
                (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                obj.__dict__,
            )
            self.save_reduce(*rv, obj=obj)

        dispatch[types.FunctionType] = save_function


# Shorthands similar to pickle.dump/pickle.dumps


def dump(obj, file, protocol=None, buffer_callback=None):
    """Serialize obj as bytes streamed into file.

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL, which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
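
    Example (a sketch; any writable binary file object works)::

        >>> import cloudpickle
        >>> with open("payload.pkl", "wb") as f:  # doctest: +SKIP
        ...     cloudpickle.dump(lambda x: x + 1, f)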
1525 """
1526 Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj)
1527
1528
def dumps(obj, protocol=None, buffer_callback=None):
    """Serialize obj as a string of bytes allocated in memory.

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL, which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
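
    Example (round-tripping a locally defined function; the standard
    ``pickle.loads`` suffices on the receiving side)::

        >>> import pickle
        >>> double = lambda x: 2 * x
        >>> pickle.loads(dumps(double))(21)
        42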
1541 """
1542 with io.BytesIO() as file:
1543 cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
1544 cp.dump(obj)
1545 return file.getvalue()
1546
1547
1548# Include pickles unloading functions in this namespace for convenience.
1549load, loads = pickle.load, pickle.loads
1550
1551# Backward compat alias.
1552CloudPickler = Pickler