1"""
2This is a modified version of the cloudpickle module.
3Patches:
4- https://github.com/numba/numba/pull/7388
5 Avoid resetting class state of dynamic classes.
6
7Original module docstring:
8
9Pickler class to extend the standard pickle.Pickler functionality
10
11The main objective is to make it natural to perform distributed computing on
12clusters (such as PySpark, Dask, Ray...) with interactively defined code
13(functions, classes, ...) written in notebooks or console.
14
15In particular this pickler adds the following features:
16- serialize interactively-defined or locally-defined functions, classes,
17 enums, typevars, lambdas and nested functions to compiled byte code;
18- deal with some other non-serializable objects in an ad-hoc manner where
19 applicable.
20
21This pickler is therefore meant to be used for the communication between short
22lived Python processes running the same version of Python and libraries. In
23particular, it is not meant to be used for long term storage of Python objects.
24
25It does not include an unpickler, as standard Python unpickling suffices.
26
27This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
28<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
29
30Copyright (c) 2012-now, CloudPickle developers and contributors.
31Copyright (c) 2012, Regents of the University of California.
32Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
33All rights reserved.
34
35Redistribution and use in source and binary forms, with or without
36modification, are permitted provided that the following conditions
37are met:
38 * Redistributions of source code must retain the above copyright
39 notice, this list of conditions and the following disclaimer.
40 * Redistributions in binary form must reproduce the above copyright
41 notice, this list of conditions and the following disclaimer in the
42 documentation and/or other materials provided with the distribution.
43 * Neither the name of the University of California, Berkeley nor the
44 names of its contributors may be used to endorse or promote
45 products derived from this software without specific prior written
46 permission.
47
48THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
49"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
50LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
51A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
52HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
54TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
55PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
56LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
57NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
58SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59"""
60
61import _collections_abc
62from collections import ChainMap, OrderedDict
63import abc
64import builtins
65import copyreg
66import dataclasses
67import dis
68from enum import Enum
69import io
70import itertools
71import logging
72import opcode
73import pickle
74from pickle import _getattribute
75import platform
76import struct
77import sys
78import threading
79import types
80import typing
81import uuid
82import warnings
83import weakref
84
85# The following import is required to be imported in the cloudpickle
86# namespace to be able to load pickle files generated with older versions of
87# cloudpickle. See: tests/test_backward_compat.py
88from types import CellType # noqa: F401
89
90
# cloudpickle targets inter-process communication between processes that are
# expected to run the same Python version, so speed is favoured over
# compatibility and the highest available protocol is the default.
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Names of the modules whose resources must be treated as dynamic, i.e.
# pickled by value.
_PICKLE_BY_VALUE_MODULES = set()

# Provenance of reconstructed dynamic classes. Tracking it lets instances be
# rebuilt from the matching singleton class definition, which preserves the
# usual "isinstance" semantics of Python objects.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()
_DYNAMIC_CLASS_TRACKER_REUSING = weakref.WeakSet()

PYPY = platform.python_implementation() == "PyPy"

# builtin-code objects only exist in PyPy; the name stays None elsewhere.
builtin_code_type = type(float.__new__.__code__) if PYPY else None

# Cache of the global names found by _extract_code_globals, keyed by code
# object.
_extract_code_globals_cache = weakref.WeakKeyDictionary()
115
116
def _get_or_create_tracker_id(class_def):
    """Return the tracker id associated with ``class_def``.

    When ``class_def`` is not tracked yet, a fresh ``uuid4`` hex id is
    generated and recorded in both tracker mappings. The lookup and the
    registration happen while holding ``_DYNAMIC_CLASS_TRACKER_LOCK``.
    """
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if tracker_id is not None:
            return tracker_id
        tracker_id = uuid.uuid4().hex
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = tracker_id
        _DYNAMIC_CLASS_TRACKER_BY_ID[tracker_id] = class_def
        return tracker_id
125
126
def _lookup_class_or_track(class_tracker_id, class_def):
    """Return the class definition registered under ``class_tracker_id``.

    With a ``None`` id nothing is tracked and ``class_def`` is returned
    untouched. Otherwise ``class_def`` is registered under the id unless a
    definition is already stored for it, in which case that earlier
    definition is returned and remembered as "being reused" in
    ``_DYNAMIC_CLASS_TRACKER_REUSING``.
    """
    if class_tracker_id is None:
        return class_def
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        tracked = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
            class_tracker_id, class_def
        )
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[tracked] = class_tracker_id
        # A different object coming back means a previous class_def is reused.
        if tracked is not class_def:
            _DYNAMIC_CLASS_TRACKER_REUSING.add(tracked)
    return tracked
140
141
def register_pickle_by_value(module):
    """Register a module to make its functions and classes picklable by value.

    Functions and classes that are attributes of an importable module are
    pickled by reference by default, i.e. loading relies on re-importing the
    attribute from the module.

    Once ``register_pickle_by_value(module)`` has been called, the functions
    and classes of ``module`` are pickled by value instead, so they can be
    loaded in Python processes where the module is not importable.

    This is especially useful when developing a module in a distributed
    execution environment: restarting the client Python process with the new
    source code is enough, there is no need to re-install the module on the
    worker nodes nor to restart the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.

    Raises
    ------
    ValueError
        If ``module`` is not a module object, or if its name is not present
        in ``sys.modules``.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")

    module_name = module.__name__
    # cloudpickle may one day need to reach any module registered for
    # pickling by value, to introspect relative imports inside functions
    # pickled by value (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
    # Requiring the module to be in sys.modules at registration time (and
    # assuming it stays there) keeps that option open without a breaking
    # change later, even though the introspection is not implemented yet.
    if module_name not in sys.modules:
        raise ValueError(
            f"{module} was not imported correctly, have you used an "
            "`import` statement to access it?"
        )
    _PICKLE_BY_VALUE_MODULES.add(module_name)
179
180
def unregister_pickle_by_value(module):
    """Unregister that the input module should be pickled by value.

    Raises
    ------
    ValueError
        If ``module`` is not a module object, or if it is not currently
        registered for pickling by value.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")

    module_name = module.__name__
    if module_name not in _PICKLE_BY_VALUE_MODULES:
        raise ValueError(f"{module} is not registered for pickle by value")
    _PICKLE_BY_VALUE_MODULES.remove(module_name)
189
190
def list_registry_pickle_by_value():
    """Return a copy of the set of module names registered for pickle by value."""
    return set(_PICKLE_BY_VALUE_MODULES)
193
194
def _is_registered_pickle_by_value(module):
    """Tell whether ``module`` or one of its parent packages is registered.

    The dotted name of ``module`` is checked first, then each enclosing
    package name obtained by dropping the last dotted component.
    """
    candidate = module.__name__
    while True:
        if candidate in _PICKLE_BY_VALUE_MODULES:
            return True
        parent = candidate.rsplit(".", 1)[0]
        if parent == candidate:
            # No dot left: the top-level name has already been checked.
            return False
        candidate = parent
207
208
209def _whichmodule(obj, name):
210 """Find the module an object belongs to.
211
212 This function differs from ``pickle.whichmodule`` in two ways:
213 - it does not mangle the cases where obj's module is __main__ and obj was
214 not found in any module.
215 - Errors arising during module introspection are ignored, as those errors
216 are considered unwanted side effects.
217 """
218 module_name = getattr(obj, "__module__", None)
219
220 if module_name is not None:
221 return module_name
222 # Protect the iteration by using a copy of sys.modules against dynamic
223 # modules that trigger imports of other modules upon calls to getattr or
224 # other threads importing at the same time.
225 for module_name, module in sys.modules.copy().items():
226 # Some modules such as coverage can inject non-module objects inside
227 # sys.modules
228 if (
229 module_name == "__main__"
230 or module is None
231 or not isinstance(module, types.ModuleType)
232 ):
233 continue
234 try:
235 if _getattribute(module, name)[0] is obj:
236 return module_name
237 except Exception:
238 pass
239 return None
240
241
242def _should_pickle_by_reference(obj, name=None):
243 """Test whether an function or a class should be pickled by reference
244
245 Pickling by reference means by that the object (typically a function or a
246 class) is an attribute of a module that is assumed to be importable in the
247 target Python environment. Loading will therefore rely on importing the
248 module and then calling `getattr` on it to access the function or class.
249
250 Pickling by reference is the only option to pickle functions and classes
251 in the standard library. In cloudpickle the alternative option is to
252 pickle by value (for instance for interactively or locally defined
253 functions and classes or for attributes of modules that have been
254 explicitly registered to be pickled by value.
255 """
256 if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
257 module_and_name = _lookup_module_and_qualname(obj, name=name)
258 if module_and_name is None:
259 return False
260 module, name = module_and_name
261 return not _is_registered_pickle_by_value(module)
262
263 elif isinstance(obj, types.ModuleType):
264 # We assume that sys.modules is primarily used as a cache mechanism for
265 # the Python import machinery. Checking if a module has been added in
266 # is sys.modules therefore a cheap and simple heuristic to tell us
267 # whether we can assume that a given module could be imported by name
268 # in another Python process.
269 if _is_registered_pickle_by_value(obj):
270 return False
271 return obj.__name__ in sys.modules
272 else:
273 raise TypeError(
274 "cannot check importability of {} instances".format(type(obj).__name__)
275 )
276
277
def _lookup_module_and_qualname(obj, name=None):
    """Return ``(module, name)`` when ``obj`` can be retrieved by attribute lookup.

    ``name`` defaults to ``obj.__qualname__`` (falling back to
    ``obj.__name__``). None is returned whenever ``obj`` has to be treated as
    dynamic: no module found, module is ``__main__``, module absent from
    ``sys.modules``, or the attribute found in the module is not ``obj``.
    """
    if name is None:
        name = getattr(obj, "__qualname__", None)
    if name is None:  # pragma: no cover
        # This used to be needed for Python 2.7 support but is probably not
        # needed anymore. The __name__ introspection is kept in case users of
        # cloudpickle rely on this old behavior for unknown reasons.
        name = getattr(obj, "__name__", None)

    module_name = _whichmodule(obj, name)

    # None: obj.__module__ is None AND obj was not found in any imported
    # module, so obj is dynamic. "__main__" is never pickled by reference.
    if module_name is None or module_name == "__main__":
        return None

    # Note: if module_name is in sys.modules, the corresponding module is
    # assumed importable at unpickling time. See #357
    module = sys.modules.get(module_name, None)
    if module is None:
        # Most likely the module was created dynamically, for example with
        # types.ModuleType. It could also have been removed from sys.modules
        # after obj was created/imported, but that case is not supported (the
        # standard pickle does not support it either).
        return None

    try:
        resolved, _parent = _getattribute(module, name)
    except AttributeError:
        # obj was not found inside the module it points to
        return None
    return (module, name) if resolved is obj else None
316
317
def _extract_code_globals(co):
    """Find all globals names read or written to by codeblock co.

    The result is a dict whose keys are the names (values are all None) and
    is memoized per code object in ``_extract_code_globals_cache``.
    """
    cached = _extract_code_globals_cache.get(co)
    if cached is not None:
        return cached

    # A dict with None values is used instead of a set to get a deterministic
    # order and avoid introducing non-deterministic pickle bytes.
    names = dict.fromkeys(_walk_global_ops(co))

    # A function declared inside another one with the "def ..." syntax shows
    # up as a code object among the constants of the enclosing code. The
    # nested function may itself use globals, so its code is inspected
    # recursively and the names it uses are merged in.
    for const in co.co_consts or ():
        if isinstance(const, types.CodeType):
            names.update(_extract_code_globals(const))

    _extract_code_globals_cache[co] = names
    return names
341
342
343def _find_imported_submodules(code, top_level_dependencies):
344 """Find currently imported submodules used by a function.
345
346 Submodules used by a function need to be detected and referenced for the
347 function to work correctly at depickling time. Because submodules can be
348 referenced as attribute of their parent package (``package.submodule``), we
349 need a special introspection technique that does not rely on GLOBAL-related
350 opcodes to find references of them in a code object.
351
352 Example:
353 ```
354 import concurrent.futures
355 import cloudpickle
356 def func():
357 x = concurrent.futures.ThreadPoolExecutor
358 if __name__ == '__main__':
359 cloudpickle.dumps(func)
360 ```
361 The globals extracted by cloudpickle in the function's state include the
362 concurrent package, but not its submodule (here, concurrent.futures), which
363 is the module used by func. Find_imported_submodules will detect the usage
364 of concurrent.futures. Saving this module alongside with func will ensure
365 that calling func once depickled does not fail due to concurrent.futures
366 not being imported
367 """
368
369 subimports = []
370 # check if any known dependency is an imported package
371 for x in top_level_dependencies:
372 if (
373 isinstance(x, types.ModuleType)
374 and hasattr(x, "__package__")
375 and x.__package__
376 ):
377 # check if the package has any currently loaded sub-imports
378 prefix = x.__name__ + "."
379 # A concurrent thread could mutate sys.modules,
380 # make sure we iterate over a copy to avoid exceptions
381 for name in list(sys.modules):
382 # Older versions of pytest will add a "None" module to
383 # sys.modules.
384 if name is not None and name.startswith(prefix):
385 # check whether the function can address the sub-module
386 tokens = set(name[len(prefix) :].split("."))
387 if not tokens - set(code.co_names):
388 subimports.append(sys.modules[name])
389 return subimports
390
391
392# relevant opcodes
393STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
394DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
395LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
396GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
397HAVE_ARGUMENT = dis.HAVE_ARGUMENT
398EXTENDED_ARG = dis.EXTENDED_ARG
399
400
401_BUILTIN_TYPE_NAMES = {}
402for k, v in types.__dict__.items():
403 if type(v) is type:
404 _BUILTIN_TYPE_NAMES[v] = k
405
406
407def _builtin_type(name):
408 if name == "ClassType": # pragma: no cover
409 # Backward compat to load pickle files generated with cloudpickle
410 # < 1.3 even if loading pickle files from older versions is not
411 # officially supported.
412 return type
413 return getattr(types, name)
414
415
def _walk_global_ops(code):
    """Yield referenced name for global-referencing instructions in code."""
    yield from (
        instruction.argval
        for instruction in dis.get_instructions(code)
        if instruction.opcode in GLOBAL_OPS
    )
422
423
424def _extract_class_dict(cls):
425 """Retrieve a copy of the dict of a class without the inherited method."""
426 clsdict = dict(cls.__dict__) # copy dict proxy to a dict
427 if len(cls.__bases__) == 1:
428 inherited_dict = cls.__bases__[0].__dict__
429 else:
430 inherited_dict = {}
431 for base in reversed(cls.__bases__):
432 inherited_dict.update(base.__dict__)
433 to_remove = []
434 for name, value in clsdict.items():
435 try:
436 base_value = inherited_dict[name]
437 if value is base_value:
438 to_remove.append(name)
439 except KeyError:
440 pass
441 for name in to_remove:
442 clsdict.pop(name)
443 return clsdict
444
445
def is_tornado_coroutine(func):
    """Return whether `func` is a Tornado coroutine function.

    Running coroutines are not supported.

    Deprecated: a ``DeprecationWarning`` is emitted on every call. It is
    attributed to the caller (``stacklevel=2``) so that it points at the code
    that needs updating.

    Returns False when ``tornado.gen`` has not been imported or does not
    provide ``is_coroutine_function``.
    """
    warnings.warn(
        "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
        "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
        "directly instead.",
        category=DeprecationWarning,
        stacklevel=2,
    )
    gen = sys.modules.get("tornado.gen")
    if gen is None:
        return False
    if not hasattr(gen, "is_coroutine_function"):
        # Tornado version is too old
        return False
    return gen.is_coroutine_function(func)
464
465
def subimport(name):
    """Import the module called ``name`` and return that very module."""
    # `return __import__(name)` would be wrong for submodules: __import__
    # hands back the top-level root package (``__import__('os.path')``
    # returns the `os` module), so the module is fetched from sys.modules
    # once the import has been performed.
    __import__(name)
    return sys.modules[name]
473
474
def dynamic_subimport(name, vars):
    """Create a new module named ``name`` populated with ``vars``.

    The ``__builtins__`` entry of the new module is always set to the dict of
    the ``builtins`` module, overriding any value found in ``vars``.
    """
    module = types.ModuleType(name)
    namespace = module.__dict__
    namespace.update(vars)
    namespace["__builtins__"] = builtins.__dict__
    return module
480
481
482def _get_cell_contents(cell):
483 try:
484 return cell.cell_contents
485 except ValueError:
486 # Handle empty cells explicitly with a sentinel value.
487 return _empty_cell_value
488
489
def instance(cls):
    """Instantiate ``cls`` without arguments and return the new object.

    Parameters
    ----------
    cls : type
        The class to instantiate.

    Returns
    -------
    instance : cls
        The object produced by calling ``cls()``.
    """
    new_object = cls()
    return new_object
504
505
@instance
class _empty_cell_value:
    """Sentinel standing for the contents of an empty closure cell.

    The ``@instance`` decorator rebinds this name to an instance of the
    class.
    """

    @classmethod
    def __reduce__(cls):
        # The reduce value is the name of the class, a plain string.
        name = cls.__name__
        return name
513
514
515def _make_function(code, globals, name, argdefs, closure):
516 # Setting __builtins__ in globals is needed for nogil CPython.
517 globals["__builtins__"] = __builtins__
518 return types.FunctionType(code, globals, name, argdefs, closure)
519
520
521def _make_empty_cell():
522 if False:
523 # trick the compiler into creating an empty cell in our lambda
524 cell = None
525 raise AssertionError("this route should not be executed")
526
527 return (lambda: cell).__closure__[0]
528
529
def _make_cell(value=_empty_cell_value):
    """Return a new closure cell, filled with ``value`` unless it is the sentinel."""
    cell = _make_empty_cell()
    if value is _empty_cell_value:
        return cell
    cell.cell_contents = value
    return cell
535
536
def _make_skeleton_class(
    type_constructor, name, bases, type_kwargs, class_tracker_id, extra
):
    """Build dynamic class with an empty __dict__ to be filled once memoized

    The class is created with ``type_constructor`` as metaclass and its
    namespace is pre-populated with ``type_kwargs``.

    If class_tracker_id is not None, try to lookup an existing class
    definition matching that id. If none is found, track a newly
    reconstructed class definition under that id so that other instances
    stemming from the same class id will also reuse this class definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility shall the need arise; it is not read here.
    """

    def _populate(namespace):
        namespace.update(type_kwargs)

    skeleton = types.new_class(
        name, bases, {"metaclass": type_constructor}, _populate
    )
    return _lookup_class_or_track(class_tracker_id, skeleton)
554
555
def _make_skeleton_enum(
    bases, name, qualname, members, module, class_tracker_id, extra
):
    """Build dynamic enum with an empty __dict__ to be filled once memoized

    The creation of the enum class is inspired by the code of
    EnumMeta._create_.

    If class_tracker_id is not None, try to lookup an existing enum
    definition matching that id. If none is found, track a newly
    reconstructed enum definition under that id so that other instances
    stemming from the same class id will also reuse this enum definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility shall the need arise; it is not read here.
    """
    # enums always inherit from their base Enum class at the last position in
    # the list of base classes, so its metaclass drives the construction.
    metacls = bases[-1].__class__
    namespace = metacls.__prepare__(name, bases)
    for member_name, member_value in members.items():
        namespace[member_name] = member_value

    enum_class = metacls.__new__(metacls, name, bases, namespace)
    enum_class.__module__ = module
    enum_class.__qualname__ = qualname

    return _lookup_class_or_track(class_tracker_id, enum_class)
585
586
def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
    """Rebuild a ``typing.TypeVar`` and register it under ``class_tracker_id``.

    The value returned is the one given back by ``_lookup_class_or_track``.
    """
    options = {
        "bound": bound,
        "covariant": covariant,
        "contravariant": contravariant,
    }
    typevar = typing.TypeVar(name, *constraints, **options)
    return _lookup_class_or_track(class_tracker_id, typevar)
596
597
def _decompose_typevar(obj):
    """Return the argument tuple expected by ``_make_typevar`` for ``obj``."""
    attribute_names = (
        "__name__",
        "__bound__",
        "__constraints__",
        "__covariant__",
        "__contravariant__",
    )
    description = tuple(getattr(obj, attr) for attr in attribute_names)
    return description + (_get_or_create_tracker_id(obj),)
607
608
def _typevar_reduce(obj):
    """Reducer for TypeVar instances.

    The TypeVar is rebuilt by value when it cannot be located in a module or
    when that module is registered for pickling by value; otherwise it is
    fetched with ``getattr(module, name)`` at load time.
    """
    # TypeVar instances require the module information, which is why
    # _should_pickle_by_reference is not used directly.
    located = _lookup_module_and_qualname(obj, name=obj.__name__)

    by_value = located is None or _is_registered_pickle_by_value(located[0])
    if by_value:
        return (_make_typevar, _decompose_typevar(obj))
    return (getattr, located)
620
621
622def _get_bases(typ):
623 if "__orig_bases__" in getattr(typ, "__dict__", {}):
624 # For generic types (see PEP 560)
625 # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
626 # correct. Subclasses of a fully-parameterized generic class does not
627 # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
628 # will return True because it's defined in the base class.
629 bases_attr = "__orig_bases__"
630 else:
631 # For regular class objects
632 bases_attr = "__bases__"
633 return getattr(typ, bases_attr)
634
635
636def _make_dict_keys(obj, is_ordered=False):
637 if is_ordered:
638 return OrderedDict.fromkeys(obj).keys()
639 else:
640 return dict.fromkeys(obj).keys()
641
642
643def _make_dict_values(obj, is_ordered=False):
644 if is_ordered:
645 return OrderedDict((i, _) for i, _ in enumerate(obj)).values()
646 else:
647 return {i: _ for i, _ in enumerate(obj)}.values()
648
649
650def _make_dict_items(obj, is_ordered=False):
651 if is_ordered:
652 return OrderedDict(obj).items()
653 else:
654 return obj.items()
655
656
657# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
658# -------------------------------------------------
659
660
def _class_getnewargs(obj):
    """Return the argument tuple used to rebuild the skeleton of class ``obj``.

    The tuple holds the metaclass, the class name, its bases, the keyword
    namespace (``__module__`` and a property-based ``__dict__`` when defined
    on the class itself), the tracker id and a trailing ``None``.
    """
    own_attrs = obj.__dict__

    type_kwargs = {}
    if "__module__" in own_attrs:
        type_kwargs["__module__"] = obj.__module__

    dict_descriptor = own_attrs.get("__dict__", None)
    if isinstance(dict_descriptor, property):
        type_kwargs["__dict__"] = dict_descriptor

    return (
        type(obj),
        obj.__name__,
        _get_bases(obj),
        type_kwargs,
        _get_or_create_tracker_id(obj),
        None,
    )
678
679
def _enum_getnewargs(obj):
    """Return the argument tuple used to rebuild the skeleton of enum ``obj``.

    The tuple holds the bases, name, qualified name, a name-to-value mapping
    of the members, the module, the tracker id and a trailing ``None``.
    """
    member_values = {member.name: member.value for member in obj}
    return (
        obj.__bases__,
        obj.__name__,
        obj.__qualname__,
        member_values,
        obj.__module__,
        _get_or_create_tracker_id(obj),
        None,
    )
691
692
693# COLLECTION OF OBJECTS RECONSTRUCTORS
694# ------------------------------------
695def _file_reconstructor(retval):
696 return retval
697
698
699# COLLECTION OF OBJECTS STATE GETTERS
700# -----------------------------------
701
702
def _function_getstate(func):
    """Return the ``(state, slotstate)`` pair describing ``func``.

    - ``state`` is func's ``__dict__``, i.e. its dynamic attributes. These
      attributes will be restored at unpickling time using
      ``f.__dict__.update(state)``.
    - ``slotstate`` holds func's members. Such attributes will be restored at
      unpickling time by iterating over slotstate and calling
      ``setattr(func, slotname, slotvalue)``.
    """
    slot_names = (
        "__name__",
        "__qualname__",
        "__annotations__",
        "__kwdefaults__",
        "__defaults__",
        "__module__",
        "__doc__",
        "__closure__",
    )
    slotstate = {slot: getattr(func, slot) for slot in slot_names}

    # Keep only the globals that the code of func actually refers to.
    referenced_names = _extract_code_globals(func.__code__)
    func_globals = func.__globals__
    f_globals = {
        key: func_globals[key] for key in referenced_names if key in func_globals
    }

    closure = func.__closure__
    if closure is None:
        closure_values = ()
    else:
        closure_values = [_get_cell_contents(cell) for cell in closure]

    # Extract currently-imported submodules used by func. Storing these
    # modules in a smoke _cloudpickle_submodules attribute of the object's
    # state will trigger the side effect of importing these modules at
    # unpickling time (which is necessary for func to work correctly once
    # depickled).
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        func.__code__, itertools.chain(f_globals.values(), closure_values)
    )
    slotstate["__globals__"] = f_globals

    return func.__dict__, slotstate
740
741
def _class_getstate(obj):
    """Return the ``(clsdict, slotstate)`` pair describing class ``obj``.

    ``clsdict`` is the class dict stripped from inherited entries, from
    ``__weakref__`` and ``__dict__``, from the ABC caches and from the member
    descriptors generated by ``__slots__``. ``slotstate`` is always an empty
    dict.
    """
    clsdict = _extract_class_dict(obj)
    clsdict.pop("__weakref__", None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        for cache_name in (
            "_abc_cache",
            "_abc_negative_cache",
            "_abc_negative_cache_version",
        ):
            clsdict.pop(cache_name, None)

        registry = clsdict.pop("_abc_registry", None)
        if registry is not None:
            # registry was stored on the class itself: keep its elements.
            clsdict["_abc_impl"] = list(registry)
        else:
            # The abc caches and registered subclasses of a class are bundled
            # into the single _abc_impl attribute. The registry taken from
            # abc._get_dump holds weak references, dereferenced here.
            clsdict.pop("_abc_impl", None)
            registry, _, _, _ = abc._get_dump(obj)
            clsdict["_abc_impl"] = [weak_subclass() for weak_subclass in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute, no need to
        # save them in obj's state
        slots = obj.__slots__
        if isinstance(slots, str):
            clsdict.pop(slots)
        else:
            for slot_name in slots:
                clsdict.pop(slot_name, None)

    clsdict.pop("__dict__", None)  # unpicklable property object

    return (clsdict, {})
779
780
def _enum_getstate(obj):
    """Return the ``(clsdict, slotstate)`` pair describing enum class ``obj``.

    This is the state computed by ``_class_getstate`` with the enum
    bookkeeping attributes and the members themselves removed from the dict.
    """
    clsdict, slotstate = _class_getstate(obj)

    member_values = {member.name: member.value for member in obj}

    # Cleanup the clsdict that will be passed to _make_skeleton_enum:
    # Those attributes are already handled by the metaclass.
    metaclass_managed = (
        "_generate_next_value_",
        "_member_names_",
        "_member_map_",
        "_member_type_",
        "_value2member_map_",
    )
    for attribute in metaclass_managed:
        clsdict.pop(attribute, None)
    # Special handling of Enum subclasses: drop every member entry.
    for member_name in member_values:
        clsdict.pop(member_name)
    return clsdict, slotstate
799
800
801# COLLECTIONS OF OBJECTS REDUCERS
802# -------------------------------
803# A reducer is a function taking a single argument (obj), and that returns a
804# tuple with all the necessary data to re-construct obj. Apart from a few
805# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
806# correctly pickle an object.
807# While many built-in objects (Exceptions objects, instances of the "object"
808# class, etc), are shipped with their own built-in reducer (invoked using
809# obj.__reduce__), some do not. The following methods were created to "fill
810# these holes".
811
812
813def _code_reduce(obj):
814 """code object reducer."""
815 # If you are not sure about the order of arguments, take a look at help
816 # of the specific type from types, for example:
817 # >>> from types import CodeType
818 # >>> help(CodeType)
819 if hasattr(obj, "co_exceptiontable"):
820 # Python 3.11 and later: there are some new attributes
821 # related to the enhanced exceptions.
822 args = (
823 obj.co_argcount,
824 obj.co_posonlyargcount,
825 obj.co_kwonlyargcount,
826 obj.co_nlocals,
827 obj.co_stacksize,
828 obj.co_flags,
829 obj.co_code,
830 obj.co_consts,
831 obj.co_names,
832 obj.co_varnames,
833 obj.co_filename,
834 obj.co_name,
835 obj.co_qualname,
836 obj.co_firstlineno,
837 obj.co_linetable,
838 obj.co_exceptiontable,
839 obj.co_freevars,
840 obj.co_cellvars,
841 )
842 elif hasattr(obj, "co_linetable"):
843 # Python 3.10 and later: obj.co_lnotab is deprecated and constructor
844 # expects obj.co_linetable instead.
845 args = (
846 obj.co_argcount,
847 obj.co_posonlyargcount,
848 obj.co_kwonlyargcount,
849 obj.co_nlocals,
850 obj.co_stacksize,
851 obj.co_flags,
852 obj.co_code,
853 obj.co_consts,
854 obj.co_names,
855 obj.co_varnames,
856 obj.co_filename,
857 obj.co_name,
858 obj.co_firstlineno,
859 obj.co_linetable,
860 obj.co_freevars,
861 obj.co_cellvars,
862 )
863 elif hasattr(obj, "co_nmeta"): # pragma: no cover
864 # "nogil" Python: modified attributes from 3.9
865 args = (
866 obj.co_argcount,
867 obj.co_posonlyargcount,
868 obj.co_kwonlyargcount,
869 obj.co_nlocals,
870 obj.co_framesize,
871 obj.co_ndefaultargs,
872 obj.co_nmeta,
873 obj.co_flags,
874 obj.co_code,
875 obj.co_consts,
876 obj.co_varnames,
877 obj.co_filename,
878 obj.co_name,
879 obj.co_firstlineno,
880 obj.co_lnotab,
881 obj.co_exc_handlers,
882 obj.co_jump_table,
883 obj.co_freevars,
884 obj.co_cellvars,
885 obj.co_free2reg,
886 obj.co_cell2reg,
887 )
888 else:
889 # Backward compat for 3.8 and 3.9
890 args = (
891 obj.co_argcount,
892 obj.co_posonlyargcount,
893 obj.co_kwonlyargcount,
894 obj.co_nlocals,
895 obj.co_stacksize,
896 obj.co_flags,
897 obj.co_code,
898 obj.co_consts,
899 obj.co_names,
900 obj.co_varnames,
901 obj.co_filename,
902 obj.co_name,
903 obj.co_firstlineno,
904 obj.co_lnotab,
905 obj.co_freevars,
906 obj.co_cellvars,
907 )
908 return types.CodeType, args
909
910
def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer."""
    try:
        contents = obj.cell_contents
    except ValueError:  # cell is empty
        return _make_empty_cell, ()
    return _make_cell, (contents,)
919
920
921def _classmethod_reduce(obj):
922 orig_func = obj.__func__
923 return type(obj), (orig_func,)
924
925
926def _file_reduce(obj):
927 """Save a file."""
928 import io
929
930 if not hasattr(obj, "name") or not hasattr(obj, "mode"):
931 raise pickle.PicklingError(
932 "Cannot pickle files that do not map to an actual file"
933 )
934 if obj is sys.stdout:
935 return getattr, (sys, "stdout")
936 if obj is sys.stderr:
937 return getattr, (sys, "stderr")
938 if obj is sys.stdin:
939 raise pickle.PicklingError("Cannot pickle standard input")
940 if obj.closed:
941 raise pickle.PicklingError("Cannot pickle closed files")
942 if hasattr(obj, "isatty") and obj.isatty():
943 raise pickle.PicklingError("Cannot pickle files that map to tty objects")
944 if "r" not in obj.mode and "+" not in obj.mode:
945 raise pickle.PicklingError(
946 "Cannot pickle files that are not opened for reading: %s" % obj.mode
947 )
948
949 name = obj.name
950
951 retval = io.StringIO()
952
953 try:
954 # Read the whole file
955 curloc = obj.tell()
956 obj.seek(0)
957 contents = obj.read()
958 obj.seek(curloc)
959 except OSError as e:
960 raise pickle.PicklingError(
961 "Cannot pickle file %s as it cannot be read" % name
962 ) from e
963 retval.write(contents)
964 retval.seek(curloc)
965
966 retval.name = name
967 return _file_reconstructor, (retval,)
968
969
970def _getset_descriptor_reduce(obj):
971 return getattr, (obj.__objclass__, obj.__name__)
972
973
974def _mappingproxy_reduce(obj):
975 return types.MappingProxyType, (dict(obj),)
976
977
978def _memoryview_reduce(obj):
979 return bytes, (obj.tobytes(),)
980
981
def _module_reduce(obj):
    """Reduce a module either by name or by a copy of its namespace."""
    if not _should_pickle_by_reference(obj):
        # External libraries may store unpicklable objects under the
        # "__builtins__" key of a module's `__dict__` (see #316), so that
        # entry is left out of the pickled state; a default value is put back
        # for it at unpickling time.
        state = {
            key: value
            for key, value in obj.__dict__.items()
            if key != "__builtins__"
        }
        return dynamic_subimport, (obj.__name__, state)
    return subimport, (obj.__name__,)
993
994
995def _method_reduce(obj):
996 return (types.MethodType, (obj.__func__, obj.__self__))
997
998
999def _logger_reduce(obj):
1000 return logging.getLogger, (obj.name,)
1001
1002
1003def _root_logger_reduce(obj):
1004 return logging.getLogger, ()
1005
1006
1007def _property_reduce(obj):
1008 return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)
1009
1010
1011def _weakset_reduce(obj):
1012 return weakref.WeakSet, (list(obj),)
1013
1014
def _dynamic_class_reduce(obj):
    """Reduce a class that cannot be pickled as a module attribute lookup.

    This covers classes defined inside functions, and more generally any
    class that cannot be retrieved by attribute lookup from an importable
    module. Enum subclasses use dedicated skeleton/state helpers; the state
    is applied by ``_class_setstate`` in both cases.
    """
    if Enum is not None and issubclass(obj, Enum):
        skeleton_factory = _make_skeleton_enum
        newargs = _enum_getnewargs(obj)
        class_state = _enum_getstate(obj)
    else:
        skeleton_factory = _make_skeleton_class
        newargs = _class_getnewargs(obj)
        class_state = _class_getstate(obj)
    # Six-item reduce value: no list items, no dict items, custom state setter.
    return (skeleton_factory, newargs, class_state, None, None, _class_setstate)
1040
1041
1042def _class_reduce(obj):
1043 """Select the reducer depending on the dynamic nature of the class obj."""
1044 if obj is type(None): # noqa
1045 return type, (None,)
1046 elif obj is type(Ellipsis):
1047 return type, (Ellipsis,)
1048 elif obj is type(NotImplemented):
1049 return type, (NotImplemented,)
1050 elif obj in _BUILTIN_TYPE_NAMES:
1051 return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
1052 elif not _should_pickle_by_reference(obj):
1053 return _dynamic_class_reduce(obj)
1054 return NotImplemented
1055
1056
def _dict_keys_reduce(obj):
    """Reduce a dict keys view to a list of its keys."""
    # Only the keys are shipped, not the full dict: sending the rest might be
    # unintended and could potentially leak sensitive information.
    keys = list(obj)
    return _make_dict_keys, (keys,)
1062
1063
def _dict_values_reduce(obj):
    """Reduce a dict values view to a list of its values."""
    # Only the values are shipped, not the full dict: sending the rest might
    # be unintended and could potentially leak sensitive information.
    values = list(obj)
    return _make_dict_values, (values,)
1069
1070
def _dict_items_reduce(obj):
    """Reduce a dict items view to a dict built from its (key, value) pairs."""
    mapping = dict(obj)
    return _make_dict_items, (mapping,)
1073
1074
def _odict_keys_reduce(obj):
    """Reduce an OrderedDict keys view to a list of its keys."""
    # Only the keys are shipped, not the full dict: sending the rest might be
    # unintended and could potentially leak sensitive information.
    keys = list(obj)
    # The extra True argument distinguishes this from the plain-dict reducer.
    return _make_dict_keys, (keys, True)
1080
1081
def _odict_values_reduce(obj):
    """Reduce an OrderedDict values view to a list of its values."""
    # Only the values are shipped, not the full dict: sending the rest might
    # be unintended and could potentially leak sensitive information.
    values = list(obj)
    # The extra True argument distinguishes this from the plain-dict reducer.
    return _make_dict_values, (values, True)
1087
1088
def _odict_items_reduce(obj):
    """Reduce an OrderedDict items view to a dict built from its pairs."""
    mapping = dict(obj)
    # The extra True argument distinguishes this from the plain-dict reducer.
    return _make_dict_items, (mapping, True)
1091
1092
def _dataclass_field_base_reduce(obj):
    """Reduce a dataclass field-type sentinel to a lookup by its name."""
    sentinel_name = obj.name
    return _get_dataclass_field_type_sentinel, (sentinel_name,)
1095
1096
1097# COLLECTIONS OF OBJECTS STATE SETTERS
1098# ------------------------------------
# state setters are called at unpickling time, once the object is created and
# it has to be updated to how it was at pickling time.
1101
1102
1103def _function_setstate(obj, state):
1104 """Update the state of a dynamic function.
1105
1106 As __closure__ and __globals__ are readonly attributes of a function, we
1107 cannot rely on the native setstate routine of pickle.load_build, that calls
1108 setattr on items of the slotstate. Instead, we have to modify them inplace.
1109 """
1110 state, slotstate = state
1111 obj.__dict__.update(state)
1112
1113 obj_globals = slotstate.pop("__globals__")
1114 obj_closure = slotstate.pop("__closure__")
1115 # _cloudpickle_subimports is a set of submodules that must be loaded for
1116 # the pickled function to work correctly at unpickling time. Now that these
1117 # submodules are depickled (hence imported), they can be removed from the
1118 # object's state (the object state only served as a reference holder to
1119 # these submodules)
1120 slotstate.pop("_cloudpickle_submodules")
1121
1122 obj.__globals__.update(obj_globals)
1123 obj.__globals__["__builtins__"] = __builtins__
1124
1125 if obj_closure is not None:
1126 for i, cell in enumerate(obj_closure):
1127 try:
1128 value = cell.cell_contents
1129 except ValueError: # cell is empty
1130 continue
1131 obj.__closure__[i].cell_contents = value
1132
1133 for k, v in slotstate.items():
1134 setattr(obj, k, v)
1135
1136
def _class_setstate(obj, state):
    """Apply the pickled attributes of a dynamic class and return the class.

    ``state`` is a two-item pair whose first item maps attribute names to
    values; the second item is not used here. The "_abc_impl" entry is not
    set as an attribute: its items are passed to ``obj.register`` instead.
    """
    # A class that is being reused must keep its current state untouched.
    if obj in _DYNAMIC_CLASS_TRACKER_REUSING:
        return obj

    attributes, _ = state
    abc_registry = None
    for name, value in attributes.items():
        if name == "_abc_impl":
            abc_registry = value
            continue
        setattr(obj, name, value)

    if abc_registry is not None:
        for registered in abc_registry:
            obj.register(registered)

    return obj
1153
1154
1155# COLLECTION OF DATACLASS UTILITIES
1156# ---------------------------------
1157# There are some internal sentinel values whose identity must be preserved when
1158# unpickling dataclass fields. Each sentinel value has a unique name that we can
1159# use to retrieve its identity at unpickling time.
1160
1161
1162_DATACLASSE_FIELD_TYPE_SENTINELS = {
1163 dataclasses._FIELD.name: dataclasses._FIELD,
1164 dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR,
1165 dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR,
1166}
1167
1168
def _get_dataclass_field_type_sentinel(name):
    """Return the dataclasses field-type sentinel registered under ``name``.

    Raises ``KeyError`` when ``name`` is not a known sentinel name.
    """
    sentinel = _DATACLASSE_FIELD_TYPE_SENTINELS[name]
    return sentinel
1171
1172
class Pickler(pickle.Pickler):
    """``pickle.Pickler`` subclass adding reducers for dynamic objects.

    Type-specific reducers are registered in ``dispatch_table``. Functions and
    classes are handled by ``reducer_override`` when ``PYPY`` is false, and by
    the ``save_global`` / ``save_function`` entries of ``dispatch`` otherwise.
    """

    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
    _dispatch_table[io.TextIOWrapper] = _file_reduce
    _dispatch_table[logging.Logger] = _logger_reduce
    _dispatch_table[logging.RootLogger] = _root_logger_reduce
    _dispatch_table[memoryview] = _memoryview_reduce
    _dispatch_table[property] = _property_reduce
    _dispatch_table[staticmethod] = _classmethod_reduce
    _dispatch_table[CellType] = _cell_reduce
    _dispatch_table[types.CodeType] = _code_reduce
    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
    _dispatch_table[types.ModuleType] = _module_reduce
    _dispatch_table[types.MethodType] = _method_reduce
    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
    _dispatch_table[weakref.WeakSet] = _weakset_reduce
    _dispatch_table[typing.TypeVar] = _typevar_reduce
    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
    _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
    _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
    _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
    _dispatch_table[abc.abstractmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractproperty] = _property_reduce
    _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

    # cloudpickle reducers are looked up first, then the copyreg ones.
    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

    # function reducers are defined as instance methods of cloudpickle.Pickler
    # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (_make_function, newargs, state, None, None, _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this reducer
        returns NotImplemented, making the cloudpickle.Pickler fall back to
        traditional pickle.Pickler routines to save obj. Otherwise, it reduces
        obj using a custom cloudpickle reducer designed specifically to handle
        dynamic functions.
        """
        if _should_pickle_by_reference(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        """Return the positional arguments given to ``_make_function``.

        The result is ``(code, base_globals, None, None, closure)`` where
        ``closure`` is either None or a tuple of empty cells, one per free
        variable of the code object.
        """
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # cloudpickle.Pickler.globals_ref allow functions sharing the same
        # globals at pickling time to also share them once unpickled, at one
        # condition: since globals_ref is an attribute of a cloudpickle.Pickler
        # instance, and that a new cloudpickle.Pickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
        # to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
        # cloudpickle.Pickler.dump, as long as the multiple invocations are
        # bound to the same cloudpickle.Pickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if base_globals == {}:
            # Add module attributes used to resolve relative imports
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure

    def dump(self, obj):
        """Pickle ``obj``, reporting excessive recursion as a PicklingError.

        Raises:
            pickle.PicklingError: if a RuntimeError whose first argument is a
                string containing "recursion" is raised while pickling. Any
                other RuntimeError is re-raised unchanged.
        """
        try:
            return super().dump(obj)
        except RuntimeError as e:
            # Only inspect the message when it is a string: a non-string first
            # argument would make the ``in`` test raise TypeError and hide the
            # original error.
            reason = e.args[0] if e.args else None
            if isinstance(reason, str) and "recursion" in reason:
                msg = "Could not pickle object as excessively deep recursion required."
                raise pickle.PicklingError(msg) from e
            else:
                raise

    def __init__(self, file, protocol=None, buffer_callback=None):
        """Create a pickler writing to ``file``.

        ``protocol`` defaults to ``DEFAULT_PROTOCOL`` when None;
        ``buffer_callback`` is forwarded to ``pickle.Pickler``.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map functions __globals__ attribute ids, to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)

    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on reduce_override method to customize the pickler
        # behavior.

        # `cloudpickle.Pickler.dispatch` is only left for backward
        # compatibility - note that when using protocol 5,
        # `cloudpickle.Pickler.dispatch` is not an extension of
        # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
        # subclasses the C-implemented `pickle.Pickler`, which does not expose
        # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
        # used `cloudpickle.Pickler.dispatch` as a class-level attribute
        # storing all reducers implemented by cloudpickle, but the attribute
        # name was not a great choice because it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
        dispatch = dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
        # the C-implemented `pickle.Pickler`.
        # TODO: decorrelate reducer_override (which is tied to CPython's
        # implementation - would it make sense to backport it to pypy? - and
        # pickle's protocol 5 which is implementation agnostic. Currently, the
        # availability of both notions coincide on CPython's pickle, but it may
        # not be the case anymore when pypy implements protocol 5.

        def reducer_override(self, obj):
            """Type-agnostic reducing callback for function and classes.

            For performance reasons, subclasses of the C `pickle.Pickler` class
            cannot register custom reducers for functions and classes in the
            dispatch_table attribute. Reducers for such types must instead be
            implemented via the special `reducer_override` method.

            Note that this method will be called for any object except a few
            builtin-types (int, lists, dicts etc.), which differs from reducers
            in the Pickler's dispatch_table, each of them being invoked for
            objects of a specific type only.

            This property comes in handy for classes: although most classes are
            instances of the ``type`` metaclass, some of them can be instances
            of other custom metaclasses (such as enum.EnumMeta for example). In
            particular, the metaclass will likely not be known in advance, and
            thus cannot be special-cased using an entry in the dispatch_table.
            reducer_override, among other things, allows us to register a
            reducer that will be called for any class, independently of its
            type.

            Notes:

            * reducer_override has the priority over dispatch_table-registered
            reducers.
            * reducer_override can be used to fix other limitations of
              cloudpickle for other types that suffered from type-specific
              reducers, such as Exceptions. See
              https://github.com/cloudpipe/cloudpickle/issues/248
            """
            t = type(obj)
            try:
                is_anyclass = issubclass(t, type)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                is_anyclass = False

            if is_anyclass:
                return _class_reduce(obj)
            elif isinstance(obj, types.FunctionType):
                return self._function_reduce(obj)
            else:
                # fallback to save_global, including the Pickler's
                # dispatch_table
                return NotImplemented

    else:
        # When reducer_override is not available, hack the pure-Python
        # Pickler's types.FunctionType and type savers. Note: the type saver
        # must override Pickler.save_global, because pickle.py contains a
        # hard-coded call to save_global when pickling meta-classes.
        dispatch = pickle.Pickler.dispatch.copy()

        def _save_reduce_pickle5(
            self,
            func,
            args,
            state=None,
            listitems=None,
            dictitems=None,
            state_setter=None,
            obj=None,
        ):
            """Save a reduce value, applying ``state`` through ``state_setter``.

            ``save_reduce`` is called without the state; the opcodes for a
            ``state_setter(obj, state)`` call are then written explicitly and
            the result of that call is popped from the stack.
            """
            save = self.save
            write = self.write
            self.save_reduce(
                func,
                args,
                state=None,
                listitems=listitems,
                dictitems=dictitems,
                obj=obj,
            )
            # backport of the Python 3.8 state_setter pickle operations
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(pickle.TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(pickle.REDUCE)
            # The purpose of state_setter is to carry-out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(pickle.POP)

        def save_global(self, obj, name=None, pack=struct.pack):
            """Main dispatch method.

            The name of this method is somewhat misleading: all types get
            dispatched here.
            """
            if obj is type(None):  # noqa
                return self.save_reduce(type, (None,), obj=obj)
            elif obj is type(Ellipsis):
                return self.save_reduce(type, (Ellipsis,), obj=obj)
            elif obj is type(NotImplemented):
                return self.save_reduce(type, (NotImplemented,), obj=obj)
            elif obj in _BUILTIN_TYPE_NAMES:
                return self.save_reduce(
                    _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                )

            if name is not None:
                super().save_global(obj, name=name)
            elif not _should_pickle_by_reference(obj, name=name):
                self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
            else:
                super().save_global(obj, name=name)

        dispatch[type] = save_global

        def save_function(self, obj, name=None):
            """Registered with the dispatch to handle all function types.

            Determines what kind of function obj is (e.g. lambda, defined at
            interactive prompt, etc) and handles the pickling appropriately.
            """
            if _should_pickle_by_reference(obj, name=name):
                return super().save_global(obj, name=name)
            elif PYPY and isinstance(obj.__code__, builtin_code_type):
                return self.save_pypy_builtin_func(obj)
            else:
                return self._save_reduce_pickle5(
                    *self._dynamic_function_reduce(obj), obj=obj
                )

        def save_pypy_builtin_func(self, obj):
            """Save pypy equivalent of builtin functions.

            PyPy does not have the concept of builtin-functions. Instead,
            builtin-functions are simple function instances, but with a
            builtin-code attribute.
            Most of the time, builtin functions should be pickled by attribute.
            But PyPy has flaky support for __qualname__, so some builtin
            functions such as float.__new__ will be classified as dynamic. For
            this reason only, we created this special routine. Because
            builtin-functions are not expected to have closure or globals,
            there is no additional hack (compared to the one already
            implemented in pickle) to protect ourselves from reference cycles.
            A simple (reconstructor, newargs, obj.__dict__) tuple is
            save_reduced. Note also that PyPy improved their support for
            __qualname__ in v3.6, so this routine should be removed when
            cloudpickle supports only PyPy 3.6 and later.
            """
            rv = (
                types.FunctionType,
                (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                obj.__dict__,
            )
            self.save_reduce(*rv, obj=obj)

        dispatch[types.FunctionType] = save_function
1460
1461
1462# Shorthands similar to pickle.dump/pickle.dumps
1463
1464
def dump(obj, file, protocol=None, buffer_callback=None):
    """Pickle obj and stream the resulting bytes into file.

    When protocol is None, cloudpickle.DEFAULT_PROTOCOL is used; it is an
    alias to pickle.HIGHEST_PROTOCOL, which favors maximum communication
    speed between processes running the same Python version.

    Pass protocol=pickle.DEFAULT_PROTOCOL instead if compatibility with older
    versions of Python is needed (this is not always guaranteed to work, as
    cloudpickle relies on some internal implementation details that can
    change from one Python version to the next).
    """
    pickler = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
    pickler.dump(obj)
1479
1480
def dumps(obj, protocol=None, buffer_callback=None):
    """Pickle obj and return the result as a bytes object built in memory.

    When protocol is None, cloudpickle.DEFAULT_PROTOCOL is used; it is an
    alias to pickle.HIGHEST_PROTOCOL, which favors maximum communication
    speed between processes running the same Python version.

    Pass protocol=pickle.DEFAULT_PROTOCOL instead if compatibility with older
    versions of Python is needed (this is not always guaranteed to work, as
    cloudpickle relies on some internal implementation details that can
    change from one Python version to the next).
    """
    with io.BytesIO() as buffer:
        Pickler(buffer, protocol=protocol, buffer_callback=buffer_callback).dump(obj)
        # Read the payload before the buffer is closed on exiting the block.
        payload = buffer.getvalue()
    return payload
1498
1499
# Expose the standard pickle unloading functions in this namespace for
# convenience.
load = pickle.load
loads = pickle.loads
1502
# Backward compat alias: ``CloudPickler`` is bound to the same class object as
# ``Pickler``.
CloudPickler = Pickler