1"""Pickler class to extend the standard pickle.Pickler functionality
2
3The main objective is to make it natural to perform distributed computing on
4clusters (such as PySpark, Dask, Ray...) with interactively defined code
5(functions, classes, ...) written in notebooks or console.
6
7In particular this pickler adds the following features:
8- serialize interactively-defined or locally-defined functions, classes,
9 enums, typevars, lambdas and nested functions to compiled byte code;
10- deal with some other non-serializable objects in an ad-hoc manner where
11 applicable.
12
13This pickler is therefore meant to be used for the communication between short
14lived Python processes running the same version of Python and libraries. In
15particular, it is not meant to be used for long term storage of Python objects.
16
17It does not include an unpickler, as standard Python unpickling suffices.
18
19This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
20<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
21
22Copyright (c) 2012-now, CloudPickle developers and contributors.
23Copyright (c) 2012, Regents of the University of California.
24Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
25All rights reserved.
26
27Redistribution and use in source and binary forms, with or without
28modification, are permitted provided that the following conditions
29are met:
30 * Redistributions of source code must retain the above copyright
31 notice, this list of conditions and the following disclaimer.
32 * Redistributions in binary form must reproduce the above copyright
33 notice, this list of conditions and the following disclaimer in the
34 documentation and/or other materials provided with the distribution.
35 * Neither the name of the University of California, Berkeley nor the
36 names of its contributors may be used to endorse or promote
37 products derived from this software without specific prior written
38 permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
47PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
48LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
49NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
50SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51"""
52
53import _collections_abc
54from collections import ChainMap, OrderedDict
55import abc
56import builtins
57import copyreg
58import dataclasses
59import dis
60from enum import Enum
61import io
62import itertools
63import logging
64import opcode
65import pickle
66from pickle import _getattribute
67import platform
68import struct
69import sys
70import threading
71import types
72import typing
73import uuid
74import warnings
75import weakref
76
77# The following import is required to be imported in the cloudpickle
78# namespace to be able to load pickle files generated with older versions of
79# cloudpickle. See: tests/test_backward_compat.py
80from types import CellType # noqa: F401
81
82
# cloudpickle is meant for inter process communication: we expect all
# communicating processes to run the same Python version hence we favor
# communication speed over compatibility:
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Names of modules whose resources should be treated as dynamic.
_PICKLE_BY_VALUE_MODULES = set()

# Track the provenance of reconstructed dynamic classes to make it possible to
# reconstruct instances from the matching singleton class definition when
# appropriate and preserve the usual "isinstance" semantics of Python objects.
# Both mappings are weak so that tracking never keeps a class alive on its
# own; the lock guards the two-way registration against concurrent pickling.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()

# True when running on the PyPy implementation (checked below to special-case
# PyPy's builtin code objects).
PYPY = platform.python_implementation() == "PyPy"

builtin_code_type = None
if PYPY:
    # builtin-code objects only exist in pypy
    builtin_code_type = type(float.__new__.__code__)

# Cache of global names referenced per code object, weakly keyed so entries
# disappear together with their code objects.
_extract_code_globals_cache = weakref.WeakKeyDictionary()
106
107
def _get_or_create_tracker_id(class_def):
    """Return the tracker id for ``class_def``, minting a new one if needed.

    The class <-> id mappings live in the module-level weak dictionaries and
    are updated under the tracker lock for thread-safety.
    """
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if tracker_id is None:
            tracker_id = uuid.uuid4().hex
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = tracker_id
            _DYNAMIC_CLASS_TRACKER_BY_ID[tracker_id] = class_def
    return tracker_id
116
117
def _lookup_class_or_track(class_tracker_id, class_def):
    """Return the canonical class definition for ``class_tracker_id``.

    When a tracker id is given, reuse any class previously registered under
    that id; otherwise register ``class_def`` as the canonical definition.
    With no tracker id, ``class_def`` is returned unchanged.
    """
    if class_tracker_id is None:
        return class_def
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        canonical = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
            class_tracker_id, class_def
        )
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[canonical] = class_tracker_id
    return canonical
126
127
def register_pickle_by_value(module):
    """Register a module so its functions and classes are pickled by value.

    By default, functions and classes that are attributes of an importable
    module are pickled by reference, i.e. by re-importing the attribute from
    the module at load time.

    After ``register_pickle_by_value(module)``, all functions and classes of
    that module are pickled by value instead, so they can be loaded in
    Python processes where the module is not importable.

    This is especially useful when developing a module in a distributed
    execution environment: restarting the client Python process with the new
    source code is enough — there is no need to re-install the new version
    of the module on all the worker nodes nor to restart the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    # Cloudpickle may one day need to introspect relative imports inside
    # functions pickled by value (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633),
    # which requires the registered module to be reachable from sys.modules
    # during pickling. Enforcing its presence at registration time (instead
    # of, say, storing a weakref) keeps that door open without a breaking
    # change later.
    if module.__name__ not in sys.modules:
        raise ValueError(
            f"{module} was not imported correctly, have you used an "
            "`import` statement to access it?"
        )
    _PICKLE_BY_VALUE_MODULES.add(module.__name__)
165
166
def unregister_pickle_by_value(module):
    """Unregister that the input module should be pickled by value.

    Reverses a previous call to :func:`register_pickle_by_value`.

    Raises
    ------
    ValueError
        If ``module`` is not a module object, or is not currently
        registered for pickling by value.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    if module.__name__ not in _PICKLE_BY_VALUE_MODULES:
        raise ValueError(f"{module} is not registered for pickle by value")
    # No `else` needed: the `raise` above already terminates that branch.
    _PICKLE_BY_VALUE_MODULES.remove(module.__name__)
175
176
def list_registry_pickle_by_value():
    """Return a snapshot of the names of modules registered for pickling by value."""
    return set(_PICKLE_BY_VALUE_MODULES)
179
180
def _is_registered_pickle_by_value(module):
    """Return True if ``module`` or one of its parent packages is registered
    for pickling by value."""
    name = module.__name__
    # Check the module itself, then walk up the package hierarchy
    # (a.b.c -> a.b -> a) until no dot is left.
    while True:
        if name in _PICKLE_BY_VALUE_MODULES:
            return True
        parent, dot, _ = name.rpartition(".")
        if not dot:
            return False
        name = parent
193
194
195def _whichmodule(obj, name):
196 """Find the module an object belongs to.
197
198 This function differs from ``pickle.whichmodule`` in two ways:
199 - it does not mangle the cases where obj's module is __main__ and obj was
200 not found in any module.
201 - Errors arising during module introspection are ignored, as those errors
202 are considered unwanted side effects.
203 """
204 module_name = getattr(obj, "__module__", None)
205
206 if module_name is not None:
207 return module_name
208 # Protect the iteration by using a copy of sys.modules against dynamic
209 # modules that trigger imports of other modules upon calls to getattr or
210 # other threads importing at the same time.
211 for module_name, module in sys.modules.copy().items():
212 # Some modules such as coverage can inject non-module objects inside
213 # sys.modules
214 if (
215 module_name == "__main__"
216 or module is None
217 or not isinstance(module, types.ModuleType)
218 ):
219 continue
220 try:
221 if _getattribute(module, name)[0] is obj:
222 return module_name
223 except Exception:
224 pass
225 return None
226
227
def _should_pickle_by_reference(obj, name=None):
    """Test whether a function or a class should be pickled by reference.

    Pickling by reference means that the object (typically a function or a
    class) is an attribute of a module assumed importable in the target
    Python environment: loading relies on importing the module and calling
    `getattr` on it to access the function or class.

    Pickling by reference is the only option for functions and classes of
    the standard library. In cloudpickle the alternative is pickling by
    value (for instance for interactively or locally defined functions and
    classes, or for attributes of modules explicitly registered to be
    pickled by value).
    """
    if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
        module_and_name = _lookup_module_and_qualname(obj, name=name)
        if module_and_name is None:
            # Dynamic function or class: it can only be pickled by value.
            return False
        module, _ = module_and_name
        return not _is_registered_pickle_by_value(module)

    if isinstance(obj, types.ModuleType):
        # sys.modules is primarily a cache for the Python import machinery,
        # so presence in it is a cheap and simple heuristic telling us that
        # the module could be imported by name in another Python process.
        if _is_registered_pickle_by_value(obj):
            return False
        return obj.__name__ in sys.modules

    raise TypeError(
        "cannot check importability of {} instances".format(type(obj).__name__)
    )
262
263
def _lookup_module_and_qualname(obj, name=None):
    """Return ``(module, name)`` if obj is reachable by reference, else None.

    None means obj must be treated as dynamic: either its module cannot be
    determined, is __main__, is not imported, or does not actually expose
    obj under ``name``.
    """
    if name is None:
        name = getattr(obj, "__qualname__", None)
    if name is None:  # pragma: no cover
        # Leftover from Python 2.7 support, kept in case users of cloudpickle
        # rely on this __name__ fallback for unknown reasons.
        name = getattr(obj, "__name__", None)

    module_name = _whichmodule(obj, name)

    if module_name is None:
        # obj.__module__ is None AND obj was not found in any imported
        # module: treat obj as dynamic.
        return None

    if module_name == "__main__":
        return None

    # If module_name is in sys.modules, the corresponding module is assumed
    # importable at unpickling time. See #357
    module = sys.modules.get(module_name, None)
    if module is None:
        # Most likely the module was created dynamically (e.g. with
        # types.ModuleType), or it was removed from sys.modules after obj
        # was created/imported; the latter is unsupported, as in standard
        # pickle.
        return None

    try:
        found, _ = _getattribute(module, name)
    except AttributeError:
        # obj was not found inside the module it points to
        return None
    if found is not obj:
        return None
    return module, name
302
303
def _extract_code_globals(co):
    """Find all global names read or written to by code block ``co``."""
    cached = _extract_code_globals_cache.get(co)
    if cached is not None:
        return cached

    # A dict with None values (instead of a set) preserves insertion order,
    # keeping the resulting pickle bytes deterministic.
    out_names = dict.fromkeys(_walk_global_ops(co))

    # A function declared with "def ..." inside another one produces a
    # constant code object in the enclosing co_consts. That nested function
    # may itself reference globals, so recurse into every constant code
    # object and merge its globals into the result.
    for const in co.co_consts or ():
        if isinstance(const, types.CodeType):
            out_names.update(_extract_code_globals(const))

    _extract_code_globals_cache[co] = out_names
    return out_names
327
328
329def _find_imported_submodules(code, top_level_dependencies):
330 """Find currently imported submodules used by a function.
331
332 Submodules used by a function need to be detected and referenced for the
333 function to work correctly at depickling time. Because submodules can be
334 referenced as attribute of their parent package (``package.submodule``), we
335 need a special introspection technique that does not rely on GLOBAL-related
336 opcodes to find references of them in a code object.
337
338 Example:
339 ```
340 import concurrent.futures
341 import cloudpickle
342 def func():
343 x = concurrent.futures.ThreadPoolExecutor
344 if __name__ == '__main__':
345 cloudpickle.dumps(func)
346 ```
347 The globals extracted by cloudpickle in the function's state include the
348 concurrent package, but not its submodule (here, concurrent.futures), which
349 is the module used by func. Find_imported_submodules will detect the usage
350 of concurrent.futures. Saving this module alongside with func will ensure
351 that calling func once depickled does not fail due to concurrent.futures
352 not being imported
353 """
354
355 subimports = []
356 # check if any known dependency is an imported package
357 for x in top_level_dependencies:
358 if (
359 isinstance(x, types.ModuleType)
360 and hasattr(x, "__package__")
361 and x.__package__
362 ):
363 # check if the package has any currently loaded sub-imports
364 prefix = x.__name__ + "."
365 # A concurrent thread could mutate sys.modules,
366 # make sure we iterate over a copy to avoid exceptions
367 for name in list(sys.modules):
368 # Older versions of pytest will add a "None" module to
369 # sys.modules.
370 if name is not None and name.startswith(prefix):
371 # check whether the function can address the sub-module
372 tokens = set(name[len(prefix) :].split("."))
373 if not tokens - set(code.co_names):
374 subimports.append(sys.modules[name])
375 return subimports
376
377
# Opcodes that reference global names; used by _walk_global_ops to detect
# which globals a code object reads or writes.
STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
# Convenience aliases for the dis module constants.
HAVE_ARGUMENT = dis.HAVE_ARGUMENT
EXTENDED_ARG = dis.EXTENDED_ARG


# Mapping from each built-in type exposed by the `types` module to its name,
# used by _builtin_type to pickle such types by reference.
_BUILTIN_TYPE_NAMES = {}
for k, v in types.__dict__.items():
    if type(v) is type:
        _BUILTIN_TYPE_NAMES[v] = k
391
392
393def _builtin_type(name):
394 if name == "ClassType": # pragma: no cover
395 # Backward compat to load pickle files generated with cloudpickle
396 # < 1.3 even if loading pickle files from older versions is not
397 # officially supported.
398 return type
399 return getattr(types, name)
400
401
def _walk_global_ops(code):
    """Yield the referenced name for each global-referencing instruction in code."""
    for instruction in dis.get_instructions(code):
        if instruction.opcode in GLOBAL_OPS:
            yield instruction.argval
408
409
410def _extract_class_dict(cls):
411 """Retrieve a copy of the dict of a class without the inherited method."""
412 clsdict = dict(cls.__dict__) # copy dict proxy to a dict
413 if len(cls.__bases__) == 1:
414 inherited_dict = cls.__bases__[0].__dict__
415 else:
416 inherited_dict = {}
417 for base in reversed(cls.__bases__):
418 inherited_dict.update(base.__dict__)
419 to_remove = []
420 for name, value in clsdict.items():
421 try:
422 base_value = inherited_dict[name]
423 if value is base_value:
424 to_remove.append(name)
425 except KeyError:
426 pass
427 for name in to_remove:
428 clsdict.pop(name)
429 return clsdict
430
431
def is_tornado_coroutine(func):
    """Return whether `func` is a Tornado coroutine function.

    Running coroutines are not supported.
    """
    warnings.warn(
        "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
        "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
        "directly instead.",
        category=DeprecationWarning,
    )
    gen = sys.modules.get("tornado.gen")
    if gen is None:
        # tornado.gen was never imported: func cannot be a tornado coroutine.
        return False
    if not hasattr(gen, "is_coroutine_function"):
        # Tornado version is too old to expose the introspection helper.
        return False
    return gen.is_coroutine_function(func)
450
451
def subimport(name):
    """Import the (possibly dotted) module ``name`` and return that exact module.

    A plain ``return __import__(name)`` would be wrong for submodules:
    ``__import__('os.path')`` returns the top-level ``os`` module, not
    ``os.path``. Importing and then looking ``name`` up in ``sys.modules``
    yields the submodule itself.
    """
    __import__(name)
    return sys.modules[name]
459
460
def dynamic_subimport(name, vars):
    """Reconstruct a dynamic (non-importable) module from its name and state."""
    module = types.ModuleType(name)
    module.__dict__.update(vars)
    # "__builtins__" is deliberately not pickled (see _module_reduce);
    # restore a default value for it here.
    module.__dict__["__builtins__"] = builtins.__dict__
    return module
466
467
468def _get_cell_contents(cell):
469 try:
470 return cell.cell_contents
471 except ValueError:
472 # Handle empty cells explicitly with a sentinel value.
473 return _empty_cell_value
474
475
def instance(cls):
    """Create a new instance of a class.

    Parameters
    ----------
    cls : type
        The class to instantiate (with no arguments).

    Returns
    -------
    instance : cls
        A new instance of ``cls``.
    """
    return cls()
490
491
@instance
class _empty_cell_value:
    """Sentinel singleton representing the content of an empty closure cell."""

    @classmethod
    def __reduce__(cls):
        # Pickle the sentinel by its module-level name so every unpickled
        # copy resolves to this same singleton.
        return cls.__name__
499
500
501def _make_function(code, globals, name, argdefs, closure):
502 # Setting __builtins__ in globals is needed for nogil CPython.
503 globals["__builtins__"] = __builtins__
504 return types.FunctionType(code, globals, name, argdefs, closure)
505
506
507def _make_empty_cell():
508 if False:
509 # trick the compiler into creating an empty cell in our lambda
510 cell = None
511 raise AssertionError("this route should not be executed")
512
513 return (lambda: cell).__closure__[0]
514
515
def _make_cell(value=_empty_cell_value):
    """Return a closure cell, optionally initialized with ``value``.

    When called without argument (or with the empty-cell sentinel), the
    returned cell is left empty.
    """
    new_cell = _make_empty_cell()
    if value is not _empty_cell_value:
        new_cell.cell_contents = value
    return new_cell
521
522
def _make_skeleton_class(
    type_constructor, name, bases, type_kwargs, class_tracker_id, extra
):
    """Build dynamic class with an empty __dict__ to be filled once memoized.

    If class_tracker_id is not None, try to look up an existing class
    definition matching that id. If none is found, track the freshly
    reconstructed class under that id so other instances stemming from the
    same class id reuse this one definition.

    The "extra" parameter is a dict (or None) reserved for forward
    compatibility, should extra state ever be needed.
    """
    skeleton = types.new_class(
        name,
        bases,
        {"metaclass": type_constructor},
        lambda ns: ns.update(type_kwargs),
    )
    return _lookup_class_or_track(class_tracker_id, skeleton)
540
541
def _make_skeleton_enum(
    bases, name, qualname, members, module, class_tracker_id, extra
):
    """Build dynamic enum with an empty __dict__ to be filled once memoized.

    The construction mirrors EnumMeta._create_: prepare a class dict through
    the metaclass, inject the members one by one, then call the metaclass
    directly.

    If class_tracker_id is not None, try to look up an existing enum
    definition matching that id. If none is found, track the freshly
    reconstructed enum under that id so other instances stemming from the
    same class id reuse this one definition.

    The "extra" parameter is a dict (or None) reserved for forward
    compatibility, should extra state ever be needed.
    """
    # Enums always inherit from their base Enum class at the last position
    # in the list of base classes.
    enum_base = bases[-1]
    metacls = enum_base.__class__
    classdict = metacls.__prepare__(name, bases)

    # Assign members one by one: the prepared class dict tracks member
    # definitions through __setitem__, so a bulk dict.update would not do.
    for member_name, member_value in members.items():
        classdict[member_name] = member_value

    enum_class = metacls.__new__(metacls, name, bases, classdict)
    enum_class.__module__ = module
    enum_class.__qualname__ = qualname

    return _lookup_class_or_track(class_tracker_id, enum_class)
571
572
def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
    """Recreate a TypeVar and deduplicate it through the class tracker."""
    typevar = typing.TypeVar(
        name,
        *constraints,
        bound=bound,
        covariant=covariant,
        contravariant=contravariant,
    )
    return _lookup_class_or_track(class_tracker_id, typevar)
582
583
def _decompose_typevar(obj):
    """Return the argument tuple expected by ``_make_typevar`` for ``obj``.

    The trailing tracker id deduplicates reconstructed TypeVars across
    pickles (see ``_lookup_class_or_track``).
    """
    return (
        obj.__name__,
        obj.__bound__,
        obj.__constraints__,
        obj.__covariant__,
        obj.__contravariant__,
        _get_or_create_tracker_id(obj),
    )
593
594
def _typevar_reduce(obj):
    """Reducer for TypeVar instances.

    TypeVar reconstruction requires the module information, which is why
    _should_pickle_by_reference is not used directly here.
    """
    module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)

    if module_and_name is not None and not _is_registered_pickle_by_value(
        module_and_name[0]
    ):
        # Importable and not registered by value: pickle by reference.
        return (getattr, module_and_name)
    # Dynamic TypeVar, or one from a module registered for pickle by value.
    return (_make_typevar, _decompose_typevar(obj))
606
607
608def _get_bases(typ):
609 if "__orig_bases__" in getattr(typ, "__dict__", {}):
610 # For generic types (see PEP 560)
611 # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
612 # correct. Subclasses of a fully-parameterized generic class does not
613 # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
614 # will return True because it's defined in the base class.
615 bases_attr = "__orig_bases__"
616 else:
617 # For regular class objects
618 bases_attr = "__bases__"
619 return getattr(typ, bases_attr)
620
621
622def _make_dict_keys(obj, is_ordered=False):
623 if is_ordered:
624 return OrderedDict.fromkeys(obj).keys()
625 else:
626 return dict.fromkeys(obj).keys()
627
628
629def _make_dict_values(obj, is_ordered=False):
630 if is_ordered:
631 return OrderedDict((i, _) for i, _ in enumerate(obj)).values()
632 else:
633 return {i: _ for i, _ in enumerate(obj)}.values()
634
635
636def _make_dict_items(obj, is_ordered=False):
637 if is_ordered:
638 return OrderedDict(obj).items()
639 else:
640 return obj.items()
641
642
643# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
644# -------------------------------------------------
645
646
def _class_getnewargs(obj):
    """Return the argument tuple passed to ``_make_skeleton_class`` for ``obj``."""
    type_kwargs = {}
    if "__module__" in obj.__dict__:
        type_kwargs["__module__"] = obj.__module__

    # A property stored under the "__dict__" key must be recreated together
    # with the skeleton class.
    __dict__ = obj.__dict__.get("__dict__", None)
    if isinstance(__dict__, property):
        type_kwargs["__dict__"] = __dict__

    return (
        type(obj),
        obj.__name__,
        _get_bases(obj),
        type_kwargs,
        _get_or_create_tracker_id(obj),
        None,  # "extra" slot, reserved for forward compatibility
    )
664
665
def _enum_getnewargs(obj):
    """Return the argument tuple passed to ``_make_skeleton_enum`` for ``obj``."""
    members = {e.name: e.value for e in obj}
    return (
        obj.__bases__,
        obj.__name__,
        obj.__qualname__,
        members,
        obj.__module__,
        _get_or_create_tracker_id(obj),
        None,  # "extra" slot, reserved for forward compatibility
    )
677
678
679# COLLECTION OF OBJECTS RECONSTRUCTORS
680# ------------------------------------
def _file_reconstructor(retval):
    """Identity function serving as a named reconstructor for pickled files."""
    return retval
683
684
685# COLLECTION OF OBJECTS STATE GETTERS
686# -----------------------------------
687
688
def _function_getstate(func):
    """Split ``func``'s state into a ``(state, slotstate)`` pair.

    - state holds func's dynamic attributes (func.__dict__); it is restored
      at unpickling time with ``f.__dict__.update(state)``.
    - slotstate holds func's special members; they are restored by calling
      ``setattr(func, slotname, slotvalue)`` for each entry.
    """
    slotstate = {
        "__name__": func.__name__,
        "__qualname__": func.__qualname__,
        "__annotations__": func.__annotations__,
        "__kwdefaults__": func.__kwdefaults__,
        "__defaults__": func.__defaults__,
        "__module__": func.__module__,
        "__doc__": func.__doc__,
        "__closure__": func.__closure__,
    }

    # Only ship the subset of globals actually referenced by the function's
    # code (including its nested code objects).
    referenced_globals = _extract_code_globals(func.__code__)
    f_globals = {
        k: func.__globals__[k] for k in referenced_globals if k in func.__globals__
    }

    closure_values = (
        list(map(_get_cell_contents, func.__closure__))
        if func.__closure__ is not None
        else ()
    )

    # Storing the currently-imported submodules used by func in the state
    # triggers the side effect of importing them at unpickling time, which
    # func may need to work correctly once depickled.
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        func.__code__, itertools.chain(f_globals.values(), closure_values)
    )
    slotstate["__globals__"] = f_globals

    return func.__dict__, slotstate
726
727
def _class_getstate(obj):
    """Return the ``(state, slotstate)`` pair used to pickle the class ``obj``.

    The state is a cleaned-up copy of the class __dict__, stripped of
    attributes that are either unpicklable or automatically recreated at
    unpickling time (ABC caches, slot member descriptors, ...).
    """
    clsdict = _extract_class_dict(obj)
    clsdict.pop("__weakref__", None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        clsdict.pop("_abc_cache", None)
        clsdict.pop("_abc_negative_cache", None)
        clsdict.pop("_abc_negative_cache_version", None)
        registry = clsdict.pop("_abc_registry", None)
        if registry is None:
            # The abc caches and registered subclasses of a
            # class are bundled into the single _abc_impl attribute
            clsdict.pop("_abc_impl", None)
            (registry, _, _, _) = abc._get_dump(obj)

            # Here registry is a set of weakrefs to the registered
            # subclasses; dereference them for pickling.
            clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry]
        else:
            # In the above if clause, registry is a set of weakrefs -- in
            # this case, registry is a WeakSet
            clsdict["_abc_impl"] = [type_ for type_ in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute, no need to
        # save them in obj's state
        if isinstance(obj.__slots__, str):
            # A lone string __slots__ declares a single slot of that name.
            clsdict.pop(obj.__slots__)
        else:
            for k in obj.__slots__:
                clsdict.pop(k, None)

    clsdict.pop("__dict__", None)  # unpicklable property object

    return (clsdict, {})
765
766
def _enum_getstate(obj):
    """Return the ``(state, slotstate)`` pair used to pickle the Enum class ``obj``."""
    clsdict, slotstate = _class_getstate(obj)

    members = {e.name: e.value for e in obj}
    # Cleanup the clsdict that will be passed to _make_skeleton_enum: the
    # metaclass rebuilds its internal bookkeeping attributes and the member
    # values on its own.
    internal_attrs = (
        "_generate_next_value_",
        "_member_names_",
        "_member_map_",
        "_member_type_",
        "_value2member_map_",
    )
    for attrname in internal_attrs:
        clsdict.pop(attrname, None)
    for member_name in members:
        clsdict.pop(member_name)

    return clsdict, slotstate
785
786
787# COLLECTIONS OF OBJECTS REDUCERS
788# -------------------------------
789# A reducer is a function taking a single argument (obj), and that returns a
790# tuple with all the necessary data to re-construct obj. Apart from a few
791# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
792# correctly pickle an object.
793# While many built-in objects (Exceptions objects, instances of the "object"
794# class, etc), are shipped with their own built-in reducer (invoked using
795# obj.__reduce__), some do not. The following methods were created to "fill
796# these holes".
797
798
def _code_reduce(obj):
    """code object reducer.

    Reconstructs the code object by calling types.CodeType with the
    positional arguments expected by the running interpreter; the branches
    below select the argument tuple matching the attributes the code object
    exposes.
    """
    # If you are not sure about the order of arguments, take a look at help
    # of the specific type from types, for example:
    # >>> from types import CodeType
    # >>> help(CodeType)
    if hasattr(obj, "co_exceptiontable"):
        # Python 3.11 and later: there are some new attributes
        # related to the enhanced exceptions.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_names,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_qualname,
            obj.co_firstlineno,
            obj.co_linetable,
            obj.co_exceptiontable,
            obj.co_freevars,
            obj.co_cellvars,
        )
    elif hasattr(obj, "co_linetable"):
        # Python 3.10 and later: obj.co_lnotab is deprecated and constructor
        # expects obj.co_linetable instead.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_names,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_firstlineno,
            obj.co_linetable,
            obj.co_freevars,
            obj.co_cellvars,
        )
    elif hasattr(obj, "co_nmeta"):  # pragma: no cover
        # "nogil" Python: modified attributes from 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_framesize,
            obj.co_ndefaultargs,
            obj.co_nmeta,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            obj.co_exc_handlers,
            obj.co_jump_table,
            obj.co_freevars,
            obj.co_cellvars,
            obj.co_free2reg,
            obj.co_cell2reg,
        )
    else:
        # Backward compat for 3.8 and 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_names,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            obj.co_freevars,
            obj.co_cellvars,
        )
    return types.CodeType, args
895
896
def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer."""
    try:
        contents = obj.cell_contents
    except ValueError:
        # The cell is empty: rebuild it as such.
        return _make_empty_cell, ()
    return _make_cell, (contents,)
905
906
907def _classmethod_reduce(obj):
908 orig_func = obj.__func__
909 return type(obj), (orig_func,)
910
911
def _file_reduce(obj):
    """Save a file by snapshotting its readable content.

    The file is re-created at unpickling time as an in-memory
    ``io.StringIO`` positioned at the same offset and carrying the same
    ``name`` attribute. The standard output/error streams are pickled by
    reference instead.

    Raises
    ------
    pickle.PicklingError
        For files that cannot be snapshotted: no name/mode, stdin, closed,
        tty-backed, write-only, or unreadable files.
    """
    # NOTE: the module-level `io` import is used; the former redundant
    # function-local `import io` was removed.
    if not hasattr(obj, "name") or not hasattr(obj, "mode"):
        raise pickle.PicklingError(
            "Cannot pickle files that do not map to an actual file"
        )
    if obj is sys.stdout:
        return getattr, (sys, "stdout")
    if obj is sys.stderr:
        return getattr, (sys, "stderr")
    if obj is sys.stdin:
        raise pickle.PicklingError("Cannot pickle standard input")
    if obj.closed:
        raise pickle.PicklingError("Cannot pickle closed files")
    if hasattr(obj, "isatty") and obj.isatty():
        raise pickle.PicklingError("Cannot pickle files that map to tty objects")
    if "r" not in obj.mode and "+" not in obj.mode:
        raise pickle.PicklingError(
            "Cannot pickle files that are not opened for reading: %s" % obj.mode
        )

    name = obj.name

    retval = io.StringIO()

    try:
        # Read the whole file while preserving the current position.
        curloc = obj.tell()
        obj.seek(0)
        contents = obj.read()
        obj.seek(curloc)
    except OSError as e:
        raise pickle.PicklingError(
            "Cannot pickle file %s as it cannot be read" % name
        ) from e
    retval.write(contents)
    retval.seek(curloc)

    retval.name = name
    return _file_reconstructor, (retval,)
954
955
956def _getset_descriptor_reduce(obj):
957 return getattr, (obj.__objclass__, obj.__name__)
958
959
960def _mappingproxy_reduce(obj):
961 return types.MappingProxyType, (dict(obj),)
962
963
964def _memoryview_reduce(obj):
965 return bytes, (obj.tobytes(),)
966
967
def _module_reduce(obj):
    """Module reducer: by reference when importable, by value otherwise."""
    if not _should_pickle_by_reference(obj):
        # Some external libraries can populate the "__builtins__" entry of a
        # module's `__dict__` with unpicklable objects (see #316). For that
        # reason, we do not attempt to pickle the "__builtins__" entry, and
        # restore a default value for it at unpickling time.
        state = {k: v for k, v in obj.__dict__.items() if k != "__builtins__"}
        return dynamic_subimport, (obj.__name__, state)
    return subimport, (obj.__name__,)
979
980
981def _method_reduce(obj):
982 return (types.MethodType, (obj.__func__, obj.__self__))
983
984
985def _logger_reduce(obj):
986 return logging.getLogger, (obj.name,)
987
988
989def _root_logger_reduce(obj):
990 return logging.getLogger, ()
991
992
993def _property_reduce(obj):
994 return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)
995
996
997def _weakset_reduce(obj):
998 return weakref.WeakSet, (list(obj),)
999
1000
def _dynamic_class_reduce(obj):
    """Save a class that can't be referenced as a module attribute.

    This method is used to serialize classes that are defined inside
    functions, or that otherwise can't be serialized as attribute lookups
    from importable modules.
    """
    # Enums need dedicated skeleton/newargs/state helpers; everything else
    # goes through the generic class machinery.
    if Enum is not None and issubclass(obj, Enum):
        make_skeleton = _make_skeleton_enum
        newargs, state = _enum_getnewargs(obj), _enum_getstate(obj)
    else:
        make_skeleton = _make_skeleton_class
        newargs, state = _class_getnewargs(obj), _class_getstate(obj)
    return (
        make_skeleton,
        newargs,
        state,
        None,
        None,
        _class_setstate,
    )
1026
1027
def _class_reduce(obj):
    """Select the reducer depending on the dynamic nature of the class obj."""
    # Singleton types are rebuilt by calling type() on their unique instance.
    for singleton in (None, Ellipsis, NotImplemented):
        if obj is type(singleton):
            return type, (singleton,)
    if obj in _BUILTIN_TYPE_NAMES:
        return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
    if not _should_pickle_by_reference(obj):
        return _dynamic_class_reduce(obj)
    # Importable classes fall back to the standard save_global machinery.
    return NotImplemented
1041
1042
def _dict_keys_reduce(obj):
    # Ship only the keys themselves: sending the backing dict might leak
    # unintended, potentially sensitive values.
    keys = list(obj)
    return _make_dict_keys, (keys,)
1048
1049
def _dict_values_reduce(obj):
    # Ship only the values themselves: sending the backing dict might leak
    # unintended, potentially sensitive entries.
    values = list(obj)
    return _make_dict_values, (values,)
1055
1056
def _dict_items_reduce(obj):
    # An items view carries the full (key, value) pairs, so a dict snapshot
    # of the view is exactly the data to ship.
    snapshot = dict(obj)
    return _make_dict_items, (snapshot,)
1059
1060
def _odict_keys_reduce(obj):
    # Ship only the keys themselves (see _dict_keys_reduce); the trailing
    # True flag marks the view as OrderedDict-backed.
    keys = list(obj)
    return _make_dict_keys, (keys, True)
1066
1067
def _odict_values_reduce(obj):
    # Ship only the values themselves (see _dict_values_reduce); the trailing
    # True flag marks the view as OrderedDict-backed.
    values = list(obj)
    return _make_dict_values, (values, True)
1073
1074
def _odict_items_reduce(obj):
    # Same as _dict_items_reduce; the trailing True flag marks the view as
    # OrderedDict-backed.
    snapshot = dict(obj)
    return _make_dict_items, (snapshot, True)
1077
1078
def _dataclass_field_base_reduce(obj):
    """Dataclass field sentinels are recovered by name to preserve identity."""
    sentinel_name = obj.name
    return _get_dataclass_field_type_sentinel, (sentinel_name,)
1081
1082
1083# COLLECTIONS OF OBJECTS STATE SETTERS
1084# ------------------------------------
# state setters are called at unpickling time, once the object is created and
# it has to be updated to how it was at pickling time.
1087
1088
1089def _function_setstate(obj, state):
1090 """Update the state of a dynamic function.
1091
1092 As __closure__ and __globals__ are readonly attributes of a function, we
1093 cannot rely on the native setstate routine of pickle.load_build, that calls
1094 setattr on items of the slotstate. Instead, we have to modify them inplace.
1095 """
1096 state, slotstate = state
1097 obj.__dict__.update(state)
1098
1099 obj_globals = slotstate.pop("__globals__")
1100 obj_closure = slotstate.pop("__closure__")
1101 # _cloudpickle_subimports is a set of submodules that must be loaded for
1102 # the pickled function to work correctly at unpickling time. Now that these
1103 # submodules are depickled (hence imported), they can be removed from the
1104 # object's state (the object state only served as a reference holder to
1105 # these submodules)
1106 slotstate.pop("_cloudpickle_submodules")
1107
1108 obj.__globals__.update(obj_globals)
1109 obj.__globals__["__builtins__"] = __builtins__
1110
1111 if obj_closure is not None:
1112 for i, cell in enumerate(obj_closure):
1113 try:
1114 value = cell.cell_contents
1115 except ValueError: # cell is empty
1116 continue
1117 obj.__closure__[i].cell_contents = value
1118
1119 for k, v in slotstate.items():
1120 setattr(obj, k, v)
1121
1122
1123def _class_setstate(obj, state):
1124 state, slotstate = state
1125 registry = None
1126 for attrname, attr in state.items():
1127 if attrname == "_abc_impl":
1128 registry = attr
1129 else:
1130 setattr(obj, attrname, attr)
1131 if registry is not None:
1132 for subclass in registry:
1133 obj.register(subclass)
1134
1135 return obj
1136
1137
1138# COLLECTION OF DATACLASS UTILITIES
1139# ---------------------------------
1140# There are some internal sentinel values whose identity must be preserved when
1141# unpickling dataclass fields. Each sentinel value has a unique name that we can
1142# use to retrieve its identity at unpickling time.
1143
1144
1145_DATACLASSE_FIELD_TYPE_SENTINELS = {
1146 dataclasses._FIELD.name: dataclasses._FIELD,
1147 dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR,
1148 dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR,
1149}
1150
1151
def _get_dataclass_field_type_sentinel(name):
    """Return the unique dataclass field-type sentinel registered under *name*."""
    return _DATACLASSE_FIELD_TYPE_SENTINELS[name]
1154
1155
class Pickler(pickle.Pickler):
    """Pickler with cloudpickle's reducers for dynamic functions and classes.

    On CPython (``not PYPY``), the C-implemented ``pickle.Pickler`` is
    customized through the ``reducer_override`` callback; on PyPy, the
    pure-Python pickler is customized by overriding ``save_global`` and
    ``save_function`` via the ``dispatch`` dictionary instead.
    """

    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
    _dispatch_table[io.TextIOWrapper] = _file_reduce
    _dispatch_table[logging.Logger] = _logger_reduce
    _dispatch_table[logging.RootLogger] = _root_logger_reduce
    _dispatch_table[memoryview] = _memoryview_reduce
    _dispatch_table[property] = _property_reduce
    _dispatch_table[staticmethod] = _classmethod_reduce
    _dispatch_table[CellType] = _cell_reduce
    _dispatch_table[types.CodeType] = _code_reduce
    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
    _dispatch_table[types.ModuleType] = _module_reduce
    _dispatch_table[types.MethodType] = _method_reduce
    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
    _dispatch_table[weakref.WeakSet] = _weakset_reduce
    _dispatch_table[typing.TypeVar] = _typevar_reduce
    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
    _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
    _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
    _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
    _dispatch_table[abc.abstractmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractproperty] = _property_reduce
    _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

    # Public dispatch table: cloudpickle's reducers take precedence over any
    # copyreg-registered ones.
    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

    # function reducers are defined as instance methods of cloudpickle.Pickler
    # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (_make_function, newargs, state, None, None, _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this reducer
        returns NotImplemented, making the cloudpickle.Pickler fall back to
        traditional pickle.Pickler routines to save obj. Otherwise, it reduces
        obj using a custom cloudpickle reducer designed specifically to handle
        dynamic functions.
        """
        if _should_pickle_by_reference(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        """Build the newargs tuple (code, globals, name, defaults, closure)
        used to create the function skeleton at unpickling time."""
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # cloudpickle.Pickler.globals_ref allow functions sharing the same
        # globals at pickling time to also share them once unpickled, at one
        # condition: since globals_ref is an attribute of a cloudpickle.Pickler
        # instance, and that a new cloudpickle.Pickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
        # to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
        # cloudpickle.Pickler.dump, as long as the multiple invocations are
        # bound to the same cloudpickle.Pickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if base_globals == {}:
            # Add module attributes used to resolve relative imports
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure

    def dump(self, obj):
        """Pickle obj, translating deep-recursion RuntimeErrors into
        pickle.PicklingError; any other RuntimeError is re-raised as-is."""
        try:
            return super().dump(obj)
        except RuntimeError as e:
            if len(e.args) > 0 and "recursion" in e.args[0]:
                msg = "Could not pickle object as excessively deep recursion required."
                raise pickle.PicklingError(msg) from e
            else:
                raise

    def __init__(self, file, protocol=None, buffer_callback=None):
        """Create a pickler writing to *file*; protocol defaults to
        DEFAULT_PROTOCOL when None."""
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map functions __globals__ attribute ids, to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)

    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on the reducer_override method to customize the
        # pickler behavior.

        # `cloudpickle.Pickler.dispatch` is only left for backward
        # compatibility - note that when using protocol 5,
        # `cloudpickle.Pickler.dispatch` is not an extension of
        # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
        # subclasses the C-implemented `pickle.Pickler`, which does not expose
        # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
        # used `cloudpickle.Pickler.dispatch` as a class-level attribute
        # storing all reducers implemented by cloudpickle, but the attribute
        # name was not a great choice because it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
        dispatch = dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
        # the C-implemented `pickle.Pickler`.
        # TODO: decorrelate reducer_override (which is tied to CPython's
        # implementation - would it make sense to backport it to pypy? - and
        # pickle's protocol 5 which is implementation agnostic. Currently, the
        # availability of both notions coincide on CPython's pickle, but it may
        # not be the case anymore when pypy implements protocol 5.

        def reducer_override(self, obj):
            """Type-agnostic reducing callback for function and classes.

            For performance reasons, subclasses of the C `pickle.Pickler` class
            cannot register custom reducers for functions and classes in the
            dispatch_table attribute. Reducers for such types must instead be
            implemented via the special `reducer_override` method.

            Note that this method will be called for any object except a few
            builtin-types (int, lists, dicts etc.), which differs from reducers
            in the Pickler's dispatch_table, each of them being invoked for
            objects of a specific type only.

            This property comes in handy for classes: although most classes are
            instances of the ``type`` metaclass, some of them can be instances
            of other custom metaclasses (such as enum.EnumMeta for example). In
            particular, the metaclass will likely not be known in advance, and
            thus cannot be special-cased using an entry in the dispatch_table.
            reducer_override, among other things, allows us to register a
            reducer that will be called for any class, independently of its
            type.

            Notes:

            * reducer_override has the priority over dispatch_table-registered
            reducers.
            * reducer_override can be used to fix other limitations of
              cloudpickle for other types that suffered from type-specific
              reducers, such as Exceptions. See
              https://github.com/cloudpipe/cloudpickle/issues/248
            """
            t = type(obj)
            try:
                is_anyclass = issubclass(t, type)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                is_anyclass = False

            if is_anyclass:
                return _class_reduce(obj)
            elif isinstance(obj, types.FunctionType):
                return self._function_reduce(obj)
            else:
                # fallback to save_global, including the Pickler's
                # dispatch_table
                return NotImplemented

    else:
        # When reducer_override is not available, hack the pure-Python
        # Pickler's types.FunctionType and type savers. Note: the type saver
        # must override Pickler.save_global, because pickle.py contains a
        # hard-coded call to save_global when pickling meta-classes.
        dispatch = pickle.Pickler.dispatch.copy()

        def _save_reduce_pickle5(
            self,
            func,
            args,
            state=None,
            listitems=None,
            dictitems=None,
            state_setter=None,
            obj=None,
        ):
            """save_reduce variant that backports the protocol-5 state_setter
            pickle opcodes on top of the pure-Python pickler."""
            save = self.save
            write = self.write
            self.save_reduce(
                func,
                args,
                state=None,
                listitems=listitems,
                dictitems=dictitems,
                obj=obj,
            )
            # backport of the Python 3.8 state_setter pickle operations
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(pickle.TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(pickle.REDUCE)
            # The purpose of state_setter is to carry-out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(pickle.POP)

        def save_global(self, obj, name=None, pack=struct.pack):
            """Main dispatch method.

            The name of this method is somewhat misleading: all types get
            dispatched here.
            """
            if obj is type(None):  # noqa
                return self.save_reduce(type, (None,), obj=obj)
            elif obj is type(Ellipsis):
                return self.save_reduce(type, (Ellipsis,), obj=obj)
            elif obj is type(NotImplemented):
                return self.save_reduce(type, (NotImplemented,), obj=obj)
            elif obj in _BUILTIN_TYPE_NAMES:
                return self.save_reduce(
                    _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                )

            if name is not None:
                super().save_global(obj, name=name)
            elif not _should_pickle_by_reference(obj, name=name):
                self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
            else:
                super().save_global(obj, name=name)

        dispatch[type] = save_global

        def save_function(self, obj, name=None):
            """Registered with the dispatch to handle all function types.

            Determines what kind of function obj is (e.g. lambda, defined at
            interactive prompt, etc) and handles the pickling appropriately.
            """
            if _should_pickle_by_reference(obj, name=name):
                return super().save_global(obj, name=name)
            elif PYPY and isinstance(obj.__code__, builtin_code_type):
                return self.save_pypy_builtin_func(obj)
            else:
                return self._save_reduce_pickle5(
                    *self._dynamic_function_reduce(obj), obj=obj
                )

        def save_pypy_builtin_func(self, obj):
            """Save pypy equivalent of builtin functions.

            PyPy does not have the concept of builtin-functions. Instead,
            builtin-functions are simple function instances, but with a
            builtin-code attribute.
            Most of the time, builtin functions should be pickled by attribute.
            But PyPy has flaky support for __qualname__, so some builtin
            functions such as float.__new__ will be classified as dynamic. For
            this reason only, we created this special routine. Because
            builtin-functions are not expected to have closure or globals,
            there is no additional hack (compared to the one already
            implemented in pickle) to protect ourselves from reference cycles.
            A simple (reconstructor, newargs, obj.__dict__) tuple is
            save_reduced. Note also that PyPy improved their support for
            __qualname__ in v3.6, so this routine should be removed when
            cloudpickle supports only PyPy 3.6 and later.
            """
            rv = (
                types.FunctionType,
                (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                obj.__dict__,
            )
            self.save_reduce(*rv, obj=obj)

        dispatch[types.FunctionType] = save_function
1443
1444
1445# Shorthands similar to pickle.dump/pickle.dumps
1446
1447
def dump(obj, file, protocol=None, buffer_callback=None):
    """Serialize obj as bytes streamed into file

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
    """
    pickler = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
    pickler.dump(obj)
1462
1463
def dumps(obj, protocol=None, buffer_callback=None):
    """Serialize obj as a string of bytes allocated in memory

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
    """
    buffer = io.BytesIO()
    try:
        pickler = Pickler(buffer, protocol=protocol, buffer_callback=buffer_callback)
        pickler.dump(obj)
        return buffer.getvalue()
    finally:
        buffer.close()
1481
1482
# Include pickle's unpickling functions in this namespace for convenience:
# standard unpickling suffices for cloudpickle payloads.
load = pickle.load
loads = pickle.loads
1485
# Backward compatibility alias for the historical class name.
CloudPickler = Pickler