Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/cloudpickle/cloudpickle.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

568 statements  

1"""Pickler class to extend the standard pickle.Pickler functionality 

2 

3The main objective is to make it natural to perform distributed computing on 

4clusters (such as PySpark, Dask, Ray...) with interactively defined code 

5(functions, classes, ...) written in notebooks or console. 

6 

7In particular this pickler adds the following features: 

8- serialize interactively-defined or locally-defined functions, classes, 

9 enums, typevars, lambdas and nested functions to compiled byte code; 

10- deal with some other non-serializable objects in an ad-hoc manner where 

11 applicable. 

12 

13This pickler is therefore meant to be used for the communication between short 

14lived Python processes running the same version of Python and libraries. In 

15particular, it is not meant to be used for long term storage of Python objects. 

16 

17It does not include an unpickler, as standard Python unpickling suffices. 

18 

19This module was extracted from the `cloud` package, developed by `PiCloud, Inc. 

20<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_. 

21 

22Copyright (c) 2012-now, CloudPickle developers and contributors. 

23Copyright (c) 2012, Regents of the University of California. 

24Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_. 

25All rights reserved. 

26 

27Redistribution and use in source and binary forms, with or without 

28modification, are permitted provided that the following conditions 

29are met: 

30 * Redistributions of source code must retain the above copyright 

31 notice, this list of conditions and the following disclaimer. 

32 * Redistributions in binary form must reproduce the above copyright 

33 notice, this list of conditions and the following disclaimer in the 

34 documentation and/or other materials provided with the distribution. 

35 * Neither the name of the University of California, Berkeley nor the 

36 names of its contributors may be used to endorse or promote 

37 products derived from this software without specific prior written 

38 permission. 

39 

40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

41"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

42LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

43A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

44HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

45SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 

46TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 

47PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 

48LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 

49NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 

50SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

51""" 

52 

53import _collections_abc 

54from collections import ChainMap, OrderedDict 

55import abc 

56import builtins 

57import copyreg 

58import dataclasses 

59import dis 

60from enum import Enum 

61import io 

62import itertools 

63import logging 

64import opcode 

65import pickle 

66from pickle import _getattribute as _pickle_getattribute 

67import platform 

68import struct 

69import sys 

70import threading 

71import types 

72import typing 

73import uuid 

74import warnings 

75import weakref 

76 

77# The following import is required to be imported in the cloudpickle 

78# namespace to be able to load pickle files generated with older versions of 

79# cloudpickle. See: tests/test_backward_compat.py 

80from types import CellType # noqa: F401 

81 

82 

83# cloudpickle is meant for inter process communication: we expect all 

84# communicating processes to run the same Python version hence we favor 

85# communication speed over compatibility: 

86DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL 

87 

88# Names of modules whose resources should be treated as dynamic. 

89_PICKLE_BY_VALUE_MODULES = set() 

90 

91# Track the provenance of reconstructed dynamic classes to make it possible to 

92# reconstruct instances from the matching singleton class definition when 

93# appropriate and preserve the usual "isinstance" semantics of Python objects. 

94_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() 

95_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() 

96_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock() 

97 

98PYPY = platform.python_implementation() == "PyPy" 

99 

100builtin_code_type = None 

101if PYPY: 

102 # builtin-code objects only exist in pypy 

103 builtin_code_type = type(float.__new__.__code__) 

104 

105_extract_code_globals_cache = weakref.WeakKeyDictionary() 

106 

107 

108def _get_or_create_tracker_id(class_def): 

109 with _DYNAMIC_CLASS_TRACKER_LOCK: 

110 class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def) 

111 if class_tracker_id is None: 

112 class_tracker_id = uuid.uuid4().hex 

113 _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id 

114 _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def 

115 return class_tracker_id 

116 

117 

118def _lookup_class_or_track(class_tracker_id, class_def): 

119 if class_tracker_id is not None: 

120 with _DYNAMIC_CLASS_TRACKER_LOCK: 

121 class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault( 

122 class_tracker_id, class_def 

123 ) 

124 _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id 

125 return class_def 

126 

127 

128def register_pickle_by_value(module): 

129 """Register a module to make its functions and classes picklable by value. 

130 

131 By default, functions and classes that are attributes of an importable 

132 module are to be pickled by reference, that is relying on re-importing 

133 the attribute from the module at load time. 

134 

135 If `register_pickle_by_value(module)` is called, all its functions and 

136 classes are subsequently to be pickled by value, meaning that they can 

137 be loaded in Python processes where the module is not importable. 

138 

139 This is especially useful when developing a module in a distributed 

140 execution environment: restarting the client Python process with the new 

141 source code is enough: there is no need to re-install the new version 

142 of the module on all the worker nodes nor to restart the workers. 

143 

144 Note: this feature is considered experimental. See the cloudpickle 

145 README.md file for more details and limitations. 

146 """ 

147 if not isinstance(module, types.ModuleType): 

148 raise ValueError(f"Input should be a module object, got {str(module)} instead") 

149 # In the future, cloudpickle may need a way to access any module registered 

150 # for pickling by value in order to introspect relative imports inside 

151 # functions pickled by value. (see 

152 # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633). 

153 # This access can be ensured by checking that module is present in 

154 # sys.modules at registering time and assuming that it will still be in 

155 # there when accessed during pickling. Another alternative would be to 

156 # store a weakref to the module. Even though cloudpickle does not implement 

157 # this introspection yet, in order to avoid a possible breaking change 

158 # later, we still enforce the presence of module inside sys.modules. 

159 if module.__name__ not in sys.modules: 

160 raise ValueError( 

161 f"{module} was not imported correctly, have you used an " 

162 "`import` statement to access it?" 

163 ) 

164 _PICKLE_BY_VALUE_MODULES.add(module.__name__) 

165 

166 

167def unregister_pickle_by_value(module): 

168 """Unregister that the input module should be pickled by value.""" 

169 if not isinstance(module, types.ModuleType): 

170 raise ValueError(f"Input should be a module object, got {str(module)} instead") 

171 if module.__name__ not in _PICKLE_BY_VALUE_MODULES: 

172 raise ValueError(f"{module} is not registered for pickle by value") 

173 else: 

174 _PICKLE_BY_VALUE_MODULES.remove(module.__name__) 

175 

176 

177def list_registry_pickle_by_value(): 

178 return _PICKLE_BY_VALUE_MODULES.copy() 

179 

180 

181def _is_registered_pickle_by_value(module): 

182 module_name = module.__name__ 

183 if module_name in _PICKLE_BY_VALUE_MODULES: 

184 return True 

185 while True: 

186 parent_name = module_name.rsplit(".", 1)[0] 

187 if parent_name == module_name: 

188 break 

189 if parent_name in _PICKLE_BY_VALUE_MODULES: 

190 return True 

191 module_name = parent_name 

192 return False 

193 

194 

195if sys.version_info >= (3, 14): 

196 def _getattribute(obj, name): 

197 return _pickle_getattribute(obj, name.split('.')) 

198else: 

199 def _getattribute(obj, name): 

200 return _pickle_getattribute(obj, name)[0] 

201 

202 

203def _whichmodule(obj, name): 

204 """Find the module an object belongs to. 

205 

206 This function differs from ``pickle.whichmodule`` in two ways: 

207 - it does not mangle the cases where obj's module is __main__ and obj was 

208 not found in any module. 

209 - Errors arising during module introspection are ignored, as those errors 

210 are considered unwanted side effects. 

211 """ 

212 module_name = getattr(obj, "__module__", None) 

213 

214 if module_name is not None: 

215 return module_name 

216 # Protect the iteration by using a copy of sys.modules against dynamic 

217 # modules that trigger imports of other modules upon calls to getattr or 

218 # other threads importing at the same time. 

219 for module_name, module in sys.modules.copy().items(): 

220 # Some modules such as coverage can inject non-module objects inside 

221 # sys.modules 

222 if ( 

223 module_name == "__main__" 

224 or module_name == "__mp_main__" 

225 or module is None 

226 or not isinstance(module, types.ModuleType) 

227 ): 

228 continue 

229 try: 

230 if _getattribute(module, name) is obj: 

231 return module_name 

232 except Exception: 

233 pass 

234 return None 

235 

236 

237def _should_pickle_by_reference(obj, name=None): 

238 """Test whether an function or a class should be pickled by reference 

239 

240 Pickling by reference means by that the object (typically a function or a 

241 class) is an attribute of a module that is assumed to be importable in the 

242 target Python environment. Loading will therefore rely on importing the 

243 module and then calling `getattr` on it to access the function or class. 

244 

245 Pickling by reference is the only option to pickle functions and classes 

246 in the standard library. In cloudpickle the alternative option is to 

247 pickle by value (for instance for interactively or locally defined 

248 functions and classes or for attributes of modules that have been 

249 explicitly registered to be pickled by value. 

250 """ 

251 if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): 

252 module_and_name = _lookup_module_and_qualname(obj, name=name) 

253 if module_and_name is None: 

254 return False 

255 module, name = module_and_name 

256 return not _is_registered_pickle_by_value(module) 

257 

258 elif isinstance(obj, types.ModuleType): 

259 # We assume that sys.modules is primarily used as a cache mechanism for 

260 # the Python import machinery. Checking if a module has been added in 

261 # is sys.modules therefore a cheap and simple heuristic to tell us 

262 # whether we can assume that a given module could be imported by name 

263 # in another Python process. 

264 if _is_registered_pickle_by_value(obj): 

265 return False 

266 return obj.__name__ in sys.modules 

267 else: 

268 raise TypeError( 

269 "cannot check importability of {} instances".format(type(obj).__name__) 

270 ) 

271 

272 

273def _lookup_module_and_qualname(obj, name=None): 

274 if name is None: 

275 name = getattr(obj, "__qualname__", None) 

276 if name is None: # pragma: no cover 

277 # This used to be needed for Python 2.7 support but is probably not 

278 # needed anymore. However we keep the __name__ introspection in case 

279 # users of cloudpickle rely on this old behavior for unknown reasons. 

280 name = getattr(obj, "__name__", None) 

281 

282 module_name = _whichmodule(obj, name) 

283 

284 if module_name is None: 

285 # In this case, obj.__module__ is None AND obj was not found in any 

286 # imported module. obj is thus treated as dynamic. 

287 return None 

288 

289 if module_name == "__main__": 

290 return None 

291 

292 # Note: if module_name is in sys.modules, the corresponding module is 

293 # assumed importable at unpickling time. See #357 

294 module = sys.modules.get(module_name, None) 

295 if module is None: 

296 # The main reason why obj's module would not be imported is that this 

297 # module has been dynamically created, using for example 

298 # types.ModuleType. The other possibility is that module was removed 

299 # from sys.modules after obj was created/imported. But this case is not 

300 # supported, as the standard pickle does not support it either. 

301 return None 

302 

303 try: 

304 obj2 = _getattribute(module, name) 

305 except AttributeError: 

306 # obj was not found inside the module it points to 

307 return None 

308 if obj2 is not obj: 

309 return None 

310 return module, name 

311 

312 

313def _extract_code_globals(co): 

314 """Find all globals names read or written to by codeblock co.""" 

315 out_names = _extract_code_globals_cache.get(co) 

316 if out_names is None: 

317 # We use a dict with None values instead of a set to get a 

318 # deterministic order and avoid introducing non-deterministic pickle 

319 # bytes as a results. 

320 out_names = {name: None for name in _walk_global_ops(co)} 

321 

322 # Declaring a function inside another one using the "def ..." syntax 

323 # generates a constant code object corresponding to the one of the 

324 # nested function's As the nested function may itself need global 

325 # variables, we need to introspect its code, extract its globals, (look 

326 # for code object in it's co_consts attribute..) and add the result to 

327 # code_globals 

328 if co.co_consts: 

329 for const in co.co_consts: 

330 if isinstance(const, types.CodeType): 

331 out_names.update(_extract_code_globals(const)) 

332 

333 _extract_code_globals_cache[co] = out_names 

334 

335 return out_names 

336 

337 

338def _find_imported_submodules(code, top_level_dependencies): 

339 """Find currently imported submodules used by a function. 

340 

341 Submodules used by a function need to be detected and referenced for the 

342 function to work correctly at depickling time. Because submodules can be 

343 referenced as attribute of their parent package (``package.submodule``), we 

344 need a special introspection technique that does not rely on GLOBAL-related 

345 opcodes to find references of them in a code object. 

346 

347 Example: 

348 ``` 

349 import concurrent.futures 

350 import cloudpickle 

351 def func(): 

352 x = concurrent.futures.ThreadPoolExecutor 

353 if __name__ == '__main__': 

354 cloudpickle.dumps(func) 

355 ``` 

356 The globals extracted by cloudpickle in the function's state include the 

357 concurrent package, but not its submodule (here, concurrent.futures), which 

358 is the module used by func. Find_imported_submodules will detect the usage 

359 of concurrent.futures. Saving this module alongside with func will ensure 

360 that calling func once depickled does not fail due to concurrent.futures 

361 not being imported 

362 """ 

363 

364 subimports = [] 

365 # check if any known dependency is an imported package 

366 for x in top_level_dependencies: 

367 if ( 

368 isinstance(x, types.ModuleType) 

369 and hasattr(x, "__package__") 

370 and x.__package__ 

371 ): 

372 # check if the package has any currently loaded sub-imports 

373 prefix = x.__name__ + "." 

374 # A concurrent thread could mutate sys.modules, 

375 # make sure we iterate over a copy to avoid exceptions 

376 for name in list(sys.modules): 

377 # Older versions of pytest will add a "None" module to 

378 # sys.modules. 

379 if name is not None and name.startswith(prefix): 

380 # check whether the function can address the sub-module 

381 tokens = set(name[len(prefix) :].split(".")) 

382 if not tokens - set(code.co_names): 

383 subimports.append(sys.modules[name]) 

384 return subimports 

385 

386 

387# relevant opcodes 

388STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"] 

389DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"] 

390LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"] 

391GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL) 

392HAVE_ARGUMENT = dis.HAVE_ARGUMENT 

393EXTENDED_ARG = dis.EXTENDED_ARG 

394 

395 

396_BUILTIN_TYPE_NAMES = {} 

397for k, v in types.__dict__.items(): 

398 if type(v) is type: 

399 _BUILTIN_TYPE_NAMES[v] = k 

400 

401 

402def _builtin_type(name): 

403 if name == "ClassType": # pragma: no cover 

404 # Backward compat to load pickle files generated with cloudpickle 

405 # < 1.3 even if loading pickle files from older versions is not 

406 # officially supported. 

407 return type 

408 return getattr(types, name) 

409 

410 

411def _walk_global_ops(code): 

412 """Yield referenced name for global-referencing instructions in code.""" 

413 for instr in dis.get_instructions(code): 

414 op = instr.opcode 

415 if op in GLOBAL_OPS: 

416 yield instr.argval 

417 

418 

419def _extract_class_dict(cls): 

420 """Retrieve a copy of the dict of a class without the inherited method.""" 

421 # Hack to circumvent non-predictable memoization caused by string interning. 

422 # See the inline comment in _class_setstate for details. 

423 clsdict = {"".join(k): cls.__dict__[k] for k in sorted(cls.__dict__)} 

424 

425 if len(cls.__bases__) == 1: 

426 inherited_dict = cls.__bases__[0].__dict__ 

427 else: 

428 inherited_dict = {} 

429 for base in reversed(cls.__bases__): 

430 inherited_dict.update(base.__dict__) 

431 to_remove = [] 

432 for name, value in clsdict.items(): 

433 try: 

434 base_value = inherited_dict[name] 

435 if value is base_value: 

436 to_remove.append(name) 

437 except KeyError: 

438 pass 

439 for name in to_remove: 

440 clsdict.pop(name) 

441 return clsdict 

442 

443 

444def is_tornado_coroutine(func): 

445 """Return whether `func` is a Tornado coroutine function. 

446 

447 Running coroutines are not supported. 

448 """ 

449 warnings.warn( 

450 "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be " 

451 "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function " 

452 "directly instead.", 

453 category=DeprecationWarning, 

454 ) 

455 if "tornado.gen" not in sys.modules: 

456 return False 

457 gen = sys.modules["tornado.gen"] 

458 if not hasattr(gen, "is_coroutine_function"): 

459 # Tornado version is too old 

460 return False 

461 return gen.is_coroutine_function(func) 

462 

463 

464def subimport(name): 

465 # We cannot do simply: `return __import__(name)`: Indeed, if ``name`` is 

466 # the name of a submodule, __import__ will return the top-level root module 

467 # of this submodule. For instance, __import__('os.path') returns the `os` 

468 # module. 

469 __import__(name) 

470 return sys.modules[name] 

471 

472 

473def dynamic_subimport(name, vars): 

474 mod = types.ModuleType(name) 

475 mod.__dict__.update(vars) 

476 mod.__dict__["__builtins__"] = builtins.__dict__ 

477 return mod 

478 

479 

480def _get_cell_contents(cell): 

481 try: 

482 return cell.cell_contents 

483 except ValueError: 

484 # Handle empty cells explicitly with a sentinel value. 

485 return _empty_cell_value 

486 

487 

488def instance(cls): 

489 """Create a new instance of a class. 

490 

491 Parameters 

492 ---------- 

493 cls : type 

494 The class to create an instance of. 

495 

496 Returns 

497 ------- 

498 instance : cls 

499 A new instance of ``cls``. 

500 """ 

501 return cls() 

502 

503 

504@instance 

505class _empty_cell_value: 

506 """Sentinel for empty closures.""" 

507 

508 @classmethod 

509 def __reduce__(cls): 

510 return cls.__name__ 

511 

512 

513def _make_function(code, globals, name, argdefs, closure): 

514 # Setting __builtins__ in globals is needed for nogil CPython. 

515 globals["__builtins__"] = __builtins__ 

516 return types.FunctionType(code, globals, name, argdefs, closure) 

517 

518 

519def _make_empty_cell(): 

520 if False: 

521 # trick the compiler into creating an empty cell in our lambda 

522 cell = None 

523 raise AssertionError("this route should not be executed") 

524 

525 return (lambda: cell).__closure__[0] 

526 

527 

528def _make_cell(value=_empty_cell_value): 

529 cell = _make_empty_cell() 

530 if value is not _empty_cell_value: 

531 cell.cell_contents = value 

532 return cell 

533 

534 

535def _make_skeleton_class( 

536 type_constructor, name, bases, type_kwargs, class_tracker_id, extra 

537): 

538 """Build dynamic class with an empty __dict__ to be filled once memoized 

539 

540 If class_tracker_id is not None, try to lookup an existing class definition 

541 matching that id. If none is found, track a newly reconstructed class 

542 definition under that id so that other instances stemming from the same 

543 class id will also reuse this class definition. 

544 

545 The "extra" variable is meant to be a dict (or None) that can be used for 

546 forward compatibility shall the need arise. 

547 """ 

548 # We need to intern the keys of the type_kwargs dict to avoid having 

549 # different pickles for the same dynamic class depending on whether it was 

550 # dynamically created or reconstructed from a pickled stream. 

551 type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()} 

552 

553 skeleton_class = types.new_class( 

554 name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs) 

555 ) 

556 

557 return _lookup_class_or_track(class_tracker_id, skeleton_class) 

558 

559 

560def _make_skeleton_enum( 

561 bases, name, qualname, members, module, class_tracker_id, extra 

562): 

563 """Build dynamic enum with an empty __dict__ to be filled once memoized 

564 

565 The creation of the enum class is inspired by the code of 

566 EnumMeta._create_. 

567 

568 If class_tracker_id is not None, try to lookup an existing enum definition 

569 matching that id. If none is found, track a newly reconstructed enum 

570 definition under that id so that other instances stemming from the same 

571 class id will also reuse this enum definition. 

572 

573 The "extra" variable is meant to be a dict (or None) that can be used for 

574 forward compatibility shall the need arise. 

575 """ 

576 # enums always inherit from their base Enum class at the last position in 

577 # the list of base classes: 

578 enum_base = bases[-1] 

579 metacls = enum_base.__class__ 

580 classdict = metacls.__prepare__(name, bases) 

581 

582 for member_name, member_value in members.items(): 

583 classdict[member_name] = member_value 

584 enum_class = metacls.__new__(metacls, name, bases, classdict) 

585 enum_class.__module__ = module 

586 enum_class.__qualname__ = qualname 

587 

588 return _lookup_class_or_track(class_tracker_id, enum_class) 

589 

590 

591def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id): 

592 tv = typing.TypeVar( 

593 name, 

594 *constraints, 

595 bound=bound, 

596 covariant=covariant, 

597 contravariant=contravariant, 

598 ) 

599 return _lookup_class_or_track(class_tracker_id, tv) 

600 

601 

602def _decompose_typevar(obj): 

603 return ( 

604 obj.__name__, 

605 obj.__bound__, 

606 obj.__constraints__, 

607 obj.__covariant__, 

608 obj.__contravariant__, 

609 _get_or_create_tracker_id(obj), 

610 ) 

611 

612 

613def _typevar_reduce(obj): 

614 # TypeVar instances require the module information hence why we 

615 # are not using the _should_pickle_by_reference directly 

616 module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) 

617 

618 if module_and_name is None: 

619 return (_make_typevar, _decompose_typevar(obj)) 

620 elif _is_registered_pickle_by_value(module_and_name[0]): 

621 return (_make_typevar, _decompose_typevar(obj)) 

622 

623 return (getattr, module_and_name) 

624 

625 

626def _get_bases(typ): 

627 if "__orig_bases__" in getattr(typ, "__dict__", {}): 

628 # For generic types (see PEP 560) 

629 # Note that simply checking `hasattr(typ, '__orig_bases__')` is not 

630 # correct. Subclasses of a fully-parameterized generic class does not 

631 # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')` 

632 # will return True because it's defined in the base class. 

633 bases_attr = "__orig_bases__" 

634 else: 

635 # For regular class objects 

636 bases_attr = "__bases__" 

637 return getattr(typ, bases_attr) 

638 

639 

640def _make_dict_keys(obj, is_ordered=False): 

641 if is_ordered: 

642 return OrderedDict.fromkeys(obj).keys() 

643 else: 

644 return dict.fromkeys(obj).keys() 

645 

646 

647def _make_dict_values(obj, is_ordered=False): 

648 if is_ordered: 

649 return OrderedDict((i, _) for i, _ in enumerate(obj)).values() 

650 else: 

651 return {i: _ for i, _ in enumerate(obj)}.values() 

652 

653 

654def _make_dict_items(obj, is_ordered=False): 

655 if is_ordered: 

656 return OrderedDict(obj).items() 

657 else: 

658 return obj.items() 

659 

660 

661# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS 

662# ------------------------------------------------- 

663 

664 

665def _class_getnewargs(obj): 

666 type_kwargs = {} 

667 if "__module__" in obj.__dict__: 

668 type_kwargs["__module__"] = obj.__module__ 

669 

670 __dict__ = obj.__dict__.get("__dict__", None) 

671 if isinstance(__dict__, property): 

672 type_kwargs["__dict__"] = __dict__ 

673 

674 return ( 

675 type(obj), 

676 obj.__name__, 

677 _get_bases(obj), 

678 type_kwargs, 

679 _get_or_create_tracker_id(obj), 

680 None, 

681 ) 

682 

683 

684def _enum_getnewargs(obj): 

685 members = {e.name: e.value for e in obj} 

686 return ( 

687 obj.__bases__, 

688 obj.__name__, 

689 obj.__qualname__, 

690 members, 

691 obj.__module__, 

692 _get_or_create_tracker_id(obj), 

693 None, 

694 ) 

695 

696 

697# COLLECTION OF OBJECTS RECONSTRUCTORS 

698# ------------------------------------ 

699def _file_reconstructor(retval): 

700 return retval 

701 

702 

703# COLLECTION OF OBJECTS STATE GETTERS 

704# ----------------------------------- 

705 

706 

707def _function_getstate(func): 

708 # - Put func's dynamic attributes (stored in func.__dict__) in state. These 

709 # attributes will be restored at unpickling time using 

710 # f.__dict__.update(state) 

711 # - Put func's members into slotstate. Such attributes will be restored at 

712 # unpickling time by iterating over slotstate and calling setattr(func, 

713 # slotname, slotvalue) 

714 slotstate = { 

715 # Hack to circumvent non-predictable memoization caused by string interning. 

716 # See the inline comment in _class_setstate for details. 

717 "__name__": "".join(func.__name__), 

718 "__qualname__": "".join(func.__qualname__), 

719 "__annotations__": func.__annotations__, 

720 "__kwdefaults__": func.__kwdefaults__, 

721 "__defaults__": func.__defaults__, 

722 "__module__": func.__module__, 

723 "__doc__": func.__doc__, 

724 "__closure__": func.__closure__, 

725 } 

726 

727 f_globals_ref = _extract_code_globals(func.__code__) 

728 f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__} 

729 

730 if func.__closure__ is not None: 

731 closure_values = list(map(_get_cell_contents, func.__closure__)) 

732 else: 

733 closure_values = () 

734 

735 # Extract currently-imported submodules used by func. Storing these modules 

736 # in a smoke _cloudpickle_subimports attribute of the object's state will 

737 # trigger the side effect of importing these modules at unpickling time 

738 # (which is necessary for func to work correctly once depickled) 

739 slotstate["_cloudpickle_submodules"] = _find_imported_submodules( 

740 func.__code__, itertools.chain(f_globals.values(), closure_values) 

741 ) 

742 slotstate["__globals__"] = f_globals 

743 

744 # Hack to circumvent non-predictable memoization caused by string interning. 

745 # See the inline comment in _class_setstate for details. 

746 state = {"".join(k): v for k, v in func.__dict__.items()} 

747 return state, slotstate 

748 

749 

750def _class_getstate(obj): 

751 clsdict = _extract_class_dict(obj) 

752 clsdict.pop("__weakref__", None) 

753 

754 if issubclass(type(obj), abc.ABCMeta): 

755 # If obj is an instance of an ABCMeta subclass, don't pickle the 

756 # cache/negative caches populated during isinstance/issubclass 

757 # checks, but pickle the list of registered subclasses of obj. 

758 clsdict.pop("_abc_cache", None) 

759 clsdict.pop("_abc_negative_cache", None) 

760 clsdict.pop("_abc_negative_cache_version", None) 

761 registry = clsdict.pop("_abc_registry", None) 

762 if registry is None: 

763 # The abc caches and registered subclasses of a 

764 # class are bundled into the single _abc_impl attribute 

765 clsdict.pop("_abc_impl", None) 

766 (registry, _, _, _) = abc._get_dump(obj) 

767 

768 clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry] 

769 else: 

770 # In the above if clause, registry is a set of weakrefs -- in 

771 # this case, registry is a WeakSet 

772 clsdict["_abc_impl"] = [type_ for type_ in registry] 

773 

774 if "__slots__" in clsdict: 

775 # pickle string length optimization: member descriptors of obj are 

776 # created automatically from obj's __slots__ attribute, no need to 

777 # save them in obj's state 

778 if isinstance(obj.__slots__, str): 

779 clsdict.pop(obj.__slots__) 

780 else: 

781 for k in obj.__slots__: 

782 clsdict.pop(k, None) 

783 

784 clsdict.pop("__dict__", None) # unpicklable property object 

785 

786 if sys.version_info >= (3, 14): 

787 # PEP-649/749: __annotate_func__ contains a closure that references the class 

788 # dict. We need to exclude it from pickling. Python will recreate it when 

789 # __annotations__ is accessed at unpickling time. 

790 clsdict.pop("__annotate_func__", None) 

791 

792 return (clsdict, {}) 

793 

794 

795def _enum_getstate(obj): 

796 clsdict, slotstate = _class_getstate(obj) 

797 

798 members = {e.name: e.value for e in obj} 

799 # Cleanup the clsdict that will be passed to _make_skeleton_enum: 

800 # Those attributes are already handled by the metaclass. 

801 for attrname in [ 

802 "_generate_next_value_", 

803 "_member_names_", 

804 "_member_map_", 

805 "_member_type_", 

806 "_value2member_map_", 

807 ]: 

808 clsdict.pop(attrname, None) 

809 for member in members: 

810 clsdict.pop(member) 

811 # Special handling of Enum subclasses 

812 return clsdict, slotstate 

813 

814 

815# COLLECTIONS OF OBJECTS REDUCERS 

816# ------------------------------- 

817# A reducer is a function taking a single argument (obj), and that returns a 

818# tuple with all the necessary data to re-construct obj. Apart from a few 

819# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to 

820# correctly pickle an object. 

821# While many built-in objects (Exceptions objects, instances of the "object" 

822# class, etc), are shipped with their own built-in reducer (invoked using 

823# obj.__reduce__), some do not. The following methods were created to "fill 

824# these holes". 

825 

826 

827def _code_reduce(obj): 

828 """code object reducer.""" 

829 # If you are not sure about the order of arguments, take a look at help 

830 # of the specific type from types, for example: 

831 # >>> from types import CodeType 

832 # >>> help(CodeType) 

833 

834 # Hack to circumvent non-predictable memoization caused by string interning. 

835 # See the inline comment in _class_setstate for details. 

836 co_name = "".join(obj.co_name) 

837 

838 # Create shallow copies of these tuple to make cloudpickle payload deterministic. 

839 # When creating a code object during load, copies of these four tuples are 

840 # created, while in the main process, these tuples can be shared. 

841 # By always creating copies, we make sure the resulting payload is deterministic. 

842 co_names = tuple(name for name in obj.co_names) 

843 co_varnames = tuple(name for name in obj.co_varnames) 

844 co_freevars = tuple(name for name in obj.co_freevars) 

845 co_cellvars = tuple(name for name in obj.co_cellvars) 

846 if hasattr(obj, "co_exceptiontable"): 

847 # Python 3.11 and later: there are some new attributes 

848 # related to the enhanced exceptions. 

849 args = ( 

850 obj.co_argcount, 

851 obj.co_posonlyargcount, 

852 obj.co_kwonlyargcount, 

853 obj.co_nlocals, 

854 obj.co_stacksize, 

855 obj.co_flags, 

856 obj.co_code, 

857 obj.co_consts, 

858 co_names, 

859 co_varnames, 

860 obj.co_filename, 

861 co_name, 

862 obj.co_qualname, 

863 obj.co_firstlineno, 

864 obj.co_linetable, 

865 obj.co_exceptiontable, 

866 co_freevars, 

867 co_cellvars, 

868 ) 

869 elif hasattr(obj, "co_linetable"): 

870 # Python 3.10 and later: obj.co_lnotab is deprecated and constructor 

871 # expects obj.co_linetable instead. 

872 args = ( 

873 obj.co_argcount, 

874 obj.co_posonlyargcount, 

875 obj.co_kwonlyargcount, 

876 obj.co_nlocals, 

877 obj.co_stacksize, 

878 obj.co_flags, 

879 obj.co_code, 

880 obj.co_consts, 

881 co_names, 

882 co_varnames, 

883 obj.co_filename, 

884 co_name, 

885 obj.co_firstlineno, 

886 obj.co_linetable, 

887 co_freevars, 

888 co_cellvars, 

889 ) 

890 elif hasattr(obj, "co_nmeta"): # pragma: no cover 

891 # "nogil" Python: modified attributes from 3.9 

892 args = ( 

893 obj.co_argcount, 

894 obj.co_posonlyargcount, 

895 obj.co_kwonlyargcount, 

896 obj.co_nlocals, 

897 obj.co_framesize, 

898 obj.co_ndefaultargs, 

899 obj.co_nmeta, 

900 obj.co_flags, 

901 obj.co_code, 

902 obj.co_consts, 

903 co_varnames, 

904 obj.co_filename, 

905 co_name, 

906 obj.co_firstlineno, 

907 obj.co_lnotab, 

908 obj.co_exc_handlers, 

909 obj.co_jump_table, 

910 co_freevars, 

911 co_cellvars, 

912 obj.co_free2reg, 

913 obj.co_cell2reg, 

914 ) 

915 else: 

916 # Backward compat for 3.8 and 3.9 

917 args = ( 

918 obj.co_argcount, 

919 obj.co_posonlyargcount, 

920 obj.co_kwonlyargcount, 

921 obj.co_nlocals, 

922 obj.co_stacksize, 

923 obj.co_flags, 

924 obj.co_code, 

925 obj.co_consts, 

926 co_names, 

927 co_varnames, 

928 obj.co_filename, 

929 co_name, 

930 obj.co_firstlineno, 

931 obj.co_lnotab, 

932 co_freevars, 

933 co_cellvars, 

934 ) 

935 return types.CodeType, args 

936 

937 

938def _cell_reduce(obj): 

939 """Cell (containing values of a function's free variables) reducer.""" 

940 try: 

941 obj.cell_contents 

942 except ValueError: # cell is empty 

943 return _make_empty_cell, () 

944 else: 

945 return _make_cell, (obj.cell_contents,) 

946 

947 

948def _classmethod_reduce(obj): 

949 orig_func = obj.__func__ 

950 return type(obj), (orig_func,) 

951 

952 

953def _file_reduce(obj): 

954 """Save a file.""" 

955 import io 

956 

957 if not hasattr(obj, "name") or not hasattr(obj, "mode"): 

958 raise pickle.PicklingError( 

959 "Cannot pickle files that do not map to an actual file" 

960 ) 

961 if obj is sys.stdout: 

962 return getattr, (sys, "stdout") 

963 if obj is sys.stderr: 

964 return getattr, (sys, "stderr") 

965 if obj is sys.stdin: 

966 raise pickle.PicklingError("Cannot pickle standard input") 

967 if obj.closed: 

968 raise pickle.PicklingError("Cannot pickle closed files") 

969 if hasattr(obj, "isatty") and obj.isatty(): 

970 raise pickle.PicklingError("Cannot pickle files that map to tty objects") 

971 if "r" not in obj.mode and "+" not in obj.mode: 

972 raise pickle.PicklingError( 

973 "Cannot pickle files that are not opened for reading: %s" % obj.mode 

974 ) 

975 

976 name = obj.name 

977 

978 retval = io.StringIO() 

979 

980 try: 

981 # Read the whole file 

982 curloc = obj.tell() 

983 obj.seek(0) 

984 contents = obj.read() 

985 obj.seek(curloc) 

986 except OSError as e: 

987 raise pickle.PicklingError( 

988 "Cannot pickle file %s as it cannot be read" % name 

989 ) from e 

990 retval.write(contents) 

991 retval.seek(curloc) 

992 

993 retval.name = name 

994 return _file_reconstructor, (retval,) 

995 

996 

997def _getset_descriptor_reduce(obj): 

998 return getattr, (obj.__objclass__, obj.__name__) 

999 

1000 

1001def _mappingproxy_reduce(obj): 

1002 return types.MappingProxyType, (dict(obj),) 

1003 

1004 

1005def _memoryview_reduce(obj): 

1006 return bytes, (obj.tobytes(),) 

1007 

1008 

1009def _module_reduce(obj): 

1010 if _should_pickle_by_reference(obj): 

1011 return subimport, (obj.__name__,) 

1012 else: 

1013 # Some external libraries can populate the "__builtins__" entry of a 

1014 # module's `__dict__` with unpicklable objects (see #316). For that 

1015 # reason, we do not attempt to pickle the "__builtins__" entry, and 

1016 # restore a default value for it at unpickling time. 

1017 state = obj.__dict__.copy() 

1018 state.pop("__builtins__", None) 

1019 return dynamic_subimport, (obj.__name__, state) 

1020 

1021 

1022def _method_reduce(obj): 

1023 return (types.MethodType, (obj.__func__, obj.__self__)) 

1024 

1025 

1026def _logger_reduce(obj): 

1027 return logging.getLogger, (obj.name,) 

1028 

1029 

1030def _root_logger_reduce(obj): 

1031 return logging.getLogger, () 

1032 

1033 

1034def _property_reduce(obj): 

1035 return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__) 

1036 

1037 

1038def _weakset_reduce(obj): 

1039 return weakref.WeakSet, (list(obj),) 

1040 

1041 

1042def _dynamic_class_reduce(obj): 

1043 """Save a class that can't be referenced as a module attribute. 

1044 

1045 This method is used to serialize classes that are defined inside 

1046 functions, or that otherwise can't be serialized as attribute lookups 

1047 from importable modules. 

1048 """ 

1049 if Enum is not None and issubclass(obj, Enum): 

1050 return ( 

1051 _make_skeleton_enum, 

1052 _enum_getnewargs(obj), 

1053 _enum_getstate(obj), 

1054 None, 

1055 None, 

1056 _class_setstate, 

1057 ) 

1058 else: 

1059 return ( 

1060 _make_skeleton_class, 

1061 _class_getnewargs(obj), 

1062 _class_getstate(obj), 

1063 None, 

1064 None, 

1065 _class_setstate, 

1066 ) 

1067 

1068 

1069def _class_reduce(obj): 

1070 """Select the reducer depending on the dynamic nature of the class obj.""" 

1071 if obj is type(None): # noqa 

1072 return type, (None,) 

1073 elif obj is type(Ellipsis): 

1074 return type, (Ellipsis,) 

1075 elif obj is type(NotImplemented): 

1076 return type, (NotImplemented,) 

1077 elif obj in _BUILTIN_TYPE_NAMES: 

1078 return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) 

1079 elif not _should_pickle_by_reference(obj): 

1080 return _dynamic_class_reduce(obj) 

1081 return NotImplemented 

1082 

1083 

1084def _dict_keys_reduce(obj): 

1085 # Safer not to ship the full dict as sending the rest might 

1086 # be unintended and could potentially cause leaking of 

1087 # sensitive information 

1088 return _make_dict_keys, (list(obj),) 

1089 

1090 

1091def _dict_values_reduce(obj): 

1092 # Safer not to ship the full dict as sending the rest might 

1093 # be unintended and could potentially cause leaking of 

1094 # sensitive information 

1095 return _make_dict_values, (list(obj),) 

1096 

1097 

1098def _dict_items_reduce(obj): 

1099 return _make_dict_items, (dict(obj),) 

1100 

1101 

1102def _odict_keys_reduce(obj): 

1103 # Safer not to ship the full dict as sending the rest might 

1104 # be unintended and could potentially cause leaking of 

1105 # sensitive information 

1106 return _make_dict_keys, (list(obj), True) 

1107 

1108 

1109def _odict_values_reduce(obj): 

1110 # Safer not to ship the full dict as sending the rest might 

1111 # be unintended and could potentially cause leaking of 

1112 # sensitive information 

1113 return _make_dict_values, (list(obj), True) 

1114 

1115 

1116def _odict_items_reduce(obj): 

1117 return _make_dict_items, (dict(obj), True) 

1118 

1119 

1120def _dataclass_field_base_reduce(obj): 

1121 return _get_dataclass_field_type_sentinel, (obj.name,) 

1122 

1123 

1124# COLLECTIONS OF OBJECTS STATE SETTERS 

1125# ------------------------------------ 

1126# state setters are called at unpickling time, once the object is created and 

1127# it has to be updated to how it was at unpickling time. 

1128 

1129 

1130def _function_setstate(obj, state): 

1131 """Update the state of a dynamic function. 

1132 

1133 As __closure__ and __globals__ are readonly attributes of a function, we 

1134 cannot rely on the native setstate routine of pickle.load_build, that calls 

1135 setattr on items of the slotstate. Instead, we have to modify them inplace. 

1136 """ 

1137 state, slotstate = state 

1138 obj.__dict__.update(state) 

1139 

1140 obj_globals = slotstate.pop("__globals__") 

1141 obj_closure = slotstate.pop("__closure__") 

1142 # _cloudpickle_subimports is a set of submodules that must be loaded for 

1143 # the pickled function to work correctly at unpickling time. Now that these 

1144 # submodules are depickled (hence imported), they can be removed from the 

1145 # object's state (the object state only served as a reference holder to 

1146 # these submodules) 

1147 slotstate.pop("_cloudpickle_submodules") 

1148 

1149 obj.__globals__.update(obj_globals) 

1150 obj.__globals__["__builtins__"] = __builtins__ 

1151 

1152 if obj_closure is not None: 

1153 for i, cell in enumerate(obj_closure): 

1154 try: 

1155 value = cell.cell_contents 

1156 except ValueError: # cell is empty 

1157 continue 

1158 obj.__closure__[i].cell_contents = value 

1159 

1160 for k, v in slotstate.items(): 

1161 setattr(obj, k, v) 

1162 

1163 

1164def _class_setstate(obj, state): 

1165 state, slotstate = state 

1166 registry = None 

1167 for attrname, attr in state.items(): 

1168 if attrname == "_abc_impl": 

1169 registry = attr 

1170 else: 

1171 # Note: setting attribute names on a class automatically triggers their 

1172 # interning in CPython: 

1173 # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957 

1174 # 

1175 # This means that to get deterministic pickling for a dynamic class that 

1176 # was initially defined in a different Python process, the pickler 

1177 # needs to ensure that dynamic class and function attribute names are 

1178 # systematically copied into a non-interned version to avoid 

1179 # unpredictable pickle payloads. 

1180 # 

1181 # Indeed the Pickler's memoizer relies on physical object identity to break 

1182 # cycles in the reference graph of the object being serialized. 

1183 setattr(obj, attrname, attr) 

1184 

1185 if sys.version_info >= (3, 13) and "__firstlineno__" in state: 

1186 # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it 

1187 # will be automatically deleted by the `setattr(obj, attrname, attr)` call 

1188 # above when `attrname` is "__firstlineno__". We assume that preserving this 

1189 # information might be important for some users and that it not stale in the 

1190 # context of cloudpickle usage, hence legitimate to propagate. Furthermore it 

1191 # is necessary to do so to keep deterministic chained pickling as tested in 

1192 # test_deterministic_str_interning_for_chained_dynamic_class_pickling. 

1193 obj.__firstlineno__ = state["__firstlineno__"] 

1194 

1195 if registry is not None: 

1196 for subclass in registry: 

1197 obj.register(subclass) 

1198 

1199 # PEP-649/749: During pickling, we excluded the __annotate_func__ attribute but it 

1200 # will be created by Python. Subsequently, annotations will be recreated when 

1201 # __annotations__ is accessed. 

1202 

1203 return obj 

1204 

1205 

1206# COLLECTION OF DATACLASS UTILITIES 

1207# --------------------------------- 

1208# There are some internal sentinel values whose identity must be preserved when 

1209# unpickling dataclass fields. Each sentinel value has a unique name that we can 

1210# use to retrieve its identity at unpickling time. 

1211 

1212 

1213_DATACLASSE_FIELD_TYPE_SENTINELS = { 

1214 dataclasses._FIELD.name: dataclasses._FIELD, 

1215 dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR, 

1216 dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR, 

1217} 

1218 

1219 

1220def _get_dataclass_field_type_sentinel(name): 

1221 return _DATACLASSE_FIELD_TYPE_SENTINELS[name] 

1222 

1223 

1224class Pickler(pickle.Pickler): 

1225 # set of reducers defined and used by cloudpickle (private) 

1226 _dispatch_table = {} 

1227 _dispatch_table[classmethod] = _classmethod_reduce 

1228 _dispatch_table[io.TextIOWrapper] = _file_reduce 

1229 _dispatch_table[logging.Logger] = _logger_reduce 

1230 _dispatch_table[logging.RootLogger] = _root_logger_reduce 

1231 _dispatch_table[memoryview] = _memoryview_reduce 

1232 _dispatch_table[property] = _property_reduce 

1233 _dispatch_table[staticmethod] = _classmethod_reduce 

1234 _dispatch_table[CellType] = _cell_reduce 

1235 _dispatch_table[types.CodeType] = _code_reduce 

1236 _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce 

1237 _dispatch_table[types.ModuleType] = _module_reduce 

1238 _dispatch_table[types.MethodType] = _method_reduce 

1239 _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce 

1240 _dispatch_table[weakref.WeakSet] = _weakset_reduce 

1241 _dispatch_table[typing.TypeVar] = _typevar_reduce 

1242 _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce 

1243 _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce 

1244 _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce 

1245 _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce 

1246 _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce 

1247 _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce 

1248 _dispatch_table[abc.abstractmethod] = _classmethod_reduce 

1249 _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce 

1250 _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce 

1251 _dispatch_table[abc.abstractproperty] = _property_reduce 

1252 _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce 

1253 

1254 dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) 

1255 

1256 # function reducers are defined as instance methods of cloudpickle.Pickler 

1257 # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref) 

1258 def _dynamic_function_reduce(self, func): 

1259 """Reduce a function that is not pickleable via attribute lookup.""" 

1260 newargs = self._function_getnewargs(func) 

1261 state = _function_getstate(func) 

1262 return (_make_function, newargs, state, None, None, _function_setstate) 

1263 

1264 def _function_reduce(self, obj): 

1265 """Reducer for function objects. 

1266 

1267 If obj is a top-level attribute of a file-backed module, this reducer 

1268 returns NotImplemented, making the cloudpickle.Pickler fall back to 

1269 traditional pickle.Pickler routines to save obj. Otherwise, it reduces 

1270 obj using a custom cloudpickle reducer designed specifically to handle 

1271 dynamic functions. 

1272 """ 

1273 if _should_pickle_by_reference(obj): 

1274 return NotImplemented 

1275 else: 

1276 return self._dynamic_function_reduce(obj) 

1277 

1278 def _function_getnewargs(self, func): 

1279 code = func.__code__ 

1280 

1281 # base_globals represents the future global namespace of func at 

1282 # unpickling time. Looking it up and storing it in 

1283 # cloudpickle.Pickler.globals_ref allow functions sharing the same 

1284 # globals at pickling time to also share them once unpickled, at one 

1285 # condition: since globals_ref is an attribute of a cloudpickle.Pickler 

1286 # instance, and that a new cloudpickle.Pickler is created each time 

1287 # cloudpickle.dump or cloudpickle.dumps is called, functions also need 

1288 # to be saved within the same invocation of 

1289 # cloudpickle.dump/cloudpickle.dumps (for example: 

1290 # cloudpickle.dumps([f1, f2])). There is no such limitation when using 

1291 # cloudpickle.Pickler.dump, as long as the multiple invocations are 

1292 # bound to the same cloudpickle.Pickler instance. 

1293 base_globals = self.globals_ref.setdefault(id(func.__globals__), {}) 

1294 

1295 if base_globals == {}: 

1296 # Add module attributes used to resolve relative imports 

1297 # instructions inside func. 

1298 for k in ["__package__", "__name__", "__path__", "__file__"]: 

1299 if k in func.__globals__: 

1300 base_globals[k] = func.__globals__[k] 

1301 

1302 # Do not bind the free variables before the function is created to 

1303 # avoid infinite recursion. 

1304 if func.__closure__ is None: 

1305 closure = None 

1306 else: 

1307 closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars))) 

1308 

1309 return code, base_globals, None, None, closure 

1310 

1311 def dump(self, obj): 

1312 try: 

1313 return super().dump(obj) 

1314 except RecursionError as e: 

1315 msg = "Could not pickle object as excessively deep recursion required." 

1316 raise pickle.PicklingError(msg) from e 

1317 

1318 def __init__(self, file, protocol=None, buffer_callback=None): 

1319 if protocol is None: 

1320 protocol = DEFAULT_PROTOCOL 

1321 super().__init__(file, protocol=protocol, buffer_callback=buffer_callback) 

1322 # map functions __globals__ attribute ids, to ensure that functions 

1323 # sharing the same global namespace at pickling time also share 

1324 # their global namespace at unpickling time. 

1325 self.globals_ref = {} 

1326 self.proto = int(protocol) 

1327 

1328 if not PYPY: 

1329 # pickle.Pickler is the C implementation of the CPython pickler and 

1330 # therefore we rely on reduce_override method to customize the pickler 

1331 # behavior. 

1332 

1333 # `cloudpickle.Pickler.dispatch` is only left for backward 

1334 # compatibility - note that when using protocol 5, 

1335 # `cloudpickle.Pickler.dispatch` is not an extension of 

1336 # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler` 

1337 # subclasses the C-implemented `pickle.Pickler`, which does not expose 

1338 # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler` 

1339 # used `cloudpickle.Pickler.dispatch` as a class-level attribute 

1340 # storing all reducers implemented by cloudpickle, but the attribute 

1341 # name was not a great choice given because it would collide with a 

1342 # similarly named attribute in the pure-Python `pickle._Pickler` 

1343 # implementation in the standard library. 

1344 dispatch = dispatch_table 

1345 

1346 # Implementation of the reducer_override callback, in order to 

1347 # efficiently serialize dynamic functions and classes by subclassing 

1348 # the C-implemented `pickle.Pickler`. 

1349 # TODO: decorrelate reducer_override (which is tied to CPython's 

1350 # implementation - would it make sense to backport it to pypy? - and 

1351 # pickle's protocol 5 which is implementation agnostic. Currently, the 

1352 # availability of both notions coincide on CPython's pickle, but it may 

1353 # not be the case anymore when pypy implements protocol 5. 

1354 

1355 def reducer_override(self, obj): 

1356 """Type-agnostic reducing callback for function and classes. 

1357 

1358 For performance reasons, subclasses of the C `pickle.Pickler` class 

1359 cannot register custom reducers for functions and classes in the 

1360 dispatch_table attribute. Reducers for such types must instead 

1361 implemented via the special `reducer_override` method. 

1362 

1363 Note that this method will be called for any object except a few 

1364 builtin-types (int, lists, dicts etc.), which differs from reducers 

1365 in the Pickler's dispatch_table, each of them being invoked for 

1366 objects of a specific type only. 

1367 

1368 This property comes in handy for classes: although most classes are 

1369 instances of the ``type`` metaclass, some of them can be instances 

1370 of other custom metaclasses (such as enum.EnumMeta for example). In 

1371 particular, the metaclass will likely not be known in advance, and 

1372 thus cannot be special-cased using an entry in the dispatch_table. 

1373 reducer_override, among other things, allows us to register a 

1374 reducer that will be called for any class, independently of its 

1375 type. 

1376 

1377 Notes: 

1378 

1379 * reducer_override has the priority over dispatch_table-registered 

1380 reducers. 

1381 * reducer_override can be used to fix other limitations of 

1382 cloudpickle for other types that suffered from type-specific 

1383 reducers, such as Exceptions. See 

1384 https://github.com/cloudpipe/cloudpickle/issues/248 

1385 """ 

1386 t = type(obj) 

1387 try: 

1388 is_anyclass = issubclass(t, type) 

1389 except TypeError: # t is not a class (old Boost; see SF #502085) 

1390 is_anyclass = False 

1391 

1392 if is_anyclass: 

1393 return _class_reduce(obj) 

1394 elif isinstance(obj, types.FunctionType): 

1395 return self._function_reduce(obj) 

1396 else: 

1397 # fallback to save_global, including the Pickler's 

1398 # dispatch_table 

1399 return NotImplemented 

1400 

1401 else: 

1402 # When reducer_override is not available, hack the pure-Python 

1403 # Pickler's types.FunctionType and type savers. Note: the type saver 

1404 # must override Pickler.save_global, because pickle.py contains a 

1405 # hard-coded call to save_global when pickling meta-classes. 

1406 dispatch = pickle.Pickler.dispatch.copy() 

1407 

1408 def _save_reduce_pickle5( 

1409 self, 

1410 func, 

1411 args, 

1412 state=None, 

1413 listitems=None, 

1414 dictitems=None, 

1415 state_setter=None, 

1416 obj=None, 

1417 ): 

1418 save = self.save 

1419 write = self.write 

1420 self.save_reduce( 

1421 func, 

1422 args, 

1423 state=None, 

1424 listitems=listitems, 

1425 dictitems=dictitems, 

1426 obj=obj, 

1427 ) 

1428 # backport of the Python 3.8 state_setter pickle operations 

1429 save(state_setter) 

1430 save(obj) # simple BINGET opcode as obj is already memoized. 

1431 save(state) 

1432 write(pickle.TUPLE2) 

1433 # Trigger a state_setter(obj, state) function call. 

1434 write(pickle.REDUCE) 

1435 # The purpose of state_setter is to carry-out an 

1436 # inplace modification of obj. We do not care about what the 

1437 # method might return, so its output is eventually removed from 

1438 # the stack. 

1439 write(pickle.POP) 

1440 

1441 def save_global(self, obj, name=None, pack=struct.pack): 

1442 """Main dispatch method. 

1443 

1444 The name of this method is somewhat misleading: all types get 

1445 dispatched here. 

1446 """ 

1447 if obj is type(None): # noqa 

1448 return self.save_reduce(type, (None,), obj=obj) 

1449 elif obj is type(Ellipsis): 

1450 return self.save_reduce(type, (Ellipsis,), obj=obj) 

1451 elif obj is type(NotImplemented): 

1452 return self.save_reduce(type, (NotImplemented,), obj=obj) 

1453 elif obj in _BUILTIN_TYPE_NAMES: 

1454 return self.save_reduce( 

1455 _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj 

1456 ) 

1457 

1458 if name is not None: 

1459 super().save_global(obj, name=name) 

1460 elif not _should_pickle_by_reference(obj, name=name): 

1461 self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) 

1462 else: 

1463 super().save_global(obj, name=name) 

1464 

1465 dispatch[type] = save_global 

1466 

1467 def save_function(self, obj, name=None): 

1468 """Registered with the dispatch to handle all function types. 

1469 

1470 Determines what kind of function obj is (e.g. lambda, defined at 

1471 interactive prompt, etc) and handles the pickling appropriately. 

1472 """ 

1473 if _should_pickle_by_reference(obj, name=name): 

1474 return super().save_global(obj, name=name) 

1475 elif PYPY and isinstance(obj.__code__, builtin_code_type): 

1476 return self.save_pypy_builtin_func(obj) 

1477 else: 

1478 return self._save_reduce_pickle5( 

1479 *self._dynamic_function_reduce(obj), obj=obj 

1480 ) 

1481 

1482 def save_pypy_builtin_func(self, obj): 

1483 """Save pypy equivalent of builtin functions. 

1484 

1485 PyPy does not have the concept of builtin-functions. Instead, 

1486 builtin-functions are simple function instances, but with a 

1487 builtin-code attribute. 

1488 Most of the time, builtin functions should be pickled by attribute. 

1489 But PyPy has flaky support for __qualname__, so some builtin 

1490 functions such as float.__new__ will be classified as dynamic. For 

1491 this reason only, we created this special routine. Because 

1492 builtin-functions are not expected to have closure or globals, 

1493 there is no additional hack (compared the one already implemented 

1494 in pickle) to protect ourselves from reference cycles. A simple 

1495 (reconstructor, newargs, obj.__dict__) tuple is save_reduced. Note 

1496 also that PyPy improved their support for __qualname__ in v3.6, so 

1497 this routing should be removed when cloudpickle supports only PyPy 

1498 3.6 and later. 

1499 """ 

1500 rv = ( 

1501 types.FunctionType, 

1502 (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__), 

1503 obj.__dict__, 

1504 ) 

1505 self.save_reduce(*rv, obj=obj) 

1506 

1507 dispatch[types.FunctionType] = save_function 

1508 

1509 

1510# Shorthands similar to pickle.dump/pickle.dumps 

1511 

1512 

1513def dump(obj, file, protocol=None, buffer_callback=None): 

1514 """Serialize obj as bytes streamed into file 

1515 

1516 protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to 

1517 pickle.HIGHEST_PROTOCOL. This setting favors maximum communication 

1518 speed between processes running the same Python version. 

1519 

1520 Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure 

1521 compatibility with older versions of Python (although this is not always 

1522 guaranteed to work because cloudpickle relies on some internal 

1523 implementation details that can change from one Python version to the 

1524 next). 

1525 """ 

1526 Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj) 

1527 

1528 

1529def dumps(obj, protocol=None, buffer_callback=None): 

1530 """Serialize obj as a string of bytes allocated in memory 

1531 

1532 protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to 

1533 pickle.HIGHEST_PROTOCOL. This setting favors maximum communication 

1534 speed between processes running the same Python version. 

1535 

1536 Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure 

1537 compatibility with older versions of Python (although this is not always 

1538 guaranteed to work because cloudpickle relies on some internal 

1539 implementation details that can change from one Python version to the 

1540 next). 

1541 """ 

1542 with io.BytesIO() as file: 

1543 cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback) 

1544 cp.dump(obj) 

1545 return file.getvalue() 

1546 

1547 

1548# Include pickles unloading functions in this namespace for convenience. 

1549load, loads = pickle.load, pickle.loads 

1550 

1551# Backward compat alias. 

1552CloudPickler = Pickler