Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/joblib/_memmapping_reducer.py: 23%

250 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1""" 

2Reducer using memory mapping for numpy arrays 

3""" 

4# Author: Thomas Moreau <thomas.moreau.2010@gmail.com> 

5# Copyright: 2017, Thomas Moreau 

6# License: BSD 3 clause 

7 

8from mmap import mmap 

9import errno 

10import os 

11import stat 

12import threading 

13import atexit 

14import tempfile 

15import time 

16import warnings 

17import weakref 

18from uuid import uuid4 

19from multiprocessing import util 

20 

21from pickle import whichmodule, loads, dumps, HIGHEST_PROTOCOL, PicklingError 

22 

# ``WindowsError`` is not defined as a builtin on every platform. When the
# lookup raises NameError, bind the name to ``NoneType`` so that later
# references to ``WindowsError`` do not fail.
try:
    WindowsError
except NameError:
    WindowsError = type(None)

27 

28try: 

29 import numpy as np 

30 from numpy.lib.stride_tricks import as_strided 

31except ImportError: 

32 np = None 

33 

34from .numpy_pickle import dump, load, load_temporary_memmap 

35from .backports import make_memmap 

36from .disk import delete_folder 

37from .externals.loky.backend import resource_tracker 

38 

# Some systems mount a RAM disk by default; it can be used instead of /tmp as
# the default folder to dump big arrays to share with subprocesses.
SYSTEM_SHARED_MEM_FS = '/dev/shm'

# Minimal number of bytes that must be available on SYSTEM_SHARED_MEM_FS for
# it to be considered as the default folder to dump big arrays to share with
# subprocesses (2e9 bytes).
SYSTEM_SHARED_MEM_FS_MIN_SIZE = 2 * 10 ** 9

# Folder and file permissions used to chmod the temporary files generated by
# the memmapping pool: only the owner of the Python process can access the
# temporary files and folder.
FOLDER_PERMISSIONS = stat.S_IRWXU  # read | write | execute for the owner
FILE_PERMISSIONS = stat.S_IWUSR | stat.S_IRUSR

# Set used in joblib workers, referencing the filenames of temporary memmaps
# created by joblib to speed up data communication. In child processes, a
# finalizer is added to these memmaps that sends a maybe_unlink call to the
# resource_tracker, in order to free main memory as fast as possible.
JOBLIB_MMAPS = set()

58 

59 

def _log_and_unlink(filename):
    """Log the finalizer call and send a maybe_unlink request for *filename*.

    The request is sent to the loky ``_resource_tracker`` for a resource of
    type ``"file"``.
    """
    from .externals.loky.backend.resource_tracker import _resource_tracker

    message = (
        "[FINALIZER CALL] object mapping to {} about to be deleted,"
        " decrementing the refcount of the file (pid: {})"
    ).format(os.path.basename(filename), os.getpid())
    util.debug(message)
    _resource_tracker.maybe_unlink(filename, "file")

67 

68 

def add_maybe_unlink_finalizer(memmap):
    """Register a finalizer calling ``_log_and_unlink`` on *memmap*'s file.

    The finalizer is attached with ``weakref.finalize`` and receives
    ``memmap.filename`` as its only argument.
    """
    backing_file = memmap.filename
    util.debug(
        "[FINALIZER ADD] adding finalizer to {} (id {}, filename {}, pid {})"
        "".format(
            type(memmap),
            id(memmap),
            os.path.basename(backing_file),
            os.getpid(),
        )
    )
    weakref.finalize(memmap, _log_and_unlink, backing_file)

75 

76 

def unlink_file(filename):
    """Wrapper around os.unlink with a retry mechanism.

    The retry mechanism has been implemented primarily to overcome a race
    condition happening during the finalizer of a np.memmap: when a process
    holding the last reference to a mmap-backed np.memmap/np.array is about to
    delete this array (and close the reference), it sends a maybe_unlink
    request to the resource_tracker. This request can be processed faster than
    it takes for the last reference of the memmap to be closed, yielding (on
    Windows) a PermissionError in the resource_tracker loop.

    Parameters
    ----------
    filename : str
        Path of the file to delete.

    Raises
    ------
    PermissionError
        If the file still cannot be unlinked after the last retry.
    """
    NUM_RETRIES = 10
    for retry_no in range(1, NUM_RETRIES + 1):
        try:
            os.unlink(filename)
        except PermissionError:
            util.debug(
                '[ResourceTracker] tried to unlink {}, got '
                'PermissionError'.format(filename)
            )
            if retry_no == NUM_RETRIES:
                raise
            # Give the other process some time to close its reference.
            time.sleep(.2)
        except FileNotFoundError:
            # In case of a race condition when deleting the temporary folder,
            # avoid noisy FileNotFoundError exception in the resource tracker.
            # The file is already gone: retrying would only repeat the same
            # failing system call, so stop here.
            return
        else:
            return

106 

107 

# Register ``unlink_file`` under the 'file' key of the resource_tracker's
# ``_CLEANUP_FUNCS`` mapping.
resource_tracker._CLEANUP_FUNCS['file'] = unlink_file

109 

110 

class _WeakArrayKeyMap:
    """A variant of weakref.WeakKeyDictionary for unhashable numpy arrays.

    Entries are indexed by ``id(obj)`` and store a weak reference to the key
    object next to the value. Explicit ``get`` / ``set`` methods are exposed
    rather than ``__getitem__`` / ``__setitem__`` to avoid any conflict with
    the numpy fancy indexing syntax.
    """

    def __init__(self):
        # Maps id(obj) -> (weakref to obj, value)
        self._data = {}

    def get(self, obj):
        """Return the value stored for ``obj`` or raise ``KeyError``."""
        key_ref, stored = self._data[id(obj)]
        if key_ref() is obj:
            return stored
        # The entry belongs to another object that had the same id. In case
        # of race condition with on_destroy: could never be triggered by the
        # joblib tests with CPython.
        raise KeyError(obj)

    def set(self, obj, value):
        """Associate ``value`` with ``obj``, replacing any previous value."""
        key = id(obj)
        entry = self._data.get(key)
        if entry is not None and entry[0]() is obj:
            # Known object: keep the weak reference that is already in place.
            key_ref = entry[0]
        else:
            # Insert the new entry in the mapping along with a weakref
            # callback to automatically delete the entry from the mapping
            # as soon as the object used as key is garbage collected.
            def on_destroy(_):
                del self._data[key]

            key_ref = weakref.ref(obj, on_destroy)
        self._data[key] = key_ref, value

    def __getstate__(self):
        raise PicklingError("_WeakArrayKeyMap is not pickleable")

149 

150 

151############################################################################### 

152# Support for efficient transient pickling of numpy data structures 

153 

154 

def _get_backing_memmap(a):
    """Walk the ``base`` ancestry of ``a`` to find its np.memmap, if any.

    Returns the first object in the chain whose ``base`` attribute is a
    python ``mmap`` instance, or None when the chain ends (missing or None
    ``base``) before such an object is found.
    """
    candidate = a
    while True:
        parent = getattr(candidate, 'base', None)
        if parent is None:
            # TODO: check scipy sparse datastructure if scipy is installed
            # Neither a nor its ancestors have a memmap base.
            return None
        if isinstance(parent, mmap):
            # candidate sits directly on top of the real mmap object.
            return candidate
        # Keep exploring the base ancestry.
        candidate = parent

170 

171 

def _get_temp_dir(pool_folder_name, temp_folder=None):
    """Get the full path to a subfolder inside the temporary folder.

    Parameters
    ----------
    pool_folder_name : str
        Sub-folder name used for the serialization of a pool instance.

    temp_folder: str, optional
        Folder to be used by the pool for memmapping large arrays
        for sharing memory with worker processes. If None, this will try in
        order:

        - a folder pointed by the JOBLIB_TEMP_FOLDER environment
          variable,
        - /dev/shm if the folder exists and is writable: this is a
          RAMdisk filesystem available by default on modern Linux
          distributions,
        - the default system temporary folder that can be
          overridden with TMP, TMPDIR or TEMP environment
          variables, typically /tmp under Unix operating systems.

    Returns
    -------
    pool_folder : str
        full path to the temporary folder
    use_shared_mem : bool
        whether the temporary folder is written to the system shared memory
        folder or some other temporary folder.
    """
    use_shared_mem = False
    if temp_folder is None:
        temp_folder = os.environ.get('JOBLIB_TEMP_FOLDER', None)
    if temp_folder is None:
        if os.path.exists(SYSTEM_SHARED_MEM_FS) and hasattr(os, 'statvfs'):
            try:
                shm_stats = os.statvfs(SYSTEM_SHARED_MEM_FS)
                # f_bavail is expressed in units of f_frsize (the fundamental
                # block size), not f_bsize (the preferred I/O block size).
                # Fall back to f_bsize if f_frsize is reported as 0.
                block_size = shm_stats.f_frsize or shm_stats.f_bsize
                available_nbytes = block_size * shm_stats.f_bavail
                if available_nbytes > SYSTEM_SHARED_MEM_FS_MIN_SIZE:
                    # Try to see if we have write access to the shared mem
                    # folder only if it is reasonably large (that is 2GB or
                    # more).
                    temp_folder = SYSTEM_SHARED_MEM_FS
                    pool_folder = os.path.join(temp_folder, pool_folder_name)
                    # exist_ok avoids a spurious fallback to /tmp when the
                    # folder is created concurrently between an existence
                    # check and the creation.
                    os.makedirs(pool_folder, exist_ok=True)
                    use_shared_mem = True
            except (IOError, OSError):
                # Missing rights in the /dev/shm partition, fallback to regular
                # temp folder.
                temp_folder = None
    if temp_folder is None:
        # Fallback to the default tmp folder, typically /tmp
        temp_folder = tempfile.gettempdir()
    temp_folder = os.path.abspath(os.path.expanduser(temp_folder))
    pool_folder = os.path.join(temp_folder, pool_folder_name)
    return pool_folder, use_shared_mem

229 

230 

def has_shareable_memory(a):
    """Tell whether ``a`` is backed, directly or not, by some mmap buffer."""
    backing = _get_backing_memmap(a)
    return backing is not None

234 

235 

def _strided_from_memmap(filename, dtype, mode, offset, order, shape, strides,
                         total_buffer_len, unlink_on_gc_collect):
    """Rebuild an array view on top of a memory mapped file.

    When ``strides`` is None the memmap created by ``make_memmap`` with the
    requested ``shape`` is returned directly. Otherwise a buffer of
    ``total_buffer_len`` items is mapped and the view is extracted from it
    with ``as_strided``.
    """
    # Do not zero the original data when unpickling: 'w+' is replaced by 'r+'.
    effective_mode = 'r+' if mode == 'w+' else mode
    is_contiguous = strides is None

    # For non-contiguous data, memmap the total enclosing buffer first.
    mapped = make_memmap(
        filename,
        dtype=dtype,
        shape=shape if is_contiguous else total_buffer_len,
        mode=effective_mode,
        offset=offset,
        order=order,
        unlink_on_gc_collect=unlink_on_gc_collect,
    )
    if is_contiguous:
        # Simple, contiguous memmap
        return mapped
    # Extract the non-contiguous view with the stride-tricks API
    return as_strided(mapped, shape=shape, strides=strides)

257 

258 

def _reduce_memmap_backed(a, m):
    """Pickling reduction for memmap backed arrays.

    a is expected to be an instance of np.ndarray (or np.memmap)
    m is expected to be an instance of np.memmap on the top of the ``base``
    attribute ancestry of a. ``m.base`` should be the real python mmap object.

    Returns
    -------
    tuple
        ``(_strided_from_memmap, args)`` where ``args`` holds the filename,
        dtype, mode, byte offset, order, shape, strides, total buffer length
        (in items) and a False ``unlink_on_gc_collect`` flag.
    """
    util.debug('[MEMMAP REDUCE] reducing a memmap-backed array '
               '(shape, {}, pid: {})'.format(a.shape, os.getpid()))

    # ``np.byte_bounds`` was removed from the main namespace in NumPy 2.0 and
    # now lives in ``numpy.lib.array_utils``; support both layouts.
    try:
        from numpy.lib.array_utils import byte_bounds
    except ImportError:
        byte_bounds = np.byte_bounds

    # offset that comes from the striding differences between a and m
    a_start, a_end = byte_bounds(a)
    m_start = byte_bounds(m)[0]
    offset = a_start - m_start

    # offset from the backing memmap
    offset += m.offset

    # The backing memmap buffer is necessarily contiguous hence C if not
    # Fortran
    order = 'F' if m.flags['F_CONTIGUOUS'] else 'C'

    if a.flags['F_CONTIGUOUS'] or a.flags['C_CONTIGUOUS']:
        # If the array is a contiguous view, no need to pass the strides
        strides = None
        total_buffer_len = None
    else:
        # Compute the total number of items to map from which the strided
        # view will be extracted.
        strides = a.strides
        total_buffer_len = (a_end - a_start) // a.itemsize

    return (_strided_from_memmap,
            (m.filename, a.dtype, m.mode, offset, order, a.shape, strides,
             total_buffer_len, False))

296 

297 

def reduce_array_memmap_backward(a):
    """Reduce a np.array or a np.memmap sent back from a child process."""
    backing = _get_backing_memmap(a)
    backed_by_foreign_memmap = (
        isinstance(backing, np.memmap)
        and backing.filename not in JOBLIB_MMAPS
    )
    if backed_by_foreign_memmap:
        # a is backed by a memmaped file that is not listed in JOBLIB_MMAPS:
        # reconstruct a using the memmaped file.
        return _reduce_memmap_backed(a, backing)

    # a is either a regular (not memmap-backed) numpy array, or an array
    # backed by a shared temporary file created by joblib. In the latter
    # case, in order to limit the lifespan of these temporary files, we
    # serialize the memmap as a regular numpy array, and decref the
    # file backing the memmap (done implicitly in a previously registered
    # finalizer, see ``unlink_on_gc_collect`` for more details)
    payload = dumps(np.asarray(a), protocol=HIGHEST_PROTOCOL)
    return loads, (payload,)

315 

316 

class ArrayMemmapForwardReducer(object):
    """Reducer callable to dump large arrays to memmap files.

    Parameters
    ----------
    max_nbytes: int
        Threshold to trigger memmapping of large arrays to files created
        in a folder. If None, arrays are never dumped to memmap files.
    temp_folder_resolver: callable
        A callable in charge of resolving a temporary folder name where files
        for backing memmapped arrays are created.
    mmap_mode: 'r', 'r+' or 'c'
        Mode for the created memmap datastructure. See the documentation of
        numpy.memmap for more details. Note: 'w+' is coerced to 'r+'
        automatically to avoid zeroing the data on unpickling.
    unlink_on_gc_collect: bool
        If true, the backing file is registered once more with the
        resource_tracker each time an array is reduced to a temporary memmap.
        The flag is also forwarded to ``load_temporary_memmap``.
    verbose: int, optional, 0 by default
        Verbosity level, stored as ``self.verbose``. The messages emitted by
        this class go through ``multiprocessing.util.debug``.
    prewarm: bool or "auto", optional, True by default.
        Force a read on newly memmapped array to make sure that OS pre-cache it
        memory. This can be useful to avoid concurrent disk access when the
        same data array is passed to different worker processes. With "auto",
        prewarming is enabled unless the resolved temporary folder starts
        with SYSTEM_SHARED_MEM_FS.
    """

    def __init__(self, max_nbytes, temp_folder_resolver, mmap_mode,
                 unlink_on_gc_collect, verbose=0, prewarm=True):
        self._max_nbytes = max_nbytes
        self._temp_folder_resolver = temp_folder_resolver
        self._mmap_mode = mmap_mode
        self.verbose = int(verbose)
        if prewarm == "auto":
            # Resolve "auto" to a boolean once. It must not be overwritten
            # afterwards, otherwise the truthy "auto" string would always
            # enable prewarming.
            self._prewarm = not self._temp_folder.startswith(
                SYSTEM_SHARED_MEM_FS
            )
        else:
            self._prewarm = prewarm
        self._memmaped_arrays = _WeakArrayKeyMap()
        self._temporary_memmaped_filenames = set()
        self._unlink_on_gc_collect = unlink_on_gc_collect

    @property
    def _temp_folder(self):
        return self._temp_folder_resolver()

    def __reduce__(self):
        # The ArrayMemmapForwardReducer is passed to the children processes: it
        # needs to be pickled but the _WeakArrayKeyMap need to be skipped as
        # it's only guaranteed to be consistent with the parent process memory
        # garbage collection.
        # Although this reducer is pickled, it is not needed in its destination
        # process (child processes), as we only use this reducer to send
        # memmaps from the parent process to the children processes. For this
        # reason, we can afford skipping the resolver, (which would otherwise
        # be unpicklable), and pass it as None instead.
        # NOTE(review): pickle treats the third item of the returned tuple as
        # the object state (applied to ``__dict__`` when there is no
        # ``__setstate__``), not as constructor keyword arguments - confirm
        # that this is the intended behavior.
        args = (self._max_nbytes, None, self._mmap_mode,
                self._unlink_on_gc_collect)
        kwargs = {
            'verbose': self.verbose,
            'prewarm': self._prewarm,
        }
        return ArrayMemmapForwardReducer, args, kwargs

    def __call__(self, a):
        m = _get_backing_memmap(a)
        if m is not None and isinstance(m, np.memmap):
            # a is already backed by a memmap file, let's reuse it directly
            return _reduce_memmap_backed(a, m)

        if (not a.dtype.hasobject and self._max_nbytes is not None and
                a.nbytes > self._max_nbytes):
            # Resolve the folder once so that every step below works on the
            # same path.
            temp_folder = self._temp_folder

            # check that the folder exists (lazily create the pool temp folder
            # if required)
            try:
                os.makedirs(temp_folder)
                os.chmod(temp_folder, FOLDER_PERMISSIONS)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise

            try:
                basename = self._memmaped_arrays.get(a)
            except KeyError:
                # Generate a new unique random filename. The process and thread
                # ids are only useful for debugging purpose and to make it
                # easier to cleanup orphaned files in case of hard process
                # kill (e.g. by "kill -9" or segfault).
                basename = "{}-{}-{}.pkl".format(
                    os.getpid(), id(threading.current_thread()), uuid4().hex)
                self._memmaped_arrays.set(a, basename)
            filename = os.path.join(temp_folder, basename)

            # In case the same array with the same content is passed several
            # times to the pool subprocess children, serialize it only once

            is_new_memmap = filename not in self._temporary_memmaped_filenames

            # add the memmap to the list of temporary memmaps created by joblib
            self._temporary_memmaped_filenames.add(filename)

            if self._unlink_on_gc_collect:
                # Bump reference count of the memmap by 1 to account for
                # shared usage of the memmap by a child process. The
                # corresponding decref call will be executed upon calling
                # resource_tracker.maybe_unlink, registered as a finalizer in
                # the child.
                # the incref/decref calls here are only possible when the child
                # and the parent share the same resource_tracker. It is not the
                # case for the multiprocessing backend, but it does not matter
                # because unlinking a memmap from a child process is only
                # useful to control the memory usage of long-lasting child
                # processes, while the multiprocessing-based pools terminate
                # their workers at the end of a map() call.
                resource_tracker.register(filename, "file")

            if is_new_memmap:
                # Incref each temporary memmap created by joblib one extra
                # time.  This means that these memmaps will only be deleted
                # once an extra maybe_unlink() is called, which is done once
                # all the jobs have completed (or been canceled) in the
                # Parallel._terminate_backend() method.
                resource_tracker.register(filename, "file")

            if not os.path.exists(filename):
                util.debug(
                    "[ARRAY DUMP] Pickling new array (shape={}, dtype={}) "
                    "creating a new memmap at {}".format(
                        a.shape, a.dtype, filename))
                for dumped_filename in dump(a, filename):
                    os.chmod(dumped_filename, FILE_PERMISSIONS)

                if self._prewarm:
                    # Warm up the data by accessing it. This operation ensures
                    # that the disk access required to create the memmapping
                    # file are performed in the reducing process and avoids
                    # concurrent memmap creation in multiple children
                    # processes.
                    load(filename, mmap_mode=self._mmap_mode).max()

            else:
                util.debug(
                    "[ARRAY DUMP] Pickling known array (shape={}, dtype={}) "
                    "reusing memmap file: {}".format(
                        a.shape, a.dtype, os.path.basename(filename)))

            # The worker process will use joblib.load to memmap the data
            return (
                (load_temporary_memmap, (filename, self._mmap_mode,
                                         self._unlink_on_gc_collect))
            )
        else:
            # do not convert a into memmap, let pickler do its usual copy with
            # the default system pickler
            util.debug(
                '[ARRAY DUMP] Pickling array (NO MEMMAPPING) (shape={}, '
                ' dtype={}).'.format(a.shape, a.dtype))
            return (loads, (dumps(a, protocol=HIGHEST_PROTOCOL),))

475 

476 

def get_memmapping_reducers(
        forward_reducers=None, backward_reducers=None,
        temp_folder_resolver=None, max_nbytes=1e6, mmap_mode='r', verbose=0,
        prewarm=False, unlink_on_gc_collect=True, **kwargs):
    """Build the pair of memmapping reducer mappings used by pools/executors.

    The provided ``forward_reducers`` / ``backward_reducers`` dicts (new
    empty dicts when None) are updated in place with reducers for
    ``np.ndarray`` and ``np.memmap`` when numpy is available, then returned
    as a ``(forward_reducers, backward_reducers)`` tuple.
    """
    forward_reducers = {} if forward_reducers is None else forward_reducers
    backward_reducers = {} if backward_reducers is None else backward_reducers

    if np is None:
        # Without numpy there is nothing to register.
        return forward_reducers, backward_reducers

    # Register smart numpy.ndarray reducers that detects memmap backed
    # arrays and that is also able to dump to memmap large in-memory
    # arrays over the max_nbytes threshold
    forward_reduce_ndarray = ArrayMemmapForwardReducer(
        max_nbytes, temp_folder_resolver, mmap_mode, unlink_on_gc_collect,
        verbose, prewarm=prewarm)
    array_types = (np.ndarray, np.memmap)
    for array_type in array_types:
        forward_reducers[array_type] = forward_reduce_ndarray

    # Communication from child process to the parent process always
    # pickles in-memory numpy.ndarray without dumping them as memmap
    # to avoid confusing the caller and make it tricky to collect the
    # temporary folder
    for array_type in array_types:
        backward_reducers[array_type] = reduce_array_memmap_backward

    return forward_reducers, backward_reducers

510 

511 

class TemporaryResourcesManager(object):
    """Stateful object able to manage temporary folder and pickles

    It exposes:
    - a per-context folder name resolving API that memmap-based reducers will
      rely on to know where to pickle the temporary memmaps
    - a temporary file/folder management API that internally uses the
      resource_tracker.
    """

    def __init__(self, temp_folder_root=None, context_id=None):
        self._current_temp_folder = None
        self._temp_folder_root = temp_folder_root
        self._use_shared_mem = None
        self._cached_temp_folders = dict()
        self._id = uuid4().hex
        self._finalizers = {}
        # It would be safer to not assign a default context id (less silent
        # bugs), but doing this while maintaining backward compatibility
        # with the previous, context-unaware version get_memmaping_executor
        # exposes too many low-level details.
        self.set_current_context(
            uuid4().hex if context_id is None else context_id
        )

    def set_current_context(self, context_id):
        """Make ``context_id`` the active context, registering it if new."""
        self._current_context_id = context_id
        self.register_new_context(context_id)

    def register_new_context(self, context_id):
        """Prepare (without creating it) the folder name of a new context."""
        # Prepare a sub-folder name specific to a context (usually a unique id
        # generated by each instance of the Parallel class). Do not create in
        # advance to spare FS write access if no array is to be dumped).
        if context_id in self._cached_temp_folders:
            return

        # During its lifecycle, one Parallel object can have several
        # executors associated to it (for instance, if a loky worker raises
        # an exception, joblib shutdowns the executor and instantly
        # recreates a new one before raising the error - see
        # ``ensure_ready``.  Because we don't want two executors tied to
        # the same Parallel object (and thus the same context id) to
        # register/use/delete the same folder, we also add an id specific
        # to the current Manager (and thus specific to its associated
        # executor) to the folder name.
        folder_name = "joblib_memmapping_folder_{}_{}_{}".format(
            os.getpid(), self._id, context_id
        )
        folder_path, _ = _get_temp_dir(folder_name, self._temp_folder_root)
        self.register_folder_finalizer(folder_path, context_id)
        self._cached_temp_folders[context_id] = folder_path

    def resolve_temp_folder_name(self):
        """Return a folder name specific to the currently activated context"""
        return self._cached_temp_folders[self._current_context_id]

    # resource management API

    def register_folder_finalizer(self, pool_subfolder, context_id):
        """Register ``pool_subfolder`` for clean up at interpreter exit."""
        # Register the garbage collector at program exit in case caller forgets
        # to call terminate explicitly: note we do not pass any reference to
        # ensure that this callback won't prevent garbage collection of
        # parallel instance and related file handler resources such as POSIX
        # semaphores and pipes
        pool_module_name = whichmodule(delete_folder, 'delete_folder')
        resource_tracker.register(pool_subfolder, "folder")

        def _cleanup():
            # In some cases the Python runtime seems to set delete_folder to
            # None just before exiting when accessing the delete_folder
            # function from the closure namespace. So instead we reimport
            # the delete_folder function explicitly.
            # https://github.com/joblib/joblib/issues/328
            # We cannot just use from 'joblib.pool import delete_folder'
            # because joblib should only use relative imports to allow
            # easy vendoring.
            pool_module = __import__(
                pool_module_name, fromlist=['delete_folder']
            )
            folder_deleter = pool_module.delete_folder
            try:
                folder_deleter(pool_subfolder, allow_non_empty=True)
                resource_tracker.unregister(pool_subfolder, "folder")
            except OSError:
                warnings.warn("Failed to delete temporary folder: {}"
                              .format(pool_subfolder))

        self._finalizers[context_id] = atexit.register(_cleanup)

    def _clean_temporary_resources(self, context_id=None, force=False,
                                   allow_non_empty=False):
        """Clean temporary resources created by a process-based pool"""
        if context_id is None:
            # Iterates over a copy of the cache keys to avoid Error due to
            # iterating over a changing size dictionary.
            for cached_id in list(self._cached_temp_folders):
                self._clean_temporary_resources(
                    cached_id, force=force, allow_non_empty=allow_non_empty
                )
            return

        temp_folder = self._cached_temp_folders.get(context_id)
        if not temp_folder or not os.path.exists(temp_folder):
            # Unknown context or folder absent from disk: nothing to do.
            return

        if force:
            # Some workers have failed and the ref counted might
            # be off. The workers should have shut down by this
            # time so forcefully clean up the files.
            release_file = resource_tracker.unregister
        else:
            release_file = resource_tracker.maybe_unlink
        for filename in os.listdir(temp_folder):
            release_file(os.path.join(temp_folder, filename), "file")

        # When forcing clean-up, try to delete the folder even if some
        # files are still in it. Otherwise, try to delete the folder
        allow_non_empty |= force

        # Clean up the folder if possible, either if it is empty or
        # if none of the files in it are in used and allow_non_empty.
        try:
            delete_folder(temp_folder, allow_non_empty=allow_non_empty)
            # Forget the folder once it has been deleted
            self._cached_temp_folders.pop(context_id, None)
            resource_tracker.unregister(temp_folder, "folder")

            # Also cancel the finalizers that gets triggered at gc.
            finalizer = self._finalizers.pop(context_id, None)
            if finalizer is not None:
                atexit.unregister(finalizer)
        except OSError:
            # Temporary folder cannot be deleted right now.
            # This folder will be cleaned up by an atexit
            # finalizer registered by the memmapping_reducer.
            pass