Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/joblib/memory.py: 24%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

398 statements  

1""" 

2A context object for caching a function's return value each time it 

3is called with the same input arguments. 

4 

5""" 

6 

7# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> 

8# Copyright (c) 2009 Gael Varoquaux 

9# License: BSD Style, 3 clauses. 

10 

11import asyncio 

12import datetime 

13import functools 

14import inspect 

15import logging 

16import os 

17import pathlib 

18import pydoc 

19import re 

20import textwrap 

21import time 

22import tokenize 

23import traceback 

24import warnings 

25import weakref 

26 

27from . import hashing 

28from ._store_backends import ( 

29 CacheWarning, # noqa 

30 FileSystemStoreBackend, 

31 StoreBackendBase, 

32) 

33from .func_inspect import ( 

34 filter_args, 

35 format_call, 

36 format_signature, 

37 get_func_code, 

38 get_func_name, 

39) 

40from .logger import Logger, format_time, pformat 

41 

# Marker prefixing the first line of a stored function source; the text that
# follows it on that line is the line number of the function in its file.
FIRST_LINE_TEXT = "# first line:"

43 

44# TODO: The following object should have a data store object as a sub 

45# object, and the interface to persist and query should be separated in 

46# the data store. 

47# 

48# This would enable creating 'Memory' objects with a different logic for 

49# pickling that would simply span a MemorizedFunc with the same 

50# store (or do we want to copy it to avoid cross-talks?), for instance to 

51# implement HDF5 pickling. 

52 

53# TODO: Same remark for the logger, and probably use the Python logging 

54# mechanism. 

55 

56 

def extract_first_line(func_code):
    """Split a stored function source into its code and first-line number.

    When ``func_code`` begins with the ``FIRST_LINE_TEXT`` marker, the integer
    following the marker on the first line is parsed and that header line is
    dropped from the returned code. Otherwise the code is returned untouched
    and the first line is reported as ``-1``.

    Returns
    -------
    (func_code, first_line): tuple of (str, int)
    """
    if not func_code.startswith(FIRST_LINE_TEXT):
        return func_code, -1

    # Everything before the first newline is the header, the rest is the code.
    header, _, remainder = func_code.partition("\n")
    line_number = int(header[len(FIRST_LINE_TEXT) :])
    return remainder, line_number

68 

69 

class JobLibCollisionWarning(UserWarning):
    """Warning category signalling a possible name collision between functions."""

    pass

72 

73 

# Registry mapping a backend name to the store backend class implementing it.
_STORE_BACKENDS = dict(local=FileSystemStoreBackend)

75 

76 

def register_store_backend(backend_name, backend):
    """Extend available store backends.

    The Memory, MemorizedResult and MemorizedFunc objects are designed to be
    agnostic to the type of store used behind. By default, the local file
    system is used but this function gives the possibility to extend joblib's
    memory pattern with other types of storage such as cloud storage (S3, GCS,
    OpenStack, HadoopFS, etc) or blob DBs.

    Parameters
    ----------
    backend_name: str
        The name identifying the store backend being registered. For example,
        'local' is used with FileSystemStoreBackend.
    backend: StoreBackendBase subclass
        A class that implements the StoreBackendBase interface.

    Raises
    ------
    ValueError
        If ``backend_name`` is not a string, or if ``backend`` is not a class
        deriving from StoreBackendBase.
    """
    if not isinstance(backend_name, str):
        raise ValueError(
            "Store backend name should be a string, '{0}' given.".format(backend_name)
        )
    # issubclass() raises TypeError when its first argument is not a class
    # (e.g. an instance or a string), so check that first to always report
    # invalid backends with the same ValueError.
    if not (isinstance(backend, type) and issubclass(backend, StoreBackendBase)):
        raise ValueError(
            "Store backend should inherit StoreBackendBase, '{0}' given.".format(
                backend
            )
        )

    _STORE_BACKENDS[backend_name] = backend

107 

108 

def _store_backend_factory(backend, location, verbose=0, backend_options=None):
    """Return the correct store object for the given location.

    Parameters
    ----------
    backend: str
        Name of a backend registered in ``_STORE_BACKENDS``. Only used when
        ``location`` is a string or a ``pathlib.Path``.
    location: str, pathlib.Path, StoreBackendBase instance or None
        Where the cache lives. A StoreBackendBase instance is returned as is.
    verbose: int
        Verbosity level forwarded to the backend's ``configure``.
    backend_options: dict or None
        Extra options forwarded to the backend's ``configure``; ``None`` is
        treated as an empty dict.

    Returns
    -------
    A configured store backend, or None when ``location`` is None or of an
    unsupported type (a UserWarning is issued in the latter case).

    Raises
    ------
    TypeError
        If ``location`` is a string/path and ``backend`` does not match any
        registered backend name.
    """
    if backend_options is None:
        backend_options = {}

    if isinstance(location, pathlib.Path):
        location = str(location)

    if isinstance(location, StoreBackendBase):
        return location

    if isinstance(location, str):
        location = os.path.expanduser(location)
        # Look for a registered backend matching the given backend name.
        try:
            backend_cls = _STORE_BACKENDS.get(backend)
        except TypeError:
            # An unhashable backend name cannot match any registered key.
            backend_cls = None

        # There is no implicit fallback: an unknown backend name is an error.
        if backend_cls is None:
            raise TypeError(
                "Unknown location {0} or backend {1}".format(location, backend)
            )

        # The store backend is configured with the extra named parameters,
        # some of them are specific to the underlying store backend.
        obj = backend_cls()
        obj.configure(location, verbose=verbose, backend_options=backend_options)
        return obj

    if location is not None:
        warnings.warn(
            "Instantiating a backend using a {} as a location is not "
            "supported by joblib. Returning None instead.".format(
                location.__class__.__name__
            ),
            UserWarning,
        )

    return None

150 

151 

def _build_func_identifier(func):
    """Return a roughly unique, path-like identifier for ``func``.

    The identifier joins the module components and the function name reported
    by ``get_func_name`` with the OS path separator, which is the historical
    filesystem-like layout of function identifiers.
    """
    module_parts, name = get_func_name(func)
    path_components = list(module_parts) + [name]
    return os.path.join(*path_components)

157 

158 

# In-memory record of function hashes, consulted before reading the function
# source stored on disk when checking whether a function definition changed.
# Keys are held weakly so that recording a function does not keep it alive.
_FUNCTION_HASHES = weakref.WeakKeyDictionary()

162 

163 

164############################################################################### 

165# class `MemorizedResult` 

166############################################################################### 

class MemorizedResult(Logger):
    """Object representing a cached value.

    Parameters
    ----------
    location: str, pathlib.Path or store backend instance
        The location of joblib cache. Depends on the store backend used.

    call_id: tuple of (str, str)
        Pair ``(func_id, args_id)`` identifying the cached call: the
        identifier of the function and the hash of its arguments.

    backend: str
        Type of store backend for reading/writing cache files.
        Default is 'local'.

    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
        The memmapping mode used when loading from cache numpy arrays. See
        numpy.load for the meaning of the different values.

    verbose: int
        verbosity level (0 means no message).

    timestamp, metadata: string
        for internal use only. When ``metadata`` is None, it is read from the
        store backend.

    Attributes
    ----------
    func, func_id: str
        Identifier of the function whose output is cached (first item of
        ``call_id``).

    args_id: str
        Hash of the function arguments (second item of ``call_id``).

    duration:
        The "duration" entry of the metadata, or None when it is absent.
    """

    def __init__(
        self,
        location,
        call_id,
        backend="local",
        mmap_mode=None,
        verbose=0,
        timestamp=None,
        metadata=None,
    ):
        Logger.__init__(self)
        self._call_id = call_id
        # Forward mmap_mode to the backend, as MemorizedFunc does, so that a
        # backend built here from a plain location honours it when loading.
        self.store_backend = _store_backend_factory(
            backend,
            location,
            verbose=verbose,
            backend_options=dict(mmap_mode=mmap_mode),
        )
        self.mmap_mode = mmap_mode

        if metadata is not None:
            self.metadata = metadata
        else:
            self.metadata = self.store_backend.get_metadata(self._call_id)

        self.duration = self.metadata.get("duration", None)
        self.verbose = verbose
        self.timestamp = timestamp

    @property
    def func(self):
        """Alias of ``func_id``."""
        return self.func_id

    @property
    def func_id(self):
        """Identifier of the cached function (first item of the call id)."""
        return self._call_id[0]

    @property
    def args_id(self):
        """Hash of the call arguments (second item of the call id)."""
        return self._call_id[1]

    def get(self):
        """Read value from cache and return it.

        Raises
        ------
        KeyError
            If the store backend raises a ValueError while loading the item.
        """
        try:
            return self.store_backend.load_item(
                self._call_id,
                timestamp=self.timestamp,
                metadata=self.metadata,
                verbose=self.verbose,
            )
        except ValueError as exc:
            new_exc = KeyError(
                "Error while trying to load a MemorizedResult's value. "
                "It seems that this folder is corrupted : {}".format(
                    os.path.join(self.store_backend.location, *self._call_id)
                )
            )
            raise new_exc from exc

    def clear(self):
        """Clear value from cache"""
        self.store_backend.clear_item(self._call_id)

    def __repr__(self):
        return '{}(location="{}", func="{}", args_id="{}")'.format(
            self.__class__.__name__, self.store_backend.location, *self._call_id
        )

    def __getstate__(self):
        # The timestamp is dropped from the pickled state.
        state = self.__dict__.copy()
        state["timestamp"] = None
        return state

265 

266 

class NotMemorizedResult(object):
    """Holder for an arbitrary in-memory value.

    Stands in for MemorizedResult when no cache is available: the value is
    kept on the instance itself until ``clear`` is called.
    """

    __slots__ = ("value", "valid")

    def __init__(self, value):
        self.value = value
        self.valid = True

    def get(self):
        """Return the held value, or raise KeyError once it has been cleared."""
        if not self.valid:
            raise KeyError("No value stored.")
        return self.value

    def clear(self):
        """Drop the held value and mark the result as invalid."""
        self.valid = False
        self.value = None

    def __repr__(self):
        cls_name = self.__class__.__name__
        if not self.valid:
            return cls_name + " with no value"
        return "{class_name}({value})".format(
            class_name=cls_name, value=pformat(self.value)
        )

    # Explicit pickling hooks: with __slots__ there is no instance __dict__.
    def __getstate__(self):
        return dict(valid=self.valid, value=self.value)

    def __setstate__(self, state):
        self.valid = state["valid"]
        self.value = state["value"]

304 

305 

306############################################################################### 

307# class `NotMemorizedFunc` 

308############################################################################### 

class NotMemorizedFunc(object):
    """Pass-through wrapper around a function, used when there is no cache.

    It mirrors the API of MemorizedFunc but simply delegates to the wrapped
    function and never stores anything.

    Attributes
    ----------
    func: callable
        Original undecorated function.
    """

    # Kept deliberately minimal, for speed.
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    def call_and_shelve(self, *args, **kwargs):
        """Call the function and wrap its output in a NotMemorizedResult."""
        value = self.func(*args, **kwargs)
        return NotMemorizedResult(value)

    def __repr__(self):
        return "{0}(func={1})".format(self.__class__.__name__, self.func)

    def clear(self, warn=True):
        """Do nothing; ``warn`` only exists to match MemorizedFunc.clear."""
        return None

    def call(self, *args, **kwargs):
        """Call the function and return ``(output, {})`` (empty metadata)."""
        output = self.func(*args, **kwargs)
        return output, {}

    def check_call_in_cache(self, *args, **kwargs):
        """Always report that the call is not cached."""
        return False

343 

344 

345############################################################################### 

346# class `AsyncNotMemorizedFunc` 

347############################################################################### 

class AsyncNotMemorizedFunc(NotMemorizedFunc):
    """NotMemorizedFunc variant whose wrapped function returns an awaitable."""

    async def call_and_shelve(self, *args, **kwargs):
        """Await the wrapped function and wrap its output in a NotMemorizedResult."""
        value = await self.func(*args, **kwargs)
        return NotMemorizedResult(value)

351 

352 

353############################################################################### 

354# class `MemorizedFunc` 

355############################################################################### 

356class MemorizedFunc(Logger): 

357 """Callable object decorating a function for caching its return value 

358 each time it is called. 

359 

360 Methods are provided to inspect the cache or clean it. 

361 

362 Attributes 

363 ---------- 

364 func: callable 

365 The original, undecorated, function. 

366 

367 location: string 

368 The location of joblib cache. Depends on the store backend used. 

369 

370 backend: str 

371 Type of store backend for reading/writing cache files. 

372 Default is 'local', in which case the location is the path to a 

373 disk storage. 

374 

375 ignore: list or None 

376 List of variable names to ignore when choosing whether to 

377 recompute. 

378 

379 mmap_mode: {None, 'r+', 'r', 'w+', 'c'} 

380 The memmapping mode used when loading from cache 

381 numpy arrays. See numpy.load for the meaning of the different 

382 values. 

383 

384 compress: boolean, or integer 

385 Whether to zip the stored data on disk. If an integer is 

386 given, it should be between 1 and 9, and sets the amount 

387 of compression. Note that compressed arrays cannot be 

388 read by memmapping. 

389 

390 verbose: int, optional 

391 The verbosity flag, controls messages that are issued as 

392 the function is evaluated. 

393 

394 cache_validation_callback: callable, optional 

395 Callable to check if a result in cache is valid or is to be recomputed. 

396 When the function is called with arguments for which a cache exists, 

397 the callback is called with the cache entry's metadata as its sole 

398 argument. If it returns True, the cached result is returned, else the 

399 cache for these arguments is cleared and the result is recomputed. 

400 """ 

401 

402 # ------------------------------------------------------------------------ 

403 # Public interface 

404 # ------------------------------------------------------------------------ 

405 

406 def __init__( 

407 self, 

408 func, 

409 location, 

410 backend="local", 

411 ignore=None, 

412 mmap_mode=None, 

413 compress=False, 

414 verbose=1, 

415 timestamp=None, 

416 cache_validation_callback=None, 

417 ): 

418 Logger.__init__(self) 

419 self.mmap_mode = mmap_mode 

420 self.compress = compress 

421 self.func = func 

422 self.cache_validation_callback = cache_validation_callback 

423 self.func_id = _build_func_identifier(func) 

424 self.ignore = ignore if ignore is not None else [] 

425 self._verbose = verbose 

426 

427 # retrieve store object from backend type and location. 

428 self.store_backend = _store_backend_factory( 

429 backend, 

430 location, 

431 verbose=verbose, 

432 backend_options=dict(compress=compress, mmap_mode=mmap_mode), 

433 ) 

434 if self.store_backend is not None: 

435 # Create func directory on demand. 

436 self.store_backend.store_cached_func_code([self.func_id]) 

437 

438 self.timestamp = timestamp if timestamp is not None else time.time() 

439 try: 

440 functools.update_wrapper(self, func) 

441 except Exception: 

442 pass # Objects like ufunc don't like that 

443 if inspect.isfunction(func): 

444 doc = pydoc.TextDoc().document(func) 

445 # Remove blank line 

446 doc = doc.replace("\n", "\n\n", 1) 

447 # Strip backspace-overprints for compatibility with autodoc 

448 doc = re.sub("\x08.", "", doc) 

449 else: 

450 # Pydoc does a poor job on other objects 

451 doc = func.__doc__ 

452 self.__doc__ = "Memoized version of %s" % doc 

453 

454 self._func_code_info = None 

455 self._func_code_id = None 

456 

457 def _is_in_cache_and_valid(self, call_id): 

458 """Check if the function call is cached and valid for given arguments. 

459 

460 - Compare the function code with the one from the cached function, 

461 asserting if it has changed. 

462 - Check if the function call is present in the cache. 

463 - Call `cache_validation_callback` for user define cache validation. 

464 

465 Returns True if the function call is in cache and can be used, and 

466 returns False otherwise. 

467 """ 

468 # Check if the code of the function has changed 

469 if not self._check_previous_func_code(stacklevel=4): 

470 return False 

471 

472 # Check if this specific call is in the cache 

473 if not self.store_backend.contains_item(call_id): 

474 return False 

475 

476 # Call the user defined cache validation callback 

477 metadata = self.store_backend.get_metadata(call_id) 

478 if ( 

479 self.cache_validation_callback is not None 

480 and not self.cache_validation_callback(metadata) 

481 ): 

482 self.store_backend.clear_item(call_id) 

483 return False 

484 

485 return True 

486 

487 def _cached_call(self, args, kwargs, shelving): 

488 """Call wrapped function and cache result, or read cache if available. 

489 

490 This function returns the wrapped function output or a reference to 

491 the cached result. 

492 

493 Arguments: 

494 ---------- 

495 

496 args, kwargs: list and dict 

497 input arguments for wrapped function 

498 

499 shelving: bool 

500 True when called via the call_and_shelve function. 

501 

502 

503 Returns 

504 ------- 

505 output: Output of the wrapped function if shelving is false, or a 

506 MemorizedResult reference to the value if shelving is true. 

507 metadata: dict containing the metadata associated with the call. 

508 """ 

509 args_id = self._get_args_id(*args, **kwargs) 

510 call_id = (self.func_id, args_id) 

511 _, func_name = get_func_name(self.func) 

512 func_info = self.store_backend.get_cached_func_info([self.func_id]) 

513 location = func_info["location"] 

514 

515 if self._verbose >= 20: 

516 logging.basicConfig(level=logging.INFO) 

517 _, signature = format_signature(self.func, *args, **kwargs) 

518 self.info( 

519 textwrap.dedent( 

520 f""" 

521 Querying {func_name} with signature 

522 {signature}. 

523 

524 (argument hash {args_id}) 

525 

526 The store location is {location}. 

527 """ 

528 ) 

529 ) 

530 

531 # Compare the function code with the previous to see if the 

532 # function code has changed and check if the results are present in 

533 # the cache. 

534 if self._is_in_cache_and_valid(call_id): 

535 if shelving: 

536 return self._get_memorized_result(call_id), {} 

537 

538 try: 

539 start_time = time.time() 

540 output = self._load_item(call_id) 

541 if self._verbose > 4: 

542 self._print_duration( 

543 time.time() - start_time, context="cache loaded " 

544 ) 

545 return output, {} 

546 except Exception: 

547 # XXX: Should use an exception logger 

548 _, signature = format_signature(self.func, *args, **kwargs) 

549 self.warn( 

550 "Exception while loading results for {}\n {}".format( 

551 signature, traceback.format_exc() 

552 ) 

553 ) 

554 

555 if self._verbose > 10: 

556 self.warn( 

557 f"Computing func {func_name}, argument hash {args_id} " 

558 f"in location {location}" 

559 ) 

560 

561 # Returns the output but not the metadata 

562 return self._call(call_id, args, kwargs, shelving) 

563 

564 @property 

565 def func_code_info(self): 

566 # 3-tuple property containing: the function source code, source file, 

567 # and first line of the code inside the source file 

568 if hasattr(self.func, "__code__"): 

569 if self._func_code_id is None: 

570 self._func_code_id = id(self.func.__code__) 

571 elif id(self.func.__code__) != self._func_code_id: 

572 # Be robust to dynamic reassignments of self.func.__code__ 

573 self._func_code_info = None 

574 

575 if self._func_code_info is None: 

576 # Cache the source code of self.func . Provided that get_func_code 

577 # (which should be called once on self) gets called in the process 

578 # in which self.func was defined, this caching mechanism prevents 

579 # undesired cache clearing when the cached function is called in 

580 # an environment where the introspection utilities get_func_code 

581 # relies on do not work (typically, in joblib child processes). 

582 # See #1035 for more info 

583 # TODO (pierreglaser): do the same with get_func_name? 

584 self._func_code_info = get_func_code(self.func) 

585 return self._func_code_info 

586 

587 def call_and_shelve(self, *args, **kwargs): 

588 """Call wrapped function, cache result and return a reference. 

589 

590 This method returns a reference to the cached result instead of the 

591 result itself. The reference object is small and picklable, allowing 

592 to send or store it easily. Call .get() on reference object to get 

593 result. 

594 

595 Returns 

596 ------- 

597 cached_result: MemorizedResult or NotMemorizedResult 

598 reference to the value returned by the wrapped function. The 

599 class "NotMemorizedResult" is used when there is no cache 

600 activated (e.g. location=None in Memory). 

601 """ 

602 # Return the wrapped output, without the metadata 

603 return self._cached_call(args, kwargs, shelving=True)[0] 

604 

605 def __call__(self, *args, **kwargs): 

606 # Return the output, without the metadata 

607 return self._cached_call(args, kwargs, shelving=False)[0] 

608 

609 def __getstate__(self): 

610 # Make sure self.func's source is introspected prior to being pickled - 

611 # code introspection utilities typically do not work inside child 

612 # processes 

613 _ = self.func_code_info 

614 

615 # We don't store the timestamp when pickling, to avoid the hash 

616 # depending from it. 

617 state = self.__dict__.copy() 

618 state["timestamp"] = None 

619 

620 # Invalidate the code id as id(obj) will be different in the child 

621 state["_func_code_id"] = None 

622 

623 return state 

624 

625 def check_call_in_cache(self, *args, **kwargs): 

626 """Check if the function call is cached and valid for given arguments. 

627 

628 Does not call the function or do any work besides function inspection 

629 and argument hashing. 

630 

631 - Compare the function code with the one from the cached function, 

632 asserting if it has changed. 

633 - Check if the function call is present in the cache. 

634 - Call `cache_validation_callback` for user define cache validation. 

635 

636 Returns 

637 ------- 

638 is_call_in_cache: bool 

639 Whether or not the function call is in cache and can be used. 

640 """ 

641 call_id = (self.func_id, self._get_args_id(*args, **kwargs)) 

642 return self._is_in_cache_and_valid(call_id) 

643 

644 # ------------------------------------------------------------------------ 

645 # Private interface 

646 # ------------------------------------------------------------------------ 

647 

648 def _get_args_id(self, *args, **kwargs): 

649 """Return the input parameter hash of a result.""" 

650 return hashing.hash( 

651 filter_args(self.func, self.ignore, args, kwargs), 

652 coerce_mmap=self.mmap_mode is not None, 

653 ) 

654 

655 def _hash_func(self): 

656 """Hash a function to key the online cache""" 

657 func_code_h = hash(getattr(self.func, "__code__", None)) 

658 return id(self.func), hash(self.func), func_code_h 

659 

660 def _write_func_code(self, func_code, first_line): 

661 """Write the function code and the filename to a file.""" 

662 # We store the first line because the filename and the function 

663 # name is not always enough to identify a function: people 

664 # sometimes have several functions named the same way in a 

665 # file. This is bad practice, but joblib should be robust to bad 

666 # practice. 

667 func_code = "%s %i\n%s" % (FIRST_LINE_TEXT, first_line, func_code) 

668 self.store_backend.store_cached_func_code([self.func_id], func_code) 

669 

670 # Also store in the in-memory store of function hashes 

671 is_named_callable = ( 

672 hasattr(self.func, "__name__") and self.func.__name__ != "<lambda>" 

673 ) 

674 if is_named_callable: 

675 # Don't do this for lambda functions or strange callable 

676 # objects, as it ends up being too fragile 

677 func_hash = self._hash_func() 

678 try: 

679 _FUNCTION_HASHES[self.func] = func_hash 

680 except TypeError: 

681 # Some callable are not hashable 

682 pass 

683 

684 def _check_previous_func_code(self, stacklevel=2): 

685 """ 

686 stacklevel is the depth a which this function is called, to 

687 issue useful warnings to the user. 

688 """ 

689 # First check if our function is in the in-memory store. 

690 # Using the in-memory store not only makes things faster, but it 

691 # also renders us robust to variations of the files when the 

692 # in-memory version of the code does not vary 

693 try: 

694 if self.func in _FUNCTION_HASHES: 

695 # We use as an identifier the id of the function and its 

696 # hash. This is more likely to falsely change than have hash 

697 # collisions, thus we are on the safe side. 

698 func_hash = self._hash_func() 

699 if func_hash == _FUNCTION_HASHES[self.func]: 

700 return True 

701 except TypeError: 

702 # Some callables are not hashable 

703 pass 

704 

705 # Here, we go through some effort to be robust to dynamically 

706 # changing code and collision. We cannot inspect.getsource 

707 # because it is not reliable when using IPython's magic "%run". 

708 func_code, source_file, first_line = self.func_code_info 

709 try: 

710 old_func_code, old_first_line = extract_first_line( 

711 self.store_backend.get_cached_func_code([self.func_id]) 

712 ) 

713 except (IOError, OSError): # some backend can also raise OSError 

714 self._write_func_code(func_code, first_line) 

715 return False 

716 if old_func_code == func_code: 

717 return True 

718 

719 # We have differing code, is this because we are referring to 

720 # different functions, or because the function we are referring to has 

721 # changed? 

722 

723 _, func_name = get_func_name( 

724 self.func, resolv_alias=False, win_characters=False 

725 ) 

726 if old_first_line == first_line == -1 or func_name == "<lambda>": 

727 if not first_line == -1: 

728 func_description = "{0} ({1}:{2})".format( 

729 func_name, source_file, first_line 

730 ) 

731 else: 

732 func_description = func_name 

733 warnings.warn( 

734 JobLibCollisionWarning( 

735 "Cannot detect name collisions for function '{0}'".format( 

736 func_description 

737 ) 

738 ), 

739 stacklevel=stacklevel, 

740 ) 

741 

742 # Fetch the code at the old location and compare it. If it is the 

743 # same than the code store, we have a collision: the code in the 

744 # file has not changed, but the name we have is pointing to a new 

745 # code block. 

746 if not old_first_line == first_line and source_file is not None: 

747 if os.path.exists(source_file): 

748 _, func_name = get_func_name(self.func, resolv_alias=False) 

749 num_lines = len(func_code.split("\n")) 

750 with tokenize.open(source_file) as f: 

751 on_disk_func_code = f.readlines()[ 

752 old_first_line - 1 : old_first_line - 1 + num_lines - 1 

753 ] 

754 on_disk_func_code = "".join(on_disk_func_code) 

755 possible_collision = ( 

756 on_disk_func_code.rstrip() == old_func_code.rstrip() 

757 ) 

758 else: 

759 possible_collision = source_file.startswith("<doctest ") 

760 if possible_collision: 

761 warnings.warn( 

762 JobLibCollisionWarning( 

763 "Possible name collisions between functions " 

764 "'%s' (%s:%i) and '%s' (%s:%i)" 

765 % ( 

766 func_name, 

767 source_file, 

768 old_first_line, 

769 func_name, 

770 source_file, 

771 first_line, 

772 ) 

773 ), 

774 stacklevel=stacklevel, 

775 ) 

776 

777 # The function has changed, wipe the cache directory. 

778 # XXX: Should be using warnings, and giving stacklevel 

779 if self._verbose > 10: 

780 _, func_name = get_func_name(self.func, resolv_alias=False) 

781 self.warn( 

782 "Function {0} (identified by {1}) has changed.".format( 

783 func_name, self.func_id 

784 ) 

785 ) 

786 self.clear(warn=True) 

787 return False 

788 

789 def clear(self, warn=True): 

790 """Empty the function's cache.""" 

791 func_id = self.func_id 

792 if self._verbose > 0 and warn: 

793 self.warn("Clearing function cache identified by %s" % func_id) 

794 self.store_backend.clear_path( 

795 [ 

796 func_id, 

797 ] 

798 ) 

799 

800 func_code, _, first_line = self.func_code_info 

801 self._write_func_code(func_code, first_line) 

802 

803 def call(self, *args, **kwargs): 

804 """Force the execution of the function with the given arguments. 

805 

806 The output values will be persisted, i.e., the cache will be updated 

807 with any new values. 

808 

809 Parameters 

810 ---------- 

811 *args: arguments 

812 The arguments. 

813 **kwargs: keyword arguments 

814 Keyword arguments. 

815 

816 Returns 

817 ------- 

818 output : object 

819 The output of the function call. 

820 metadata : dict 

821 The metadata associated with the call. 

822 """ 

823 call_id = (self.func_id, self._get_args_id(*args, **kwargs)) 

824 

825 # Return the output and the metadata 

826 return self._call(call_id, args, kwargs) 

827 

828 def _call(self, call_id, args, kwargs, shelving=False): 

829 # Return the output and the metadata 

830 self._before_call(args, kwargs) 

831 start_time = time.time() 

832 output = self.func(*args, **kwargs) 

833 return self._after_call(call_id, args, kwargs, shelving, output, start_time) 

834 

835 def _before_call(self, args, kwargs): 

836 if self._verbose > 0: 

837 print(format_call(self.func, args, kwargs)) 

838 

839 def _after_call(self, call_id, args, kwargs, shelving, output, start_time): 

840 self.store_backend.dump_item(call_id, output, verbose=self._verbose) 

841 duration = time.time() - start_time 

842 if self._verbose > 0: 

843 self._print_duration(duration) 

844 metadata = self._persist_input(duration, call_id, args, kwargs) 

845 if shelving: 

846 return self._get_memorized_result(call_id, metadata), metadata 

847 

848 if self.mmap_mode is not None: 

849 # Memmap the output at the first call to be consistent with 

850 # later calls 

851 output = self._load_item(call_id, metadata) 

852 return output, metadata 

853 

854 def _persist_input(self, duration, call_id, args, kwargs, this_duration_limit=0.5): 

855 """Save a small summary of the call using json format in the 

856 output directory. 

857 

858 output_dir: string 

859 directory where to write metadata. 

860 

861 duration: float 

862 time taken by hashing input arguments, calling the wrapped 

863 function and persisting its output. 

864 

865 args, kwargs: list and dict 

866 input arguments for wrapped function 

867 

868 this_duration_limit: float 

869 Max execution time for this function before issuing a warning. 

870 """ 

871 start_time = time.time() 

872 argument_dict = filter_args(self.func, self.ignore, args, kwargs) 

873 

874 input_repr = dict((k, repr(v)) for k, v in argument_dict.items()) 

875 # This can fail due to race-conditions with multiple 

876 # concurrent joblibs removing the file or the directory 

877 metadata = { 

878 "duration": duration, 

879 "input_args": input_repr, 

880 "time": start_time, 

881 } 

882 

883 self.store_backend.store_metadata(call_id, metadata) 

884 

885 this_duration = time.time() - start_time 

886 if this_duration > this_duration_limit: 

887 # This persistence should be fast. It will not be if repr() takes 

888 # time and its output is large, because json.dump will have to 

889 # write a large file. This should not be an issue with numpy arrays 

890 # for which repr() always output a short representation, but can 

891 # be with complex dictionaries. Fixing the problem should be a 

892 # matter of replacing repr() above by something smarter. 

893 warnings.warn( 

894 "Persisting input arguments took %.2fs to run." 

895 "If this happens often in your code, it can cause " 

896 "performance problems " 

897 "(results will be correct in all cases). " 

898 "The reason for this is probably some large input " 

899 "arguments for a wrapped function." % this_duration, 

900 stacklevel=5, 

901 ) 

902 return metadata 

903 

904 def _get_memorized_result(self, call_id, metadata=None): 

905 return MemorizedResult( 

906 self.store_backend, 

907 call_id, 

908 metadata=metadata, 

909 timestamp=self.timestamp, 

910 verbose=self._verbose - 1, 

911 ) 

912 

913 def _load_item(self, call_id, metadata=None): 

914 return self.store_backend.load_item( 

915 call_id, metadata=metadata, timestamp=self.timestamp, verbose=self._verbose 

916 ) 

917 

918 def _print_duration(self, duration, context=""): 

919 _, name = get_func_name(self.func) 

920 msg = f"{name} {context}- {format_time(duration)}" 

921 print(max(0, (80 - len(msg))) * "_" + msg) 

922 

923 # ------------------------------------------------------------------------ 

924 # Private `object` interface 

925 # ------------------------------------------------------------------------ 

926 

def __repr__(self):
    """Return ``ClassName(func=..., location=...)`` for this object."""
    class_name = self.__class__.__name__
    return (
        f"{class_name}(func={self.func}, "
        f"location={self.store_backend.location})"
    )

933 

934 

935############################################################################### 

936# class `AsyncMemorizedFunc` 

937############################################################################### 

class AsyncMemorizedFunc(MemorizedFunc):
    """Awaitable counterpart of MemorizedFunc.

    Each public entry point awaits the underlying result whenever the
    inherited machinery hands back a coroutine, and passes plain values
    through untouched.
    """

    async def __call__(self, *args, **kwargs):
        """Return the (possibly cached) value for the given arguments."""
        outcome = self._cached_call(args, kwargs, shelving=False)
        if asyncio.iscoroutine(outcome):
            outcome = await outcome
        # Only the first element is handed back; the metadata is dropped.
        return outcome[0]

    async def call_and_shelve(self, *args, **kwargs):
        """Same as ``__call__`` but runs the cached call with shelving on."""
        outcome = self._cached_call(args, kwargs, shelving=True)
        if asyncio.iscoroutine(outcome):
            outcome = await outcome
        # Only the first element is handed back; the metadata is dropped.
        return outcome[0]

    async def call(self, *args, **kwargs):
        """Invoke the parent ``call`` and await it if it is a coroutine."""
        outcome = super().call(*args, **kwargs)
        if asyncio.iscoroutine(outcome):
            return await outcome
        return outcome

    async def _call(self, call_id, args, kwargs, shelving=False):
        """Await the wrapped function between the before/after call hooks."""
        self._before_call(args, kwargs)
        started_at = time.time()
        value = await self.func(*args, **kwargs)
        return self._after_call(
            call_id, args, kwargs, shelving, value, started_at
        )

958 

959 

960############################################################################### 

961# class `Memory` 

962############################################################################### 

class Memory(Logger):
    """Context object that caches a function's return value for every
    distinct set of input arguments it is called with.

    Cached values are kept on the filesystem, in a deep directory
    structure.

    Read more in the :ref:`User Guide <memory>`.

    Parameters
    ----------
    location: str, pathlib.Path or None
        Path of the base directory used as a data store, or None.
        With None, no caching takes place and the Memory object is
        completely transparent. This option replaces cachedir since
        version 0.12.

    backend: str, optional, default='local'
        Type of store backend used to read and write cache files.
        The 'local' backend relies on regular filesystem operations
        (open, mv, etc) to manipulate data.

    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        Memmapping mode applied when numpy arrays are loaded from the
        cache. See numpy.load for the meaning of the arguments.

    compress: boolean, or integer, optional
        Whether to zip the data stored on disk. An integer should lie
        between 1 and 9 and sets the amount of compression. Note that
        compressed arrays cannot be read by memmapping.

    verbose: int, optional
        Verbosity flag controlling the debug messages issued as
        functions are evaluated.

    backend_options: dict, optional
        Dictionary of named parameters used to configure the store
        backend.
    """

    # ------------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------------

    def __init__(
        self,
        location=None,
        backend="local",
        mmap_mode=None,
        compress=False,
        verbose=1,
        backend_options=None,
    ):
        Logger.__init__(self)
        self._verbose = verbose
        self.mmap_mode = mmap_mode
        self.timestamp = time.time()
        self.backend = backend
        self.compress = compress
        self.backend_options = {} if backend_options is None else backend_options

        if compress and mmap_mode is not None:
            warnings.warn("Compressed results cannot be memmapped", stacklevel=2)

        self.location = location
        # Only plain string locations get the "joblib" sub-directory
        # appended; any other value is handed to the factory untouched.
        store_root = (
            os.path.join(location, "joblib")
            if isinstance(location, str)
            else location
        )

        store_options = dict(
            compress=compress, mmap_mode=mmap_mode, **self.backend_options
        )
        self.store_backend = _store_backend_factory(
            backend,
            store_root,
            verbose=self._verbose,
            backend_options=store_options,
        )

    def cache(
        self,
        func=None,
        ignore=None,
        verbose=None,
        mmap_mode=False,
        cache_validation_callback=None,
    ):
        """Decorate ``func`` so that its return value is only computed
        for input arguments that are not already cached on disk.

        Parameters
        ----------
        func: callable, optional
            The function to be decorated
        ignore: list of strings
            Names of the arguments to leave out of the hashing
        verbose: integer, optional
            The verbosity mode of the function. By default that
            of the memory object is used.
        mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
            The memmapping mode used when loading from cache
            numpy arrays. See numpy.load for the meaning of the
            arguments. By default that of the memory object is used.
        cache_validation_callback: callable, optional
            Callable deciding whether an existing cache entry is still
            valid. When the cached function is called with arguments
            for which a cache exists, this callable receives the
            metadata of the cached result as its sole argument. If it
            returns True the cached result is returned, otherwise the
            cache for these arguments is cleared and recomputed.

        Returns
        -------
        decorated_func: MemorizedFunc object
            The returned object is a MemorizedFunc object, that is
            callable (behaves like a function), but offers extra
            methods for cache lookup and management. See the
            documentation for :class:`joblib.memory.MemorizedFunc`.

        Raises
        ------
        ValueError
            If ``cache_validation_callback`` is neither None nor callable.
        """
        if not (
            cache_validation_callback is None
            or callable(cache_validation_callback)
        ):
            raise ValueError(
                "cache_validation_callback needs to be callable. "
                f"Got {cache_validation_callback}."
            )

        if func is None:
            # Used as ``@memory.cache(...)``: hand back a partial that
            # remembers the keyword arguments until the function arrives.
            return functools.partial(
                self.cache,
                ignore=ignore,
                mmap_mode=mmap_mode,
                verbose=verbose,
                cache_validation_callback=cache_validation_callback,
            )

        if self.store_backend is None:
            # No store backend: wrap without any caching.
            if asyncio.iscoroutinefunction(func):
                return AsyncNotMemorizedFunc(func)
            return NotMemorizedFunc(func)

        # Unset options fall back to the values held by the memory object.
        # ``False`` (not ``None``) marks an unset mmap_mode.
        if verbose is None:
            verbose = self._verbose
        if mmap_mode is False:
            mmap_mode = self.mmap_mode

        # Re-wrapping an already memorized function: use the raw callable.
        if isinstance(func, MemorizedFunc):
            func = func.func

        if asyncio.iscoroutinefunction(func):
            wrapper_cls = AsyncMemorizedFunc
        else:
            wrapper_cls = MemorizedFunc
        return wrapper_cls(
            func,
            location=self.store_backend,
            backend=self.backend,
            ignore=ignore,
            mmap_mode=mmap_mode,
            compress=self.compress,
            verbose=verbose,
            timestamp=self.timestamp,
            cache_validation_callback=cache_validation_callback,
        )

    def clear(self, warn=True):
        """Erase the complete cache directory."""
        if warn:
            self.warn("Flushing completely the cache")
        if self.store_backend is not None:
            self.store_backend.clear()

            # The cache is now empty, so the _FUNCTION_HASHES table is
            # reset as well. Otherwise, for a function still listed in
            # that table, results cached after this clear would be cache
            # misses because the function code is not re-written.
            _FUNCTION_HASHES.clear()

    def reduce_size(self, bytes_limit=None, items_limit=None, age_limit=None):
        """Remove cache elements to make the cache fit its limits.

        The limits can require that the cache size fits in
        ``bytes_limit``, that the cache holds no more than
        ``items_limit`` items, and that no file in the cache is older
        than ``age_limit``.

        Parameters
        ----------
        bytes_limit: int | str, optional
            Limit in bytes of the size of the cache. By default, the size of
            the cache is unlimited. When reducing the size of the cache,
            ``joblib`` keeps the most recently accessed items first. If a
            str is passed, it is converted to a number of bytes using units
            { K | M | G} for kilo, mega, giga.

        items_limit: int, optional
            Number of items to limit the cache to. By default, the number of
            items in the cache is unlimited. When reducing the size of the
            cache, ``joblib`` keeps the most recently accessed items first.

        age_limit: datetime.timedelta, optional
            Maximum age of items to limit the cache to. When reducing the size
            of the cache, any items last accessed more than the given length of
            time ago are deleted. Example: to remove files older than 5 days,
            use datetime.timedelta(days=5). Negative timedelta are not
            accepted.
        """
        limits = (bytes_limit, items_limit, age_limit)
        # Nothing to do without a store backend or without any limit.
        if self.store_backend is None or all(limit is None for limit in limits):
            return

        # The store backend performs the actual enforcement.
        self.store_backend.enforce_store_limits(*limits)

    def eval(self, func, *args, **kwargs):
        """Evaluate ``func`` with arguments `*args` and `**kwargs`,
        in the context of the memory.

        This method works similarly to the builtin `apply`, except
        that the function is called only if the cache is not
        up to date.

        """
        if self.store_backend is not None:
            func = self.cache(func)
        return func(*args, **kwargs)

    # ------------------------------------------------------------------------
    # Private `object` interface
    # ------------------------------------------------------------------------

    def __repr__(self):
        """Return ``ClassName(location=...)``; location is None when
        there is no store backend."""
        backend = self.store_backend
        location = None if backend is None else backend.location
        return f"{self.__class__.__name__}(location={location})"

    def __getstate__(self):
        """Return the instance state with the timestamp blanked out, so
        that the hash does not depend on it.
        """
        return {**self.__dict__, "timestamp": None}

1211 

1212 

1213############################################################################### 

1214# cache_validation_callback helpers 

1215############################################################################### 

1216 

1217 

def expires_after(
    days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0
):
    """Build a cache_validation_callback that forces a recompute once a
    cached result is older than the given duration.

    Parameters
    ----------
    days, seconds, microseconds, milliseconds, minutes, hours, weeks: numbers
        arguments passed to a timedelta.

    Returns
    -------
    callable
        Takes the metadata dict of a cached result and returns True while
        the time elapsed since ``metadata["time"]`` is strictly below the
        configured duration.
    """
    max_age_seconds = datetime.timedelta(
        weeks=weeks,
        days=days,
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        milliseconds=milliseconds,
        microseconds=microseconds,
    ).total_seconds()

    def cache_validation_callback(metadata):
        elapsed = time.time() - metadata["time"]
        return elapsed < max_age_seconds

    return cache_validation_callback