Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/joblib/memory.py: 24%


1""" 

2A context object for caching a function's return value each time it 

3is called with the same input arguments. 

4 

5""" 

6 

7# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org> 

8# Copyright (c) 2009 Gael Varoquaux 

9# License: BSD Style, 3 clauses. 

10 

11 

12import asyncio 

13import datetime 

14import functools 

15import inspect 

16import logging 

17import os 

18import pathlib 

19import pydoc 

20import re 

21import textwrap 

22import time 

23import tokenize 

24import traceback 

25import warnings 

26import weakref 

27 

28from . import hashing 

29from ._store_backends import CacheWarning # noqa 

30from ._store_backends import FileSystemStoreBackend, StoreBackendBase 

31from .func_inspect import (filter_args, format_call, format_signature, 

32 get_func_code, get_func_name) 

33from .logger import Logger, format_time, pformat 

34 

35FIRST_LINE_TEXT = "# first line:" 

36 

37# TODO: The following object should have a data store object as a sub 

38# object, and the interface to persist and query should be separated in 

39# the data store. 

40# 

41# This would enable creating 'Memory' objects with a different logic for 

42# pickling that would simply span a MemorizedFunc with the same 

43# store (or do we want to copy it to avoid cross-talks?), for instance to 

44# implement HDF5 pickling. 

45 

46# TODO: Same remark for the logger, and probably use the Python logging 

47# mechanism. 

48 

49 

50def extract_first_line(func_code): 

51 """ Extract the first line information from the function code 

52 text if available. 

53 """ 

54 if func_code.startswith(FIRST_LINE_TEXT): 

55 func_code = func_code.split('\n') 

56 first_line = int(func_code[0][len(FIRST_LINE_TEXT):]) 

57 func_code = '\n'.join(func_code[1:]) 

58 else: 

59 first_line = -1 

60 return func_code, first_line 

61 

62 
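
# Illustrative sketch (not part of the original module): cached function code
# is stored with a "# first line:" header, which extract_first_line strips off
# again. The snippets below are made-up example values.
#
#     >>> extract_first_line("# first line: 42\ndef f():\n    pass")
#     ('def f():\n    pass', 42)
#     >>> extract_first_line("def f():\n    pass")
#     ('def f():\n    pass', -1)
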

class JobLibCollisionWarning(UserWarning):
    """ Warn that there might be a collision between names of functions.
    """


_STORE_BACKENDS = {'local': FileSystemStoreBackend}


def register_store_backend(backend_name, backend):
    """Extend available store backends.

    The Memory, MemorizedResult and MemorizedFunc objects are designed to be
    agnostic to the type of store used behind them. By default, the local file
    system is used, but this function makes it possible to extend joblib's
    memory pattern with other types of storage such as cloud storage (S3, GCS,
    OpenStack, HadoopFS, etc) or blob DBs.

    Parameters
    ----------
    backend_name: str
        The name identifying the store backend being registered. For example,
        'local' is used with FileSystemStoreBackend.
    backend: StoreBackendBase subclass
        The class that implements the StoreBackendBase interface.

    """
    if not isinstance(backend_name, str):
        raise ValueError("Store backend name should be a string, "
                         "'{0}' given.".format(backend_name))
    if backend is None or not issubclass(backend, StoreBackendBase):
        raise ValueError("Store backend should inherit "
                         "StoreBackendBase, "
                         "'{0}' given.".format(backend))

    _STORE_BACKENDS[backend_name] = backend
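
# Illustrative sketch (not part of the original module): registering a custom
# store backend. ``RedisStoreBackend`` is a hypothetical StoreBackendBase
# subclass used only to show the call; any real backend must implement the
# StoreBackendBase interface.
#
#     from joblib.memory import register_store_backend
#     from joblib._store_backends import StoreBackendBase
#
#     class RedisStoreBackend(StoreBackendBase):  # hypothetical backend
#         ...  # implement the StoreBackendBase interface here
#
#     register_store_backend('redis', RedisStoreBackend)
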

def _store_backend_factory(backend, location, verbose=0, backend_options=None):
    """Return the correct store object for the given location."""
    if backend_options is None:
        backend_options = {}

    if isinstance(location, pathlib.Path):
        location = str(location)

    if isinstance(location, StoreBackendBase):
        return location
    elif isinstance(location, str):
        obj = None
        location = os.path.expanduser(location)
        # The location is a string: look in the registered backends for
        # one matching the given backend name.
        for backend_key, backend_obj in _STORE_BACKENDS.items():
            if backend == backend_key:
                obj = backend_obj()

        # If no matching backend is found, raise an explicit error.
        if obj is None:
            raise TypeError('Unknown location {0} or backend {1}'.format(
                            location, backend))

        # The store backend is configured with the extra named parameters,
        # some of them are specific to the underlying store backend.
        obj.configure(location, verbose=verbose,
                      backend_options=backend_options)
        return obj
    elif location is not None:
        warnings.warn(
            "Instantiating a backend using a {} as a location is not "
            "supported by joblib. Returning None instead.".format(
                location.__class__.__name__), UserWarning)

    return None


def _build_func_identifier(func):
    """Build a roughly unique identifier for the cached function."""
    modules, funcname = get_func_name(func)
    # We reuse the historical fs-like way of building a function identifier
    return os.path.join(*modules, funcname)


# An in-memory store to avoid looking at the disk-based function
# source code to check if a function definition has changed
_FUNCTION_HASHES = weakref.WeakKeyDictionary()


###############################################################################
# class `MemorizedResult`
###############################################################################
class MemorizedResult(Logger):
    """Object representing a cached value.

    Attributes
    ----------
    location: str
        The location of joblib cache. Depends on the store backend used.

    func: function or str
        function whose output is cached. The string case is intended only for
        instantiation based on the output of repr() on another instance.
        (namely eval(repr(memorized_instance)) works).

    argument_hash: str
        hash of the function arguments.

    backend: str
        Type of store backend for reading/writing cache files.
        Default is 'local'.

    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
        The memmapping mode used when loading from cache numpy arrays. See
        numpy.load for the meaning of the different values.

    verbose: int
        verbosity level (0 means no message).

    timestamp, metadata: string
        for internal use only.
    """
    def __init__(self, location, call_id, backend='local', mmap_mode=None,
                 verbose=0, timestamp=None, metadata=None):
        Logger.__init__(self)
        self._call_id = call_id
        self.store_backend = _store_backend_factory(backend, location,
                                                    verbose=verbose)
        self.mmap_mode = mmap_mode

        if metadata is not None:
            self.metadata = metadata
        else:
            self.metadata = self.store_backend.get_metadata(self._call_id)

        self.duration = self.metadata.get('duration', None)
        self.verbose = verbose
        self.timestamp = timestamp

    @property
    def func(self):
        return self.func_id

    @property
    def func_id(self):
        return self._call_id[0]

    @property
    def args_id(self):
        return self._call_id[1]

    @property
    def argument_hash(self):
        warnings.warn(
            "The 'argument_hash' attribute has been deprecated in version "
            "0.12 and will be removed in version 0.14.\n"
            "Use `args_id` attribute instead.",
            DeprecationWarning, stacklevel=2)
        return self.args_id

    def get(self):
        """Read value from cache and return it."""
        try:
            return self.store_backend.load_item(
                self._call_id,
                timestamp=self.timestamp,
                metadata=self.metadata,
                verbose=self.verbose
            )
        except ValueError as exc:
            new_exc = KeyError(
                "Error while trying to load a MemorizedResult's value. "
                "It seems that this folder is corrupted : {}".format(
                    os.path.join(self.store_backend.location, *self._call_id)))
            raise new_exc from exc

    def clear(self):
        """Clear value from cache"""
        self.store_backend.clear_item(self._call_id)

    def __repr__(self):
        return '{}(location="{}", func="{}", args_id="{}")'.format(
            self.__class__.__name__, self.store_backend.location,
            *self._call_id
        )

    def __getstate__(self):
        state = self.__dict__.copy()
        state['timestamp'] = None
        return state


class NotMemorizedResult(object):
    """Class representing an arbitrary value.

    This class is a replacement for MemorizedResult when there is no cache.
    """
    __slots__ = ('value', 'valid')

    def __init__(self, value):
        self.value = value
        self.valid = True

    def get(self):
        if self.valid:
            return self.value
        else:
            raise KeyError("No value stored.")

    def clear(self):
        self.valid = False
        self.value = None

    def __repr__(self):
        if self.valid:
            return ('{class_name}({value})'
                    .format(class_name=self.__class__.__name__,
                            value=pformat(self.value)))
        else:
            return self.__class__.__name__ + ' with no value'

    # __getstate__ and __setstate__ are required because of __slots__
    def __getstate__(self):
        return {"valid": self.valid, "value": self.value}

    def __setstate__(self, state):
        self.valid = state["valid"]
        self.value = state["value"]


###############################################################################
# class `NotMemorizedFunc`
###############################################################################
class NotMemorizedFunc(object):
    """No-op object decorating a function.

    This class replaces MemorizedFunc when there is no cache. It provides an
    identical API but does not write anything on disk.

    Attributes
    ----------
    func: callable
        Original undecorated function.
    """
    # Should be as light as possible (for speed)
    def __init__(self, func):
        self.func = func

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    def call_and_shelve(self, *args, **kwargs):
        return NotMemorizedResult(self.func(*args, **kwargs))

    def __repr__(self):
        return '{0}(func={1})'.format(self.__class__.__name__, self.func)

    def clear(self, warn=True):
        # Argument "warn" is for compatibility with MemorizedFunc.clear
        pass

    def call(self, *args, **kwargs):
        return self.func(*args, **kwargs), {}

    def check_call_in_cache(self, *args, **kwargs):
        return False


###############################################################################
# class `AsyncNotMemorizedFunc`
###############################################################################
class AsyncNotMemorizedFunc(NotMemorizedFunc):
    async def call_and_shelve(self, *args, **kwargs):
        return NotMemorizedResult(await self.func(*args, **kwargs))


###############################################################################
# class `MemorizedFunc`
###############################################################################
class MemorizedFunc(Logger):
    """Callable object decorating a function for caching its return value
    each time it is called.

    Methods are provided to inspect the cache or clean it.

    Attributes
    ----------
    func: callable
        The original, undecorated, function.

    location: string
        The location of joblib cache. Depends on the store backend used.

    backend: str
        Type of store backend for reading/writing cache files.
        Default is 'local', in which case the location is the path to a
        disk storage.

    ignore: list or None
        List of variable names to ignore when choosing whether to
        recompute.

    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}
        The memmapping mode used when loading from cache
        numpy arrays. See numpy.load for the meaning of the different
        values.

    compress: boolean, or integer
        Whether to zip the stored data on disk. If an integer is
        given, it should be between 1 and 9, and sets the amount
        of compression. Note that compressed arrays cannot be
        read by memmapping.

    verbose: int, optional
        The verbosity flag, controls messages that are issued as
        the function is evaluated.

    cache_validation_callback: callable, optional
        Callable to check if a result in cache is valid or is to be recomputed.
        When the function is called with arguments for which a cache exists,
        the callback is called with the cache entry's metadata as its sole
        argument. If it returns True, the cached result is returned, else the
        cache for these arguments is cleared and the result is recomputed.
    """
    # ------------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------------

    def __init__(self, func, location, backend='local', ignore=None,
                 mmap_mode=None, compress=False, verbose=1, timestamp=None,
                 cache_validation_callback=None):
        Logger.__init__(self)
        self.mmap_mode = mmap_mode
        self.compress = compress
        self.func = func
        self.cache_validation_callback = cache_validation_callback
        self.func_id = _build_func_identifier(func)
        self.ignore = ignore if ignore is not None else []
        self._verbose = verbose

        # retrieve store object from backend type and location.
        self.store_backend = _store_backend_factory(backend, location,
                                                    verbose=verbose,
                                                    backend_options=dict(
                                                        compress=compress,
                                                        mmap_mode=mmap_mode),
                                                    )
        if self.store_backend is not None:
            # Create func directory on demand.
            self.store_backend.store_cached_func_code([self.func_id])

        self.timestamp = timestamp if timestamp is not None else time.time()
        try:
            functools.update_wrapper(self, func)
        except Exception:
            pass  # Objects like ufunc don't like that
        if inspect.isfunction(func):
            doc = pydoc.TextDoc().document(func)
            # Remove blank line
            doc = doc.replace('\n', '\n\n', 1)
            # Strip backspace-overprints for compatibility with autodoc
            doc = re.sub('\x08.', '', doc)
        else:
            # Pydoc does a poor job on other objects
            doc = func.__doc__
        self.__doc__ = 'Memoized version of %s' % doc

        self._func_code_info = None
        self._func_code_id = None

    def _is_in_cache_and_valid(self, call_id):
        """Check if the function call is cached and valid for given arguments.

        - Compare the function code with the one from the cached function,
          asserting if it has changed.
        - Check if the function call is present in the cache.
        - Call `cache_validation_callback` for user-defined cache validation.

        Returns True if the function call is in cache and can be used, and
        returns False otherwise.
        """
        # Check if the code of the function has changed
        if not self._check_previous_func_code(stacklevel=4):
            return False

        # Check if this specific call is in the cache
        if not self.store_backend.contains_item(call_id):
            return False

        # Call the user defined cache validation callback
        metadata = self.store_backend.get_metadata(call_id)
        if (self.cache_validation_callback is not None and
                not self.cache_validation_callback(metadata)):
            self.store_backend.clear_item(call_id)
            return False

        return True

    def _cached_call(self, args, kwargs, shelving):
        """Call wrapped function and cache result, or read cache if available.

        This function returns the wrapped function output or a reference to
        the cached result.

        Parameters
        ----------
        args, kwargs: list and dict
            input arguments for wrapped function

        shelving: bool
            True when called via the call_and_shelve function.

        Returns
        -------
        output: Output of the wrapped function if shelving is false, or a
            MemorizedResult reference to the value if shelving is true.
        metadata: dict containing the metadata associated with the call.
        """
        args_id = self._get_args_id(*args, **kwargs)
        call_id = (self.func_id, args_id)
        _, func_name = get_func_name(self.func)
        func_info = self.store_backend.get_cached_func_info([self.func_id])
        location = func_info['location']

        if self._verbose >= 20:
            logging.basicConfig(level=logging.INFO)
            _, signature = format_signature(self.func, *args, **kwargs)
            self.info(
                textwrap.dedent(
                    f"""
                    Querying {func_name} with signature
                    {signature}.

                    (argument hash {args_id})

                    The store location is {location}.
                    """
                )
            )

        # Compare the function code with the previous to see if the
        # function code has changed and check if the results are present in
        # the cache.
        if self._is_in_cache_and_valid(call_id):
            if shelving:
                return self._get_memorized_result(call_id), {}

            try:
                start_time = time.time()
                output = self._load_item(call_id)
                if self._verbose > 4:
                    self._print_duration(time.time() - start_time,
                                         context='cache loaded ')
                return output, {}
            except Exception:
                # XXX: Should use an exception logger
                _, signature = format_signature(self.func, *args, **kwargs)
                self.warn('Exception while loading results for '
                          '{}\n {}'.format(signature, traceback.format_exc()))

        if self._verbose > 10:
            self.warn(
                f"Computing func {func_name}, argument hash {args_id} "
                f"in location {location}"
            )

        # Returns the output but not the metadata
        return self._call(call_id, args, kwargs, shelving)

    @property
    def func_code_info(self):
        # 3-tuple property containing: the function source code, source file,
        # and first line of the code inside the source file
        if hasattr(self.func, '__code__'):
            if self._func_code_id is None:
                self._func_code_id = id(self.func.__code__)
            elif id(self.func.__code__) != self._func_code_id:
                # Be robust to dynamic reassignments of self.func.__code__
                self._func_code_info = None

        if self._func_code_info is None:
            # Cache the source code of self.func . Provided that get_func_code
            # (which should be called once on self) gets called in the process
            # in which self.func was defined, this caching mechanism prevents
            # undesired cache clearing when the cached function is called in
            # an environment where the introspection utilities get_func_code
            # relies on do not work (typically, in joblib child processes).
            # See #1035 for more info
            # TODO (pierreglaser): do the same with get_func_name?
            self._func_code_info = get_func_code(self.func)
        return self._func_code_info

    def call_and_shelve(self, *args, **kwargs):
        """Call wrapped function, cache result and return a reference.

        This method returns a reference to the cached result instead of the
        result itself. The reference object is small and picklable, allowing
        it to be sent or stored easily. Call .get() on the reference object
        to get the result.

        Returns
        -------
        cached_result: MemorizedResult or NotMemorizedResult
            reference to the value returned by the wrapped function. The
            class "NotMemorizedResult" is used when there is no cache
            activated (e.g. location=None in Memory).
        """
        # Return the wrapped output, without the metadata
        return self._cached_call(args, kwargs, shelving=True)[0]
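
    # Illustrative sketch (not part of the original module): shelving a
    # result. ``cached_f`` stands for any MemorizedFunc, e.g. obtained from
    # ``Memory('/tmp/joblib_cache').cache(f)``; the path is an example.
    #
    #     ref = cached_f.call_and_shelve(3)   # MemorizedResult, small and picklable
    #     value = ref.get()                   # load the cached value from the store
    #     ref.clear()                         # drop this entry from the cache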

    def __call__(self, *args, **kwargs):
        # Return the output, without the metadata
        return self._cached_call(args, kwargs, shelving=False)[0]

    def __getstate__(self):
        # Make sure self.func's source is introspected prior to being pickled -
        # code introspection utilities typically do not work inside child
        # processes
        _ = self.func_code_info

        # We don't store the timestamp when pickling, to avoid the hash
        # depending on it.
        state = self.__dict__.copy()
        state['timestamp'] = None

        # Invalidate the code id as id(obj) will be different in the child
        state['_func_code_id'] = None

        return state

    def check_call_in_cache(self, *args, **kwargs):
        """Check if the function call is cached and valid for given arguments.

        Does not call the function or do any work besides function inspection
        and argument hashing.

        - Compare the function code with the one from the cached function,
          asserting if it has changed.
        - Check if the function call is present in the cache.
        - Call `cache_validation_callback` for user-defined cache validation.

        Returns
        -------
        is_call_in_cache: bool
            Whether or not the function call is in cache and can be used.
        """
        call_id = (self.func_id, self._get_args_id(*args, **kwargs))
        return self._is_in_cache_and_valid(call_id)
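
    # Illustrative sketch (not part of the original module): probing the cache
    # without triggering a computation. ``cached_f`` is a MemorizedFunc as in
    # the example above.
    #
    #     if cached_f.check_call_in_cache(3):
    #         print("a valid cached result exists for these arguments")
    #     else:
    #         result = cached_f(3)  # compute (and cache) it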

    # ------------------------------------------------------------------------
    # Private interface
    # ------------------------------------------------------------------------

    def _get_args_id(self, *args, **kwargs):
        """Return the input parameter hash of a result."""
        return hashing.hash(filter_args(self.func, self.ignore, args, kwargs),
                            coerce_mmap=self.mmap_mode is not None)

    def _hash_func(self):
        """Hash a function to key the online cache"""
        func_code_h = hash(getattr(self.func, '__code__', None))
        return id(self.func), hash(self.func), func_code_h

    def _write_func_code(self, func_code, first_line):
        """ Write the function code and the filename to a file.
        """
        # We store the first line because the filename and the function
        # name are not always enough to identify a function: people
        # sometimes have several functions named the same way in a
        # file. This is bad practice, but joblib should be robust to bad
        # practice.
        func_code = u'%s %i\n%s' % (FIRST_LINE_TEXT, first_line, func_code)
        self.store_backend.store_cached_func_code([self.func_id], func_code)

        # Also store in the in-memory store of function hashes
        is_named_callable = (hasattr(self.func, '__name__') and
                             self.func.__name__ != '<lambda>')
        if is_named_callable:
            # Don't do this for lambda functions or strange callable
            # objects, as it ends up being too fragile
            func_hash = self._hash_func()
            try:
                _FUNCTION_HASHES[self.func] = func_hash
            except TypeError:
                # Some callables are not hashable
                pass

    def _check_previous_func_code(self, stacklevel=2):
        """
            stacklevel is the depth at which this function is called, to
            issue useful warnings to the user.
        """
        # First check if our function is in the in-memory store.
        # Using the in-memory store not only makes things faster, but it
        # also renders us robust to variations of the files when the
        # in-memory version of the code does not vary
        try:
            if self.func in _FUNCTION_HASHES:
                # We use as an identifier the id of the function and its
                # hash. This is more likely to falsely change than have hash
                # collisions, thus we are on the safe side.
                func_hash = self._hash_func()
                if func_hash == _FUNCTION_HASHES[self.func]:
                    return True
        except TypeError:
            # Some callables are not hashable
            pass

        # Here, we go through some effort to be robust to dynamically
        # changing code and collision. We cannot inspect.getsource
        # because it is not reliable when using IPython's magic "%run".
        func_code, source_file, first_line = self.func_code_info
        try:
            old_func_code, old_first_line = extract_first_line(
                self.store_backend.get_cached_func_code([self.func_id]))
        except (IOError, OSError):  # some backends can also raise OSError
            self._write_func_code(func_code, first_line)
            return False
        if old_func_code == func_code:
            return True

        # We have differing code, is this because we are referring to
        # different functions, or because the function we are referring to has
        # changed?

        _, func_name = get_func_name(self.func, resolv_alias=False,
                                     win_characters=False)
        if old_first_line == first_line == -1 or func_name == '<lambda>':
            if not first_line == -1:
                func_description = ("{0} ({1}:{2})"
                                    .format(func_name, source_file,
                                            first_line))
            else:
                func_description = func_name
            warnings.warn(JobLibCollisionWarning(
                "Cannot detect name collisions for function '{0}'"
                .format(func_description)), stacklevel=stacklevel)

        # Fetch the code at the old location and compare it. If it is the
        # same as the stored code, we have a collision: the code in the
        # file has not changed, but the name we have is pointing to a new
        # code block.
        if not old_first_line == first_line and source_file is not None:
            if os.path.exists(source_file):
                _, func_name = get_func_name(self.func, resolv_alias=False)
                num_lines = len(func_code.split('\n'))
                with tokenize.open(source_file) as f:
                    on_disk_func_code = f.readlines()[
                        old_first_line - 1:old_first_line - 1 + num_lines - 1]
                on_disk_func_code = ''.join(on_disk_func_code)
                possible_collision = (on_disk_func_code.rstrip() ==
                                      old_func_code.rstrip())
            else:
                possible_collision = source_file.startswith('<doctest ')
            if possible_collision:
                warnings.warn(JobLibCollisionWarning(
                    'Possible name collisions between functions '
                    "'%s' (%s:%i) and '%s' (%s:%i)" %
                    (func_name, source_file, old_first_line,
                     func_name, source_file, first_line)),
                    stacklevel=stacklevel)

        # The function has changed, wipe the cache directory.
        # XXX: Should be using warnings, and giving stacklevel
        if self._verbose > 10:
            _, func_name = get_func_name(self.func, resolv_alias=False)
            self.warn("Function {0} (identified by {1}) has changed"
                      ".".format(func_name, self.func_id))
        self.clear(warn=True)
        return False

    def clear(self, warn=True):
        """Empty the function's cache."""
        func_id = self.func_id
        if self._verbose > 0 and warn:
            self.warn("Clearing function cache identified by %s" % func_id)
        self.store_backend.clear_path([func_id, ])

        func_code, _, first_line = self.func_code_info
        self._write_func_code(func_code, first_line)

    def call(self, *args, **kwargs):
        """Force the execution of the function with the given arguments.

        The output values will be persisted, i.e., the cache will be updated
        with any new values.

        Parameters
        ----------
        *args: arguments
            The arguments.
        **kwargs: keyword arguments
            Keyword arguments.

        Returns
        -------
        output : object
            The output of the function call.
        metadata : dict
            The metadata associated with the call.
        """
        call_id = (self.func_id, self._get_args_id(*args, **kwargs))

        # Return the output and the metadata
        return self._call(call_id, args, kwargs)

    def _call(self, call_id, args, kwargs, shelving=False):
        # Return the output and the metadata
        self._before_call(args, kwargs)
        start_time = time.time()
        output = self.func(*args, **kwargs)
        return self._after_call(call_id, args, kwargs, shelving,
                                output, start_time)

    def _before_call(self, args, kwargs):
        if self._verbose > 0:
            print(format_call(self.func, args, kwargs))

    def _after_call(self, call_id, args, kwargs, shelving, output, start_time):
        self.store_backend.dump_item(call_id, output, verbose=self._verbose)
        duration = time.time() - start_time
        if self._verbose > 0:
            self._print_duration(duration)
        metadata = self._persist_input(duration, call_id, args, kwargs)
        if shelving:
            return self._get_memorized_result(call_id, metadata), metadata

        if self.mmap_mode is not None:
            # Memmap the output at the first call to be consistent with
            # later calls
            output = self._load_item(call_id, metadata)
        return output, metadata

    def _persist_input(self, duration, call_id, args, kwargs,
                       this_duration_limit=0.5):
        """ Save a small summary of the call using json format in the
            output directory.

            output_dir: string
                directory where to write metadata.

            duration: float
                time taken by hashing input arguments, calling the wrapped
                function and persisting its output.

            args, kwargs: list and dict
                input arguments for wrapped function

            this_duration_limit: float
                Max execution time for this function before issuing a warning.
        """
        start_time = time.time()
        argument_dict = filter_args(self.func, self.ignore,
                                    args, kwargs)

        input_repr = dict((k, repr(v)) for k, v in argument_dict.items())
        # This can fail due to race-conditions with multiple
        # concurrent joblibs removing the file or the directory
        metadata = {
            "duration": duration, "input_args": input_repr, "time": start_time,
        }

        self.store_backend.store_metadata(call_id, metadata)

        this_duration = time.time() - start_time
        if this_duration > this_duration_limit:
            # This persistence should be fast. It will not be if repr() takes
            # time and its output is large, because json.dump will have to
            # write a large file. This should not be an issue with numpy arrays
            # for which repr() always outputs a short representation, but can
            # be with complex dictionaries. Fixing the problem should be a
            # matter of replacing repr() above by something smarter.
            warnings.warn("Persisting input arguments took %.2fs to run. "
                          "If this happens often in your code, it can cause "
                          "performance problems "
                          "(results will be correct in all cases). "
                          "The reason for this is probably some large input "
                          "arguments for a wrapped function."
                          % this_duration, stacklevel=5)
        return metadata

    def _get_memorized_result(self, call_id, metadata=None):
        return MemorizedResult(self.store_backend, call_id,
                               metadata=metadata, timestamp=self.timestamp,
                               verbose=self._verbose - 1)

    def _load_item(self, call_id, metadata=None):
        return self.store_backend.load_item(call_id, metadata=metadata,
                                            timestamp=self.timestamp,
                                            verbose=self._verbose)

    def _print_duration(self, duration, context=''):
        _, name = get_func_name(self.func)
        msg = f"{name} {context}- {format_time(duration)}"
        print(max(0, (80 - len(msg))) * '_' + msg)

    # ------------------------------------------------------------------------
    # Private `object` interface
    # ------------------------------------------------------------------------

    def __repr__(self):
        return '{class_name}(func={func}, location={location})'.format(
            class_name=self.__class__.__name__,
            func=self.func,
            location=self.store_backend.location,)


###############################################################################
# class `AsyncMemorizedFunc`
###############################################################################
class AsyncMemorizedFunc(MemorizedFunc):
    async def __call__(self, *args, **kwargs):
        out = self._cached_call(args, kwargs, shelving=False)
        out = await out if asyncio.iscoroutine(out) else out
        return out[0]  # Don't return metadata

    async def call_and_shelve(self, *args, **kwargs):
        out = self._cached_call(args, kwargs, shelving=True)
        out = await out if asyncio.iscoroutine(out) else out
        return out[0]  # Don't return metadata

    async def call(self, *args, **kwargs):
        out = super().call(*args, **kwargs)
        return await out if asyncio.iscoroutine(out) else out

    async def _call(self, call_id, args, kwargs, shelving=False):
        self._before_call(args, kwargs)
        start_time = time.time()
        output = await self.func(*args, **kwargs)
        return self._after_call(
            call_id, args, kwargs, shelving, output, start_time
        )
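
# Illustrative sketch (not part of the original module): Memory.cache (defined
# below) returns an AsyncMemorizedFunc when it decorates a coroutine function,
# so the cached function is awaited like the original. The path, function name
# and URL are example values.
#
#     memory = Memory('/tmp/joblib_cache', verbose=0)
#
#     @memory.cache
#     async def slow_fetch(url):
#         ...
#
#     result = await slow_fetch('https://example.org')  # inside a coroutine
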

###############################################################################
# class `Memory`
###############################################################################
class Memory(Logger):
    """ A context object for caching a function's return value each time it
        is called with the same input arguments.

        All values are cached on the filesystem, in a deep directory
        structure.

        Read more in the :ref:`User Guide <memory>`.

        Parameters
        ----------
        location: str, pathlib.Path or None
            The path of the base directory to use as a data store
            or None. If None is given, no caching is done and
            the Memory object is completely transparent. This option
            replaces cachedir since version 0.12.

        backend: str, optional
            Type of store backend for reading/writing cache files.
            Default: 'local'.
            The 'local' backend uses regular filesystem operations to
            manipulate data (open, mv, etc) in the backend.

        mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
            The memmapping mode used when loading from cache
            numpy arrays. See numpy.load for the meaning of the
            arguments.

        compress: boolean, or integer, optional
            Whether to zip the stored data on disk. If an integer is
            given, it should be between 1 and 9, and sets the amount
            of compression. Note that compressed arrays cannot be
            read by memmapping.

        verbose: int, optional
            Verbosity flag, controls the debug messages that are issued
            as functions are evaluated.

        backend_options: dict, optional
            Contains a dictionary of named parameters used to configure
            the store backend.
    """
    # ------------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------------

    def __init__(self, location=None, backend='local', mmap_mode=None,
                 compress=False, verbose=1, backend_options=None):
        Logger.__init__(self)
        self._verbose = verbose
        self.mmap_mode = mmap_mode
        self.timestamp = time.time()
        self.backend = backend
        self.compress = compress
        if backend_options is None:
            backend_options = {}
        self.backend_options = backend_options

        if compress and mmap_mode is not None:
            warnings.warn('Compressed results cannot be memmapped',
                          stacklevel=2)

        self.location = location
        if isinstance(location, str):
            location = os.path.join(location, 'joblib')

        self.store_backend = _store_backend_factory(
            backend, location, verbose=self._verbose,
            backend_options=dict(compress=compress, mmap_mode=mmap_mode,
                                 **backend_options))
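
    # Illustrative sketch (not part of the original module): typical
    # construction. The directory path is an example; a 'joblib' subdirectory
    # is appended to it automatically (see __init__ above).
    #
    #     from joblib import Memory
    #     memory = Memory('/tmp/my_cache', mmap_mode='r', verbose=0)
    #     memory_off = Memory(None)   # location=None: caching is fully disabled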

    def cache(self, func=None, ignore=None, verbose=None, mmap_mode=False,
              cache_validation_callback=None):
        """ Decorates the given function func to only compute its return
            value for input arguments not cached on disk.

            Parameters
            ----------
            func: callable, optional
                The function to be decorated
            ignore: list of strings
                A list of argument names to ignore in the hashing
            verbose: integer, optional
                The verbosity mode of the function. By default that
                of the memory object is used.
            mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
                The memmapping mode used when loading from cache
                numpy arrays. See numpy.load for the meaning of the
                arguments. By default that of the memory object is used.
            cache_validation_callback: callable, optional
                Callable to validate whether or not the cache is valid. When
                the cached function is called with arguments for which a cache
                exists, this callable is called with the metadata of the cached
                result as its sole argument. If it returns True, then the
                cached result is returned, else the cache for these arguments
                is cleared and recomputed.

            Returns
            -------
            decorated_func: MemorizedFunc object
                The returned object is a MemorizedFunc object, that is
                callable (behaves like a function), but offers extra
                methods for cache lookup and management. See the
                documentation for :class:`joblib.memory.MemorizedFunc`.
        """
        if (cache_validation_callback is not None and
                not callable(cache_validation_callback)):
            raise ValueError(
                "cache_validation_callback needs to be callable. "
                f"Got {cache_validation_callback}."
            )
        if func is None:
            # Partial application, to be able to specify extra keyword
            # arguments in decorators
            return functools.partial(
                self.cache, ignore=ignore,
                mmap_mode=mmap_mode,
                verbose=verbose,
                cache_validation_callback=cache_validation_callback
            )
        if self.store_backend is None:
            cls = (AsyncNotMemorizedFunc
                   if asyncio.iscoroutinefunction(func)
                   else NotMemorizedFunc)
            return cls(func)
        if verbose is None:
            verbose = self._verbose
        if mmap_mode is False:
            mmap_mode = self.mmap_mode
        if isinstance(func, MemorizedFunc):
            func = func.func
        cls = (AsyncMemorizedFunc
               if asyncio.iscoroutinefunction(func)
               else MemorizedFunc)
        return cls(
            func, location=self.store_backend, backend=self.backend,
            ignore=ignore, mmap_mode=mmap_mode, compress=self.compress,
            verbose=verbose, timestamp=self.timestamp,
            cache_validation_callback=cache_validation_callback
        )
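
    # Illustrative sketch (not part of the original module): using cache() as
    # a decorator. ``memory`` is a Memory instance as above; the argument name
    # 'verbose_flag' in ``ignore`` is a made-up example.
    #
    #     @memory.cache(ignore=['verbose_flag'])
    #     def costly(x, verbose_flag=False):
    #         ...
    #         return x ** 2
    #
    #     costly(3)   # computed and written to the store
    #     costly(3)   # read back from the cache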

    def clear(self, warn=True):
        """ Erase the complete cache directory.
        """
        if warn:
            self.warn('Flushing completely the cache')
        if self.store_backend is not None:
            self.store_backend.clear()

        # As the cache is completely cleared, make sure the _FUNCTION_HASHES
        # cache is also reset. Otherwise, for a function that is present in
        # this table, results cached after this clear would hit cache misses
        # because the function code is not re-written.
        _FUNCTION_HASHES.clear()

    def reduce_size(self, bytes_limit=None, items_limit=None, age_limit=None):
        """Remove cache elements to make the cache fit its limits.

        The limitation can impose that the cache size fits in ``bytes_limit``,
        that the number of cache items is no more than ``items_limit``, and
        that all files in cache are not older than ``age_limit``.

        Parameters
        ----------
        bytes_limit: int | str, optional
            Limit in bytes of the size of the cache. By default, the size of
            the cache is unlimited. When reducing the size of the cache,
            ``joblib`` keeps the most recently accessed items first. If a
            str is passed, it is converted to a number of bytes using units
            { K | M | G} for kilo, mega, giga.

        items_limit: int, optional
            Number of items to limit the cache to. By default, the number of
            items in the cache is unlimited. When reducing the size of the
            cache, ``joblib`` keeps the most recently accessed items first.

        age_limit: datetime.timedelta, optional
            Maximum age of items to limit the cache to. When reducing the size
            of the cache, any items last accessed more than the given length of
            time ago are deleted.
        """
        if self.store_backend is None:
            # No cached results, this function does nothing.
            return

        if bytes_limit is None and items_limit is None and age_limit is None:
            # No limitation to impose, returning
            return

        # Defers the actual limits enforcing to the store backend.
        self.store_backend.enforce_store_limits(
            bytes_limit, items_limit, age_limit
        )
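
    # Illustrative sketch (not part of the original module): trimming the
    # cache. The limit values below are examples.
    #
    #     memory.reduce_size(bytes_limit='1G')                      # keep at most ~1 GB
    #     memory.reduce_size(items_limit=100)                       # keep the 100 most recent items
    #     memory.reduce_size(age_limit=datetime.timedelta(days=7))  # drop items older than a week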

    def eval(self, func, *args, **kwargs):
        """ Eval function func with arguments `*args` and `**kwargs`,
            in the context of the memory.

            This method works similarly to the builtin `apply`, except
            that the function is called only if the cache is not
            up to date.

        """
        if self.store_backend is None:
            return func(*args, **kwargs)
        return self.cache(func)(*args, **kwargs)
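
    # Illustrative sketch (not part of the original module): eval() is a
    # one-shot equivalent of ``memory.cache(g)(*args, **kwargs)``.
    #
    #     def g(x):
    #         return x + 1
    #
    #     memory.eval(g, 41)   # runs g only if no up-to-date cached result exists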

    # ------------------------------------------------------------------------
    # Private `object` interface
    # ------------------------------------------------------------------------

    def __repr__(self):
        return '{class_name}(location={location})'.format(
            class_name=self.__class__.__name__,
            location=(None if self.store_backend is None
                      else self.store_backend.location))

    def __getstate__(self):
        """ We don't store the timestamp when pickling, to avoid the hash
            depending on it.
        """
        state = self.__dict__.copy()
        state['timestamp'] = None
        return state


###############################################################################
# cache_validation_callback helpers
###############################################################################

def expires_after(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0,
                  hours=0, weeks=0):
    """Helper cache_validation_callback to force recompute after a duration.

    Parameters
    ----------
    days, seconds, microseconds, milliseconds, minutes, hours, weeks: numbers
        arguments passed to a timedelta.
    """
    delta = datetime.timedelta(
        days=days, seconds=seconds, microseconds=microseconds,
        milliseconds=milliseconds, minutes=minutes, hours=hours, weeks=weeks
    )

    def cache_validation_callback(metadata):
        computation_age = time.time() - metadata['time']
        return computation_age < delta.total_seconds()

    return cache_validation_callback
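
# Illustrative sketch (not part of the original module): combining
# expires_after with Memory.cache so cached results are recomputed once they
# are older than one day. The path and function name are example values.
#
#     memory = Memory('/tmp/joblib_cache', verbose=0)
#
#     @memory.cache(cache_validation_callback=expires_after(days=1))
#     def fetch_report(date):
#         ...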