# parso/cache.py

import time
import os
import sys
import hashlib
import gc
import shutil
import platform
import logging
import warnings
import pickle
from pathlib import Path
from typing import Dict, Any

LOG = logging.getLogger(__name__)

_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10  # 10 minutes
"""
Cached modules should survive in memory at least this long (10 minutes)
after their last use before garbage collection may drop them.
"""

_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30  # 30 days
"""
Maximum time a cached file survives without being accessed; files untouched
for longer than this are removed by clear_inactive_cache().
"""

_CACHED_SIZE_TRIGGER = 600
"""
This setting limits the number of cached modules; it is essentially the
trigger for garbage collection.

The reasoning for this limit being as large as it is:

Numpy, Pandas, Matplotlib and Tensorflow together use about 500 files. This
makes Jedi use ~500mb of memory. Since we might want a bit more than those few
libraries, we just increase the limit a bit.
"""

_PICKLE_VERSION = 33
"""
Version number (integer) for the file system cache.

Increment this number whenever there is an incompatible change in the
parser tree classes. For example, the following changes are regarded as
incompatible:

- A class name is changed.
- A class is moved to another module.
- A ``__slots__`` attribute of a class is changed.
"""

_VERSION_TAG = '%s-%s%s-%s' % (
    platform.python_implementation(),
    sys.version_info[0],
    sys.version_info[1],
    _PICKLE_VERSION
)
"""
Short name for distinguishing Python implementations and versions.

It's a bit similar to `sys.implementation.cache_tag`.
See: http://docs.python.org/3/library/sys.html#sys.implementation
"""


def _get_default_cache_path():
    if platform.system().lower() == 'windows':
        dir_ = Path(os.getenv('LOCALAPPDATA') or '~', 'Parso', 'Parso')
    elif platform.system().lower() == 'darwin':
        dir_ = Path('~', 'Library', 'Caches', 'Parso')
    else:
        dir_ = Path(os.getenv('XDG_CACHE_HOME') or '~/.cache', 'parso')
    return dir_.expanduser()


_default_cache_path = _get_default_cache_path()
"""
The path where the cache is stored.

On Linux, this defaults to ``~/.cache/parso/``, on OS X to
``~/Library/Caches/Parso/`` and on Windows to ``%LOCALAPPDATA%\\Parso\\Parso\\``.
On Linux, if the environment variable ``$XDG_CACHE_HOME`` is set,
``$XDG_CACHE_HOME/parso`` is used instead of the default.
"""


_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24  # one day


def _get_cache_clear_lock_path(cache_path=None):
    """
    The path where the cache lock is stored.

    The cache lock prevents continuous cache clearing and allows garbage
    collection only once a day (configurable via _CACHE_CLEAR_THRESHOLD).
    """
    cache_path = cache_path or _default_cache_path
    return cache_path.joinpath("PARSO-CACHE-LOCK")


# Maps a hashed grammar to a {path: _NodeCacheItem} dict of parsed modules.
parser_cache: Dict[str, Any] = {}


class _NodeCacheItem:
    def __init__(self, node, lines, change_time=None):
        self.node = node
        self.lines = lines
        if change_time is None:
            change_time = time.time()
        self.change_time = change_time
        self.last_used = change_time


def load_module(hashed_grammar, file_io, cache_path=None):
    """
    Returns the cached module node, or None if nothing valid is cached.
    """
    p_time = file_io.get_last_modified()
    if p_time is None:
        return None

    try:
        module_cache_item = parser_cache[hashed_grammar][file_io.path]
        if p_time <= module_cache_item.change_time:
            module_cache_item.last_used = time.time()
            return module_cache_item.node
    except KeyError:
        return _load_from_file_system(
            hashed_grammar,
            file_io.path,
            p_time,
            cache_path=cache_path
        )
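
# A minimal usage sketch (assuming parso.file_io.FileIO and the private
# grammar._hashed attribute; both are internals and may change between
# parso versions):
#
#     import parso
#     from parso.file_io import FileIO
#
#     grammar = parso.load_grammar()
#     node = load_module(grammar._hashed, FileIO('example.py'))
#     # node is None on a cache miss or when example.py changed on disk.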


def _load_from_file_system(hashed_grammar, path, p_time, cache_path=None):
    cache_path = _get_hashed_path(hashed_grammar, path, cache_path=cache_path)
    try:
        if p_time > os.path.getmtime(cache_path):
            # Cache is outdated
            return None

        with open(cache_path, 'rb') as f:
            # Disabling the garbage collector while unpickling avoids
            # collection passes over the many freshly created node objects.
            gc.disable()
            try:
                module_cache_item = pickle.load(f)
            finally:
                gc.enable()
    except FileNotFoundError:
        return None
    else:
        _set_cache_item(hashed_grammar, path, module_cache_item)
        LOG.debug('pickle loaded: %s', path)
        return module_cache_item.node


def _set_cache_item(hashed_grammar, path, module_cache_item):
    if sum(len(v) for v in parser_cache.values()) >= _CACHED_SIZE_TRIGGER:
        # Garbage collection of old cache files.
        # We are basically throwing everything away that hasn't been accessed
        # in the last 10 minutes.
        cutoff_time = time.time() - _CACHED_FILE_MINIMUM_SURVIVAL
        for key, path_to_item_map in parser_cache.items():
            parser_cache[key] = {
                p: node_item
                for p, node_item in path_to_item_map.items()
                if node_item.last_used > cutoff_time
            }

    parser_cache.setdefault(hashed_grammar, {})[path] = module_cache_item
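
# A sketch of the eviction behavior (hypothetical grammar hash and path):
#
#     stale = _NodeCacheItem(None, [], change_time=time.time() - 3600)
#     _set_cache_item('some-grammar-hash', '/tmp/stale.py', stale)
#     # Once 600 (_CACHED_SIZE_TRIGGER) entries accumulate, the next call
#     # to _set_cache_item drops 'stale', since it was last used more than
#     # ten minutes (_CACHED_FILE_MINIMUM_SURVIVAL) ago.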


def try_to_save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_path=None):
    path = file_io.path
    try:
        p_time = None if path is None else file_io.get_last_modified()
    except OSError:
        p_time = None
        pickling = False

    item = _NodeCacheItem(module, lines, p_time)
    _set_cache_item(hashed_grammar, path, item)
    if pickling and path is not None:
        try:
            _save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
        except PermissionError:
            # It's not really a big issue if the cache cannot be saved to the
            # file system. It's still in RAM in that case. However, we should
            # still warn the user that this is happening.
            warnings.warn(
                'Tried to save a file to %s, but got permission denied.' % path,
                Warning
            )
        else:
            _remove_cache_and_update_lock(cache_path=cache_path)
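
# A save/load round trip (a sketch; FileIO and grammar._hashed are parso
# internals and may differ between versions):
#
#     import parso
#     from parso.file_io import FileIO
#
#     grammar = parso.load_grammar()
#     io = FileIO('example.py')
#     module = grammar.parse(path='example.py')
#     with open('example.py') as f:
#         lines = f.readlines()
#     try_to_save_module(grammar._hashed, io, module, lines)
#     assert load_module(grammar._hashed, io) is module  # served from RAM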


def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
    with open(_get_hashed_path(hashed_grammar, path, cache_path=cache_path), 'wb') as f:
        pickle.dump(item, f, pickle.HIGHEST_PROTOCOL)


def clear_cache(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    shutil.rmtree(cache_path)
    parser_cache.clear()


def clear_inactive_cache(
    cache_path=None,
    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
    if cache_path is None:
        cache_path = _default_cache_path
    if not cache_path.exists():
        return False
    for dirname in os.listdir(cache_path):
        version_path = cache_path.joinpath(dirname)
        if not version_path.is_dir():
            continue
        # Remove cache files that have not been accessed within the threshold.
        for file in os.scandir(version_path):
            if file.stat().st_atime + inactivity_threshold <= time.time():
                try:
                    os.remove(file.path)
                except OSError:  # silently ignore all failures
                    continue
    return True


def _touch(path):
    try:
        os.utime(path, None)
    except FileNotFoundError:
        try:
            with open(path, 'a'):
                pass
        except OSError:  # TODO Maybe log this?
            return False
    return True


def _remove_cache_and_update_lock(cache_path=None):
    lock_path = _get_cache_clear_lock_path(cache_path=cache_path)
    try:
        clear_lock_time = os.path.getmtime(lock_path)
    except FileNotFoundError:
        clear_lock_time = None
    if (
        clear_lock_time is None  # first time
        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
    ):
        # Touch the lock first so that as few concurrent cleanup jobs as
        # possible get started. There is still a race condition, but it's
        # probably not a big problem.
        if not _touch(lock_path):
            return False

        clear_inactive_cache(cache_path=cache_path)


def _get_hashed_path(hashed_grammar, path, cache_path=None):
    directory = _get_cache_directory_path(cache_path=cache_path)

    file_hash = hashlib.sha256(str(path).encode("utf-8")).hexdigest()
    return os.path.join(directory, '%s-%s.pkl' % (hashed_grammar, file_hash))
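
# The resulting cache file path has the form
# <cache_path>/<_VERSION_TAG>/<hashed_grammar>-<sha256 hex of the source
# path>.pkl, e.g. ~/.cache/parso/CPython-38-33/<grammar hash>-<64 hex
# chars>.pkl on a default Linux setup.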


def _get_cache_directory_path(cache_path=None):
    if cache_path is None:
        cache_path = _default_cache_path
    directory = cache_path.joinpath(_VERSION_TAG)
    if not directory.exists():
        os.makedirs(directory)
    return directory