1"""Utility functions.""" 

2 

3import math 

4import os 

5import sys 

6import warnings 

7import weakref 

8from pathlib import Path 

9from time import perf_counter as clock 

10 

11import numpy as np 

12 

13from .flavor import array_of_flavor 

14 

15# The map between byteorders in NumPy and PyTables 

16byteorders = { 

17 '>': 'big', 

18 '<': 'little', 

19 '=': sys.byteorder, 

20 '|': 'irrelevant', 

21} 

22 

23# The type used for size values: indexes, coordinates, dimension 

24# lengths, row numbers, shapes, chunk shapes, byte counts... 

25SizeType = np.int64 

26 

27 
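
# Illustrative use of the ``byteorders`` map (a sketch, not part of the
# original module): it translates the single-character byteorder codes
# reported by NumPy dtypes into the names used by PyTables.
#   >>> byteorders[np.dtype('>i4').byteorder]
#   'big'
#   >>> byteorders[np.dtype('int8').byteorder]
#   'irrelevant'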


def correct_byteorder(ptype, byteorder):
    """Fix the byteorder depending on the PyTables types."""

    if ptype in ['string', 'bool', 'int8', 'uint8', 'object']:
        return "irrelevant"
    else:
        return byteorder


def is_idx(index):
    """Checks if an object can work as an index or not."""

    if type(index) is int:
        return True
    elif hasattr(index, "__index__"):
        # Exclude the array([idx]) as working as an index. Fixes #303.
        if (hasattr(index, "shape") and index.shape != ()):
            return False
        try:
            index.__index__()
            if isinstance(index, bool):
                warnings.warn(
                    'using a boolean instead of an integer will result in an '
                    'error in the future', DeprecationWarning, stacklevel=2)
            return True
        except TypeError:
            return False
    elif isinstance(index, np.integer):
        return True
    # For Python 2.4 one should test 0-dim and 1-dim, 1-elem arrays as well
    elif (isinstance(index, np.ndarray) and (index.shape == ()) and
          index.dtype.str[1] == 'i'):
        return True

    return False
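
# Illustrative behaviour of ``is_idx`` (a sketch, not part of the
# original module):
#   >>> is_idx(3)
#   True
#   >>> is_idx(np.int32(3))
#   True
#   >>> is_idx(np.array(3))      # 0-dim integer array works as an index
#   True
#   >>> is_idx(np.array([3]))    # 1-dim arrays are rejected (see #303)
#   False
#   >>> is_idx(3.0)
#   False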


def idx2long(index):
    """Convert a possible index into a long int."""

    try:
        return int(index)
    except Exception:
        raise TypeError("not an integer type.")


# This is used in VLArray and EArray to produce a NumPy object compliant
# with the atom from a generic Python type.  If copy is True, it is
# guaranteed to return a copy of the object, never the same object or a
# new one sharing the same memory.
def convert_to_np_atom(arr, atom, copy=False):
    """Convert a generic object into a NumPy object compliant with atom."""

    # First, convert the object into a NumPy array
    nparr = array_of_flavor(arr, 'numpy')
    # Copy the data if necessary to get a contiguous buffer, or if the
    # dtype is not the correct one.
    if atom.shape == ():
        # Scalar atom case
        nparr = np.array(nparr, dtype=atom.dtype, copy=copy)
    else:
        # Multidimensional atom case.  Addresses #133.
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to take advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        nparr2 = np.empty(newshape, dtype=[('', atom.dtype)])
        nparr2['f0'][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr
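
# A minimal sketch of the structured-dtype trick used above, with a plain
# subarray dtype standing in for ``atom.dtype`` (the values here are
# hypothetical, not from the original module):
#   >>> atom_dtype = np.dtype(('float64', (2,)))
#   >>> data = np.arange(6.).reshape(3, 2)
#   >>> tmp = np.empty(data.shape[:-len(atom_dtype.shape)],
#   ...                dtype=[('', atom_dtype)])
#   >>> tmp['f0'][:] = data          # NumPy broadcasting fills each atom
#   >>> tmp.view(atom_dtype).shape   # drop the record wrapper again
#   (3, 2)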


# The next is used in Array, EArray and VLArray, and it is a bit higher
# level than convert_to_np_atom.
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom."""

    # Check whether the object needs to be copied to make the operation
    # safe for in-place conversion.
    copy = atom.type in ['time64']
    nparr = convert_to_np_atom(object, atom, copy)
    # Finally, check the byteorder and change it if needed
    byteorder = byteorders[nparr.dtype.byteorder]
    if (byteorder in ['little', 'big'] and byteorder != sys.byteorder):
        # The byteorder needs to be fixed (a copy is made
        # so that the original array is not modified)
        nparr = nparr.byteswap()

    return nparr


def check_file_access(filename, mode='r'):
    """Check for file access in the specified `mode`.

    `mode` is one of the modes supported by `File` objects.  If the file
    indicated by `filename` can be accessed using that `mode`, the
    function ends successfully.  Else, an ``IOError`` is raised
    explaining the reason for the failure.

    All this paraphernalia is used to avoid the lengthy and scary HDF5
    messages produced when there are problems opening a file.  No
    changes are ever made to the file system.

    """

    path = Path(filename).resolve()

    if mode == 'r':
        # The file should be readable.
        if not os.access(path, os.F_OK):
            raise FileNotFoundError(f"``{path}`` does not exist")
        if not path.is_file():
            raise IsADirectoryError(f"``{path}`` is not a regular file")
        if not os.access(path, os.R_OK):
            raise PermissionError(f"file ``{path}`` exists but it can not be read")
    elif mode == 'w':
        if os.access(path, os.F_OK):
            # Since the file is not removed but replaced,
            # it must already be accessible to read and write operations.
            check_file_access(path, 'r+')
        else:
            # A new file is going to be created,
            # so the directory should be writable.
            if not os.access(path.parent, os.F_OK):
                raise FileNotFoundError(f"``{path.parent}`` does not exist")
            if not path.parent.is_dir():
                raise NotADirectoryError(f"``{path.parent}`` is not a directory")
            if not os.access(path.parent, os.W_OK):
                raise PermissionError(
                    f"directory ``{path.parent}`` exists but it can not be "
                    f"written"
                )
    elif mode == 'a':
        if os.access(path, os.F_OK):
            check_file_access(path, 'r+')
        else:
            check_file_access(path, 'w')
    elif mode == 'r+':
        check_file_access(path, 'r')
        if not os.access(path, os.W_OK):
            raise PermissionError(f"file ``{path}`` exists but it can not be written")
    else:
        raise ValueError(f"invalid mode: {mode!r}")
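
# Illustrative usage (a sketch; the file names below are hypothetical):
#   >>> check_file_access('existing.h5', 'r')   # returns None if the file is readable
#   >>> check_file_access('missing.h5', 'r')    # raises FileNotFoundError with the resolved path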


def lazyattr(fget):
    """Create a *lazy attribute* from the result of `fget`.

    This function is intended to be used as a *method decorator*.  It
    returns a *property* which caches the result of calling the `fget`
    instance method.  The docstring of `fget` is used for the property
    itself.  For instance:

    >>> class MyClass(object):
    ...     @lazyattr
    ...     def attribute(self):
    ...         'Attribute description.'
    ...         print('creating value')
    ...         return 10
    ...
    >>> type(MyClass.attribute)
    <class 'property'>
    >>> MyClass.attribute.__doc__
    'Attribute description.'
    >>> obj = MyClass()
    >>> obj.__dict__
    {}
    >>> obj.attribute
    creating value
    10
    >>> obj.__dict__
    {'attribute': 10}
    >>> obj.attribute
    10
    >>> del obj.attribute
    Traceback (most recent call last):
      ...
    AttributeError: can't delete attribute 'attribute'

    .. warning::

        Please note that this decorator *changes the type of the
        decorated object* from an instance method into a property.

    """

    name = fget.__name__

    def newfget(self):
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        mydict[name] = value = fget(self)
        return value

    return property(newfget, None, None, fget.__doc__)


def show_stats(explain, tref, encoding=None):
    """Show the used memory (only works for Linux 2.6.x)."""

    for line in Path('/proc/self/status').read_text().splitlines():
        if line.startswith("VmSize:"):
            vmsize = int(line.split()[1])
        elif line.startswith("VmRSS:"):
            vmrss = int(line.split()[1])
        elif line.startswith("VmData:"):
            vmdata = int(line.split()[1])
        elif line.startswith("VmStk:"):
            vmstk = int(line.split()[1])
        elif line.startswith("VmExe:"):
            vmexe = int(line.split()[1])
        elif line.startswith("VmLib:"):
            vmlib = int(line.split()[1])
    print("Memory usage: ******* %s *******" % explain)
    print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB")
    print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB")
    print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB")
    tnow = clock()
    print(f"WallClock time: {tnow - tref:.3f}")
    return tnow
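
# Typical usage pattern (a sketch; only meaningful on Linux, where
# /proc/self/status is available):
#   tref = clock()
#   # ... do some work ...
#   tref = show_stats("after work", tref)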


# Truncate data before calling __setitem__, to improve the compression ratio.
# This function is taken verbatim from netcdf4-python.
def quantize(data, least_significant_digit):
    """Quantize data to improve compression.

    Data is quantized using around(scale*data)/scale, where scale is
    2**bits, and bits is determined from the least_significant_digit.

    For example, if least_significant_digit=1, bits will be 4.

    """

    exp = -least_significant_digit
    exp = math.floor(exp) if exp < 0 else math.ceil(exp)
    bits = math.ceil(math.log2(10 ** -exp))
    scale = 2 ** bits
    datout = np.around(scale * data) / scale

    return datout
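
# Worked example of the docstring above (a sketch, not part of the
# original module): with least_significant_digit=1, bits = ceil(log2(10))
# = 4, so scale = 2**4 = 16 and values are rounded to multiples of 1/16.
#   >>> quantize(np.array([1.2345]), 1)
#   array([1.25])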


# Utilities to detect leaked instances.  See recipe 14.10 of the Python
# Cookbook by Martelli & Ascher.
tracked_classes = {}


def log_instance_creation(instance, name=None):
    if name is None:
        name = instance.__class__.__name__
    if name not in tracked_classes:
        tracked_classes[name] = []
    tracked_classes[name].append(weakref.ref(instance))


def string_to_classes(s):
    if s == '*':
        c = sorted(tracked_classes)
        return c
    else:
        return s.split()


def fetch_logged_instances(classes="*"):
    classnames = string_to_classes(classes)
    return [(cn, len(tracked_classes[cn])) for cn in classnames]


def count_logged_instances(classes, file=sys.stdout):
    for classname in string_to_classes(classes):
        file.write("%s: %d\n" % (classname, len(tracked_classes[classname])))


def list_logged_instances(classes, file=sys.stdout):
    for classname in string_to_classes(classes):
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            if obj is not None:
                file.write('    %s\n' % repr(obj))


def dump_logged_instances(classes, file=sys.stdout):
    for classname in string_to_classes(classes):
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            if obj is not None:
                file.write('    %s:\n' % obj)
                for key, value in obj.__dict__.items():
                    file.write(f'        {key:>20} : {value}\n')
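
# Illustrative usage of the leak-detection helpers (a sketch with a
# hypothetical class ``Leaky``; assumes nothing else has been tracked yet):
#   >>> class Leaky:
#   ...     pass
#   >>> obj = Leaky()
#   >>> log_instance_creation(obj)
#   >>> fetch_logged_instances()
#   [('Leaky', 1)]
#   >>> count_logged_instances('Leaky')
#   Leaky: 1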


#
# A class useful for cache usage
#
class CacheDict(dict):
    """A dictionary that prevents itself from growing too much."""

    def __init__(self, maxentries):
        self.maxentries = maxentries
        super().__init__()

    def __setitem__(self, key, value):
        # Protection against growing the cache too much
        if len(self) > self.maxentries:
            # Remove 10% of the (arbitrarily chosen) entries from the cache.
            # An integer count is needed here: a float cannot slice the key list.
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(self)[:entries_to_remove]:
                super().__delitem__(k)
        super().__setitem__(key, value)
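
# Illustrative behaviour (a sketch, not part of the original module):
# once the cache holds more than ``maxentries`` items, the next insertion
# first evicts about 10% of the existing keys.
#   >>> cache = CacheDict(maxentries=100)
#   >>> for i in range(102):
#   ...     cache[i] = i
#   >>> len(cache)   # 10 entries were evicted before inserting the 102nd
#   92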


class NailedDict:
    """A dictionary which ignores its items when it has nails on it."""

    def __init__(self, maxentries):
        self.maxentries = maxentries
        self._cache = {}
        self._nailcount = 0

    # Only a restricted set of dictionary methods are supported.  That
    # is why we buy instead of inherit.

    # The following are intended to be used by ``Table`` code changing
    # the set of usable indexes.

    def clear(self):
        self._cache.clear()

    def nail(self):
        self._nailcount += 1

    def unnail(self):
        self._nailcount -= 1

    # The following are intended to be used by ``Table`` code handling
    # conditions.

    def __contains__(self, key):
        if self._nailcount > 0:
            return False
        return key in self._cache

    def __getitem__(self, key):
        if self._nailcount > 0:
            raise KeyError(key)
        return self._cache[key]

    def get(self, key, default=None):
        if self._nailcount > 0:
            return default
        return self._cache.get(key, default)

    def __setitem__(self, key, value):
        if self._nailcount > 0:
            return
        cache = self._cache
        # Protection against growing the cache too much
        if len(cache) > self.maxentries:
            # Remove 10% of the (arbitrarily chosen) entries from the cache
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(cache)[:entries_to_remove]:
                del cache[k]
        cache[key] = value
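
# Illustrative behaviour of the nailing semantics (a sketch, not part of
# the original module): while at least one nail is in place the cache
# pretends to be empty, and lookups fall back to the default.
#   >>> nd = NailedDict(maxentries=10)
#   >>> nd['cond'] = 'compiled'
#   >>> nd.get('cond')
#   'compiled'
#   >>> nd.nail()
#   >>> nd.get('cond') is None
#   True
#   >>> nd.unnail()
#   >>> nd.get('cond')
#   'compiled'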


def detect_number_of_cores():
    """Detect the number of cores on a system.

    Cribbed from pp.

    """

    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else:  # OSX:
            # Query the CPU count via sysctl (``os.popen2`` is gone in Python 3).
            return int(os.popen("sysctl -n hw.ncpu").read())
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        if ncpus > 0:
            return ncpus
    return 1  # Default


def _test():
    """Run ``doctest`` on this module."""

    import doctest
    doctest.testmod()


if __name__ == '__main__':
    _test()