1"""Utility functions.""" 

2 

3import math 

4import os 

5import sys 

6import warnings 

7import weakref 

8from pathlib import Path 

9from time import perf_counter as clock 

10 

11import numpy as np 

12 

13from .flavor import array_of_flavor 

14 

15# The map between byteorders in NumPy and PyTables 

16byteorders = { 

17 '>': 'big', 

18 '<': 'little', 

19 '=': sys.byteorder, 

20 '|': 'irrelevant', 

21} 

22 

23# The type used for size values: indexes, coordinates, dimension 

24# lengths, row numbers, shapes, chunk shapes, byte counts... 

25SizeType = np.int64 

26 

27 
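
# Illustrative use of the ``byteorders`` map (a sketch, not part of the
# original module): it translates the single-character byteorder codes
# reported by NumPy dtypes into the names used by PyTables.
#   >>> byteorders[np.dtype('>i4').byteorder]
#   'big'
#   >>> byteorders[np.dtype('int8').byteorder]
#   'irrelevant'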


def correct_byteorder(ptype, byteorder):
    """Fix the byteorder depending on the PyTables types."""

    if ptype in ['string', 'bool', 'int8', 'uint8', 'object']:
        return "irrelevant"
    else:
        return byteorder


def is_idx(index):
    """Checks if an object can work as an index or not."""

    if type(index) is int:
        return True
    elif hasattr(index, "__index__"):
        # Exclude the array([idx]) as working as an index. Fixes #303.
        if (hasattr(index, "shape") and index.shape != ()):
            return False
        try:
            index.__index__()
            if isinstance(index, bool):
                warnings.warn(
                    'using a boolean instead of an integer will result in an '
                    'error in the future', DeprecationWarning, stacklevel=2)
            return True
        except TypeError:
            return False
    elif isinstance(index, np.integer):
        return True
    # For Python 2.4 one should test 0-dim and 1-dim, 1-elem arrays as well
    elif (isinstance(index, np.ndarray) and (index.shape == ()) and
          index.dtype.str[1] == 'i'):
        return True

    return False
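
# Illustrative behaviour of ``is_idx`` (a sketch, not part of the
# original module):
#   >>> is_idx(3)
#   True
#   >>> is_idx(np.int32(3))
#   True
#   >>> is_idx(np.array(3))      # 0-dim integer array works as an index
#   True
#   >>> is_idx(np.array([3]))    # 1-dim arrays are rejected (see #303)
#   False
#   >>> is_idx(3.0)
#   False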


def idx2long(index):
    """Convert a possible index into a long int."""

    try:
        return int(index)
    except Exception:
        raise TypeError("not an integer type.")


# This is used in VLArray and EArray to produce a NumPy object compliant
# with the atom from a generic Python type.  If copy is True, it is
# guaranteed to return a copy of the object, never the same object or a
# new one sharing the same memory.
def convert_to_np_atom(arr, atom, copy=False):
    """Convert a generic object into a NumPy object compliant with atom."""

    # First, convert the object into a NumPy array
    nparr = array_of_flavor(arr, 'numpy')
    # Copy the data if necessary to get a contiguous buffer, or if the
    # dtype is not the correct one.
    if atom.shape == ():
        # Scalar atom case
        nparr = np.array(nparr, dtype=atom.dtype, copy=copy)
    else:
        # Multidimensional atom case.  Addresses #133.
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to take advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        nparr2 = np.empty(newshape, dtype=[('', atom.dtype)])
        nparr2['f0'][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr
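
# A minimal sketch of the structured-dtype trick used above, with a plain
# subarray dtype standing in for ``atom.dtype`` (the values here are
# hypothetical, not from the original module):
#   >>> atom_dtype = np.dtype(('float64', (2,)))
#   >>> data = np.arange(6.).reshape(3, 2)
#   >>> tmp = np.empty(data.shape[:-len(atom_dtype.shape)],
#   ...                dtype=[('', atom_dtype)])
#   >>> tmp['f0'][:] = data          # NumPy broadcasting fills each atom
#   >>> tmp.view(atom_dtype).shape   # drop the record wrapper again
#   (3, 2)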


# The next is used in Array, EArray and VLArray, and it is a bit higher
# level than convert_to_np_atom.
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom."""

    # Check whether the object needs to be copied to make the operation
    # safe for in-place conversion.
    copy = atom.type in ['time64']
    nparr = convert_to_np_atom(object, atom, copy)
    # Finally, check the byteorder and change it if needed
    byteorder = byteorders[nparr.dtype.byteorder]
    if (byteorder in ['little', 'big'] and byteorder != sys.byteorder):
        # The byteorder needs to be fixed (a copy is made
        # so that the original array is not modified)
        nparr = nparr.byteswap()

    return nparr


def check_file_access(filename, mode='r'):
    """Check for file access in the specified `mode`.

    `mode` is one of the modes supported by `File` objects.  If the file
    indicated by `filename` can be accessed using that `mode`, the
    function ends successfully.  Else, an ``IOError`` is raised
    explaining the reason for the failure.

    All this paraphernalia is used to avoid the lengthy and scary HDF5
    messages produced when there are problems opening a file.  No
    changes are ever made to the file system.

    """

    path = Path(filename).resolve()

    if mode == 'r':
        # The file should be readable.
        if not os.access(path, os.F_OK):
            raise FileNotFoundError(f"``{path}`` does not exist")
        if not path.is_file():
            raise IsADirectoryError(f"``{path}`` is not a regular file")
        if not os.access(path, os.R_OK):
            raise PermissionError(f"file ``{path}`` exists but it can not be read")
    elif mode == 'w':
        if os.access(path, os.F_OK):
            # Since the file is not removed but replaced,
            # it must already be accessible to read and write operations.
            check_file_access(path, 'r+')
        else:
            # A new file is going to be created,
            # so the directory should be writable.
            if not os.access(path.parent, os.F_OK):
                raise FileNotFoundError(f"``{path.parent}`` does not exist")
            if not path.parent.is_dir():
                raise NotADirectoryError(f"``{path.parent}`` is not a directory")
            if not os.access(path.parent, os.W_OK):
                raise PermissionError(
                    f"directory ``{path.parent}`` exists but it can not be "
                    f"written"
                )
    elif mode == 'a':
        if os.access(path, os.F_OK):
            check_file_access(path, 'r+')
        else:
            check_file_access(path, 'w')
    elif mode == 'r+':
        check_file_access(path, 'r')
        if not os.access(path, os.W_OK):
            raise PermissionError(f"file ``{path}`` exists but it can not be written")
    else:
        raise ValueError(f"invalid mode: {mode!r}")
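
# Illustrative usage (a sketch; the file names below are hypothetical):
#   >>> check_file_access('existing.h5', 'r')   # returns None if the file is readable
#   >>> check_file_access('missing.h5', 'r')    # raises FileNotFoundError with the resolved path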


def lazyattr(fget):
    """Create a *lazy attribute* from the result of `fget`.

    This function is intended to be used as a *method decorator*.  It
    returns a *property* which caches the result of calling the `fget`
    instance method.  The docstring of `fget` is used for the property
    itself.  For instance:

    >>> class MyClass(object):
    ...     @lazyattr
    ...     def attribute(self):
    ...         'Attribute description.'
    ...         print('creating value')
    ...         return 10
    ...
    >>> type(MyClass.attribute)
    <class 'property'>
    >>> MyClass.attribute.__doc__
    'Attribute description.'
    >>> obj = MyClass()
    >>> obj.__dict__
    {}
    >>> obj.attribute
    creating value
    10
    >>> obj.__dict__
    {'attribute': 10}
    >>> obj.attribute
    10
    >>> del obj.attribute
    Traceback (most recent call last):
      ...
    AttributeError: can't delete attribute 'attribute'

    .. warning::

        Please note that this decorator *changes the type of the
        decorated object* from an instance method into a property.

    """

    name = fget.__name__

    def newfget(self):
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        mydict[name] = value = fget(self)
        return value

    return property(newfget, None, None, fget.__doc__)


def show_stats(explain, tref, encoding=None):
    """Show the used memory (only works for Linux 2.6.x)."""

    for line in Path('/proc/self/status').read_text().splitlines():
        if line.startswith("VmSize:"):
            vmsize = int(line.split()[1])
        elif line.startswith("VmRSS:"):
            vmrss = int(line.split()[1])
        elif line.startswith("VmData:"):
            vmdata = int(line.split()[1])
        elif line.startswith("VmStk:"):
            vmstk = int(line.split()[1])
        elif line.startswith("VmExe:"):
            vmexe = int(line.split()[1])
        elif line.startswith("VmLib:"):
            vmlib = int(line.split()[1])
    print("Memory usage: ******* %s *******" % explain)
    print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB")
    print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB")
    print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB")
    tnow = clock()
    print(f"WallClock time: {tnow - tref:.3f}")
    return tnow
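
# Typical usage pattern (a sketch; only meaningful on Linux, where
# /proc/self/status is available):
#   tref = clock()
#   # ... do some work ...
#   tref = show_stats("after work", tref)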


# Truncate data before calling __setitem__, to improve the compression ratio.
# This function is taken verbatim from netcdf4-python.
def quantize(data, least_significant_digit):
    """Quantize data to improve compression.

    Data is quantized using around(scale*data)/scale, where scale is
    2**bits, and bits is determined from the least_significant_digit.

    For example, if least_significant_digit=1, bits will be 4.

    """

    exp = -least_significant_digit
    exp = math.floor(exp) if exp < 0 else math.ceil(exp)
    bits = math.ceil(math.log2(10 ** -exp))
    scale = 2 ** bits
    datout = np.around(scale * data) / scale

    return datout
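
# Worked example of the docstring above (a sketch, not part of the
# original module): with least_significant_digit=1, bits = ceil(log2(10))
# = 4, so scale = 2**4 = 16 and values are rounded to multiples of 1/16.
#   >>> quantize(np.array([1.2345]), 1)
#   array([1.25])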


# Utilities to detect leaked instances.  See recipe 14.10 of the Python
# Cookbook by Martelli & Ascher.
tracked_classes = {}


def log_instance_creation(instance, name=None):
    if name is None:
        name = instance.__class__.__name__
    if name not in tracked_classes:
        tracked_classes[name] = []
    tracked_classes[name].append(weakref.ref(instance))


def string_to_classes(s):
    if s == '*':
        c = sorted(tracked_classes)
        return c
    else:
        return s.split()


def fetch_logged_instances(classes="*"):
    classnames = string_to_classes(classes)
    return [(cn, len(tracked_classes[cn])) for cn in classnames]


def count_logged_instances(classes, file=sys.stdout):
    for classname in string_to_classes(classes):
        file.write("%s: %d\n" % (classname, len(tracked_classes[classname])))


def list_logged_instances(classes, file=sys.stdout):
    for classname in string_to_classes(classes):
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            if obj is not None:
                file.write('    %s\n' % repr(obj))


def dump_logged_instances(classes, file=sys.stdout):
    for classname in string_to_classes(classes):
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            if obj is not None:
                file.write('    %s:\n' % obj)
                for key, value in obj.__dict__.items():
                    file.write(f'        {key:>20} : {value}\n')
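
# Illustrative usage of the leak-detection helpers (a sketch with a
# hypothetical class ``Leaky``; assumes nothing else has been tracked yet):
#   >>> class Leaky:
#   ...     pass
#   >>> obj = Leaky()
#   >>> log_instance_creation(obj)
#   >>> fetch_logged_instances()
#   [('Leaky', 1)]
#   >>> count_logged_instances('Leaky')
#   Leaky: 1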


#
# A class useful for cache usage
#
class CacheDict(dict):
    """A dictionary that prevents itself from growing too much."""

    def __init__(self, maxentries):
        self.maxentries = maxentries
        super().__init__()

    def __setitem__(self, key, value):
        # Protection against growing the cache too much
        if len(self) > self.maxentries:
            # Remove 10% of the (arbitrarily chosen) entries from the cache.
            # An integer count is needed here: a float cannot slice the key list.
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(self)[:entries_to_remove]:
                super().__delitem__(k)
        super().__setitem__(key, value)
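
# Illustrative behaviour (a sketch, not part of the original module):
# once the cache holds more than ``maxentries`` items, the next insertion
# first evicts about 10% of the existing keys.
#   >>> cache = CacheDict(maxentries=100)
#   >>> for i in range(102):
#   ...     cache[i] = i
#   >>> len(cache)   # 10 entries were evicted before inserting the 102nd
#   92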


class NailedDict:
    """A dictionary which ignores its items when it has nails on it."""

    def __init__(self, maxentries):
        self.maxentries = maxentries
        self._cache = {}
        self._nailcount = 0

    # Only a restricted set of dictionary methods are supported.  That
    # is why we buy instead of inherit.

    # The following are intended to be used by ``Table`` code changing
    # the set of usable indexes.

    def clear(self):
        self._cache.clear()

    def nail(self):
        self._nailcount += 1

    def unnail(self):
        self._nailcount -= 1

    # The following are intended to be used by ``Table`` code handling
    # conditions.

    def __contains__(self, key):
        if self._nailcount > 0:
            return False
        return key in self._cache

    def __getitem__(self, key):
        if self._nailcount > 0:
            raise KeyError(key)
        return self._cache[key]

    def get(self, key, default=None):
        if self._nailcount > 0:
            return default
        return self._cache.get(key, default)

    def __setitem__(self, key, value):
        if self._nailcount > 0:
            return
        cache = self._cache
        # Protection against growing the cache too much
        if len(cache) > self.maxentries:
            # Remove 10% of the (arbitrarily chosen) entries from the cache
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(cache)[:entries_to_remove]:
                del cache[k]
        cache[key] = value
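
# Illustrative behaviour of the nailing semantics (a sketch, not part of
# the original module): while at least one nail is in place the cache
# pretends to be empty, and lookups fall back to the default.
#   >>> nd = NailedDict(maxentries=10)
#   >>> nd['cond'] = 'compiled'
#   >>> nd.get('cond')
#   'compiled'
#   >>> nd.nail()
#   >>> nd.get('cond') is None
#   True
#   >>> nd.unnail()
#   >>> nd.get('cond')
#   'compiled'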


def detect_number_of_cores():
    """Detect the number of cores on a system.

    Cribbed from pp.

    """

    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else:  # OSX:
            # Query the CPU count via sysctl (``os.popen2`` is gone in Python 3).
            return int(os.popen("sysctl -n hw.ncpu").read())
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        if ncpus > 0:
            return ncpus
    return 1  # Default


def _test():
    """Run ``doctest`` on this module."""

    import doctest
    doctest.testmod()


if __name__ == '__main__':
    _test()