Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tables/utils.py: 24%
211 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-10 06:15 +0000
1"""Utility functions."""
3import math
4import os
5import sys
6import warnings
7import weakref
8from pathlib import Path
9from time import perf_counter as clock
11import numpy as np
13from .flavor import array_of_flavor
# The map between byteorders in NumPy and PyTables
byteorders = {
    '>': 'big',
    '<': 'little',
    '=': sys.byteorder,  # native order of the running interpreter
    '|': 'irrelevant',   # byte order does not apply (e.g. single-byte types)
}

# The type used for size values: indexes, coordinates, dimension
# lengths, row numbers, shapes, chunk shapes, byte counts...
SizeType = np.int64
def correct_byteorder(ptype, byteorder):
    """Fix the byteorder depending on the PyTables types.

    For types where byte order carries no meaning (single-byte and
    non-numeric types) return ``"irrelevant"``; otherwise return
    `byteorder` untouched.
    """

    byteorder_free_types = ('string', 'bool', 'int8', 'uint8', 'object')
    if ptype in byteorder_free_types:
        return "irrelevant"
    return byteorder
def is_idx(index):
    """Checks if an object can work as an index or not."""

    if type(index) is int:
        return True
    if hasattr(index, "__index__"):
        # A shaped array such as array([idx]) must not act as an index
        # even though it exposes __index__. Fixes #303.
        if hasattr(index, "shape") and index.shape != ():
            return False
        try:
            index.__index__()
        except TypeError:
            return False
        if isinstance(index, bool):
            warnings.warn(
                'using a boolean instead of an integer will result in an '
                'error in the future', DeprecationWarning, stacklevel=2)
        return True
    if isinstance(index, np.integer):
        return True
    # Zero-dimensional signed-integer arrays also qualify as indexes.
    if (isinstance(index, np.ndarray) and index.shape == ()
            and index.dtype.str[1] == 'i'):
        return True

    return False
def idx2long(index):
    """Convert a possible index into a long int.

    Raises ``TypeError`` when `index` cannot be converted with ``int()``.
    """

    try:
        value = int(index)
    except Exception:
        # Normalize any conversion failure to a TypeError.
        raise TypeError("not an integer type.")
    return value
# This is used in VLArray and EArray to produce NumPy object compliant
# with atom from a generic python type. If copy is stated as True, it
# is assured that it will return a copy of the object and never the same
# object or a new one sharing the same memory.
def convert_to_np_atom(arr, atom, copy=False):
    """Convert a generic object into a NumPy object compliant with atom.

    Parameters
    ----------
    arr :
        A generic object holding the data; it is first normalized via
        ``array_of_flavor(arr, 'numpy')``.
    atom :
        A PyTables atom; only its ``shape``, ``dtype`` and
        ``dtype.shape`` attributes are used here.
    copy : bool
        If true, always return a freshly allocated array that shares
        no memory with `arr`.
    """

    # First, convert the object into a NumPy array
    nparr = array_of_flavor(arr, 'numpy')
    # Copy of data if necessary for getting a contiguous buffer, or if
    # dtype is not the correct one.
    if atom.shape == ():
        # Scalar atom case
        # NOTE(review): under NumPy >= 2.0, ``copy=False`` forbids copying
        # entirely (raising if a copy is needed) instead of meaning
        # "copy only if needed" — confirm the targeted NumPy version.
        nparr = np.array(nparr, dtype=atom.dtype, copy=copy)
    else:
        # Multidimensional atom case. Addresses #133.
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional. See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to taking advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        # A one-field record dtype lets broadcasting fill the trailing
        # atom dimensions for us.
        nparr2 = np.empty(newshape, dtype=[('', atom.dtype)])
        nparr2['f0'][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr
# The next is used in Array, EArray and VLArray, and it is a bit more
# high level than convert_to_np_atom
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom."""

    # 'time64' data undergoes an in-place conversion later on, so it
    # must be copied here to keep the caller's object safe.
    needs_copy = atom.type in ['time64']
    nparr = convert_to_np_atom(object, atom, needs_copy)
    # Deliver the data in the native byte order, swapping if required.
    byteorder = byteorders[nparr.dtype.byteorder]
    if byteorder in ('little', 'big') and byteorder != sys.byteorder:
        # byteswap() returns a swapped copy, leaving the original
        # array unmodified.
        nparr = nparr.byteswap()
    return nparr
def check_file_access(filename, mode='r'):
    """Check for file access in the specified `mode`.

    `mode` is one of the modes supported by `File` objects. If the file
    indicated by `filename` can be accessed using that `mode`, the
    function ends successfully. Else, an ``IOError`` is raised
    explaining the reason of the failure.

    All this paraphernalia is used to avoid the lengthy and scaring HDF5
    messages produced when there are problems opening a file. No
    changes are ever made to the file system.

    """

    path = Path(filename).resolve()

    if mode == 'r':
        # Reading needs an existing, readable, regular file.
        if not os.access(path, os.F_OK):
            raise FileNotFoundError(f"``{path}`` does not exist")
        if not path.is_file():
            raise IsADirectoryError(f"``{path}`` is not a regular file")
        if not os.access(path, os.R_OK):
            raise PermissionError(f"file ``{path}`` exists but it can not be read")
    elif mode == 'r+':
        # Read/write access: readable (above) plus writable.
        check_file_access(path, 'r')
        if not os.access(path, os.W_OK):
            raise PermissionError(f"file ``{path}`` exists but it can not be written")
    elif mode == 'w':
        if os.access(path, os.F_OK):
            # Since the file is not removed but replaced,
            # it must already be accessible to read and write operations.
            check_file_access(path, 'r+')
        else:
            # A new file is going to be created,
            # so the directory should be writable.
            parent = path.parent
            if not os.access(parent, os.F_OK):
                raise FileNotFoundError(f"``{parent}`` does not exist")
            if not parent.is_dir():
                raise NotADirectoryError(f"``{parent}`` is not a directory")
            if not os.access(parent, os.W_OK):
                raise PermissionError(
                    f"directory ``{parent}`` exists but it can not be "
                    f"written"
                )
    elif mode == 'a':
        # Append works on an existing file, or creates a new one.
        if os.access(path, os.F_OK):
            check_file_access(path, 'r+')
        else:
            check_file_access(path, 'w')
    else:
        raise ValueError(f"invalid mode: {mode!r}")
def lazyattr(fget):
    """Create a *lazy attribute* from the result of `fget`.

    This function is intended to be used as a *method decorator*. It
    returns a *property* which caches the result of calling the `fget`
    instance method. The docstring of `fget` is used for the property
    itself. For instance:

    >>> class MyClass(object):
    ...     @lazyattr
    ...     def attribute(self):
    ...         'Attribute description.'
    ...         print('creating value')
    ...         return 10
    ...
    >>> type(MyClass.attribute)
    <class 'property'>
    >>> MyClass.attribute.__doc__
    'Attribute description.'
    >>> obj = MyClass()
    >>> obj.__dict__
    {}
    >>> obj.attribute
    creating value
    10
    >>> obj.__dict__
    {'attribute': 10}
    >>> obj.attribute
    10
    >>> del obj.attribute
    Traceback (most recent call last):
      ...
    AttributeError: can't delete attribute 'attribute'

    .. warning::

        Please note that this decorator *changes the type of the
        decorated object* from an instance method into a property.

    """

    name = fget.__name__

    def cached_fget(self):
        # The first access computes the value and stashes it in the
        # instance dict; later accesses hit the cache.
        instance_dict = self.__dict__
        try:
            return instance_dict[name]
        except KeyError:
            value = instance_dict[name] = fget(self)
            return value

    return property(cached_fget, None, None, fget.__doc__)
def show_stats(explain, tref, encoding=None):
    """Show the used memory (only works for Linux 2.6.x).

    Prints the ``Vm*`` metrics read from ``/proc/self/status`` together
    with the wall-clock time elapsed since `tref`, and returns the
    current ``perf_counter`` reading.  `encoding` is accepted for API
    compatibility but unused here.
    """

    # Default every metric to 0 so that a missing ``Vm*`` line in
    # ``/proc/self/status`` cannot leave a name unbound (NameError)
    # when printing below.
    vmsize = vmrss = vmdata = vmstk = vmexe = vmlib = 0
    for line in Path('/proc/self/status').read_text().splitlines():
        if line.startswith("VmSize:"):
            vmsize = int(line.split()[1])
        elif line.startswith("VmRSS:"):
            vmrss = int(line.split()[1])
        elif line.startswith("VmData:"):
            vmdata = int(line.split()[1])
        elif line.startswith("VmStk:"):
            vmstk = int(line.split()[1])
        elif line.startswith("VmExe:"):
            vmexe = int(line.split()[1])
        elif line.startswith("VmLib:"):
            vmlib = int(line.split()[1])
    print("Memory usage: ******* %s *******" % explain)
    print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB")
    print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB")
    print(f"VmExe:  {vmexe:>7} kB\tVmLib: {vmlib:>7} kB")
    tnow = clock()
    print(f"WallClock time: {tnow - tref:.3f}")
    return tnow
# truncate data before calling __setitem__, to improve compression ratio
# this function is taken verbatim from netcdf4-python
def quantize(data, least_significant_digit):
    """quantize data to improve compression.

    Data is quantized using around(scale*data)/scale, where scale is
    2**bits, and bits is determined from the least_significant_digit.

    For example, if least_significant_digit=1, bits will be 4.

    """

    exponent = -least_significant_digit
    # Round the exponent away from zero before deriving the bit count.
    exponent = math.floor(exponent) if exponent < 0 else math.ceil(exponent)
    bits = math.ceil(math.log2(10 ** -exponent))
    scale = 2.0 ** bits
    return np.around(scale * data) / scale
# Utilities to detect leaked instances. See recipe 14.10 of the Python
# Cookbook by Martelli & Ascher.
# Maps a class name to a list of weak references to logged instances.
tracked_classes = {}


def log_instance_creation(instance, name=None):
    """Record a weak reference to `instance` under `name`.

    When `name` is None, the instance's class name is used.  Weak
    references keep the registry from delaying garbage collection.
    """
    if name is None:
        name = instance.__class__.__name__
    # setdefault replaces the original explicit membership test.
    tracked_classes.setdefault(name, []).append(weakref.ref(instance))
def string_to_classes(s):
    """Expand `s` into a list of class names ('*' means every tracked one)."""
    if s == '*':
        return sorted(tracked_classes)
    return s.split()
def fetch_logged_instances(classes="*"):
    """Return (classname, logged-reference count) pairs for `classes`."""
    names = sorted(tracked_classes) if classes == '*' else classes.split()
    return [(name, len(tracked_classes[name])) for name in names]
def count_logged_instances(classes, file=sys.stdout):
    """Write one 'name: count' line per class selected by `classes`."""
    names = sorted(tracked_classes) if classes == '*' else classes.split()
    for classname in names:
        file.write("%s: %d\n" % (classname, len(tracked_classes[classname])))
def list_logged_instances(classes, file=sys.stdout):
    """Write the repr of every still-alive logged instance of `classes`."""
    names = sorted(tracked_classes) if classes == '*' else classes.split()
    for classname in names:
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            # Weak refs to collected instances dereference to None.
            if obj is not None:
                file.write(' %s\n' % repr(obj))
def dump_logged_instances(classes, file=sys.stdout):
    """Write every live logged instance of `classes` with its __dict__."""
    names = sorted(tracked_classes) if classes == '*' else classes.split()
    for classname in names:
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            # Skip instances that have already been collected.
            if obj is None:
                continue
            file.write(' %s:\n' % obj)
            for key, value in obj.__dict__.items():
                file.write(f' {key:>20} : {value}\n')
#
# A class useful for cache usage
#
class CacheDict(dict):
    """A dictionary that prevents itself from growing too much."""

    def __init__(self, maxentries):
        # Soft cap: insertions beyond it trigger eviction of old entries.
        self.maxentries = maxentries
        super().__init__()

    def __setitem__(self, key, value):
        # Protection against growing the cache too much
        if len(self) > self.maxentries:
            # Remove a 10% of (arbitrary) elements from the cache.
            # NOTE: the original used true division here ("maxentries / 10"),
            # which produced a float and made the slice below raise a
            # TypeError the first time eviction triggered.  Use integer
            # division and evict at least one entry, mirroring
            # NailedDict.__setitem__.
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(self)[:entries_to_remove]:
                super().__delitem__(k)
        super().__setitem__(key, value)
class NailedDict:
    """A dictionary which ignores its items when it has nails on it."""

    def __init__(self, maxentries):
        self.maxentries = maxentries
        self._cache = {}
        self._nailcount = 0

    # Only a restricted set of dictionary methods are supported.  That
    # is why we buy instead of inherit.

    # The following are intended to be used by ``Table`` code changing
    # the set of usable indexes.

    def clear(self):
        self._cache.clear()

    def nail(self):
        self._nailcount += 1

    def unnail(self):
        self._nailcount -= 1

    # The following are intended to be used by ``Table`` code handling
    # conditions.  While at least one nail is in place, lookups behave
    # as if the dictionary were empty and stores are silently dropped.

    def __contains__(self, key):
        return self._nailcount <= 0 and key in self._cache

    def __getitem__(self, key):
        if self._nailcount > 0:
            raise KeyError(key)
        return self._cache[key]

    def get(self, key, default=None):
        return default if self._nailcount > 0 else self._cache.get(key, default)

    def __setitem__(self, key, value):
        if self._nailcount > 0:
            return
        cache = self._cache
        # Protection against growing the cache too much: evict ~10%
        # (at least one) of the entries in insertion order.
        if len(cache) > self.maxentries:
            for stale_key in list(cache)[:max(self.maxentries // 10, 1)]:
                del cache[stale_key]
        cache[key] = value
def detect_number_of_cores():
    """Detects the number of cores on a system.

    Cribbed from pp.  Returns 1 when the count cannot be determined.
    """

    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else:  # OSX:
            # BUGFIX: ``os.popen2`` was removed in Python 3; use
            # ``os.popen`` to run the same sysctl query.
            return int(os.popen("sysctl -n hw.ncpu").read())
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        if ncpus > 0:
            return ncpus
    return 1  # Default
426def _test():
427 """Run ``doctest`` on this module."""
429 import doctest
430 doctest.testmod()
# Run the embedded doctests when this module is executed as a script.
if __name__ == '__main__':
    _test()