Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_arrayterator_impl.py: 18%
71 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
"""
A buffered iterator for big arrays.

This module solves the problem of iterating over a big file-based array
without having to read it into memory. The `Arrayterator` class wraps
an array object, and when iterated it will return sub-arrays with at most
a user-specified number of elements.

"""
from operator import mul
from functools import reduce

# Public API of this module.
__all__ = ['Arrayterator']
class Arrayterator:
    """
    Buffered iterator for big arrays.

    `Arrayterator` creates a buffered iterator for reading big arrays in small
    contiguous blocks. The class is useful for objects stored in the
    file system. It allows iteration over the object *without* reading
    everything in memory; instead, small blocks are read and iterated over.

    `Arrayterator` can be used with any object that supports multidimensional
    slices. This includes NumPy arrays, but also variables from
    Scientific.IO.NetCDF or pynetcdf for example.

    Parameters
    ----------
    var : array_like
        The object to iterate over.
    buf_size : int, optional
        The buffer size. If `buf_size` is supplied, the maximum amount of
        data that will be read into memory is `buf_size` elements.
        Default is None, which will read as many elements as possible
        into memory.

    Attributes
    ----------
    var
    buf_size
    start
    stop
    step
    shape
    flat

    See Also
    --------
    numpy.ndenumerate : Multidimensional array iterator.
    numpy.flatiter : Flat array iterator.
    numpy.memmap : Create a memory-map to an array stored
                   in a binary file on disk.

    Notes
    -----
    The algorithm works by first finding a "running dimension", along which
    the blocks will be extracted. Given an array of dimensions
    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
    first dimension will be used. If, on the other hand,
    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
    Blocks are extracted along this dimension, and when the last block is
    returned the process continues from the next dimension, until all
    elements have been read.

    Examples
    --------
    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
    >>> a_itor = np.lib.Arrayterator(a, 2)
    >>> a_itor.shape
    (3, 4, 5, 6)

    Now we can iterate over ``a_itor``, and it will return arrays of size
    two. Since `buf_size` was smaller than any dimension, the first
    dimension will be iterated over first:

    >>> for subarr in a_itor:
    ...     if not subarr.all():
    ...         print(subarr, subarr.shape) # doctest: +SKIP
    >>> # [[[[0 1]]]] (1, 1, 1, 2)

    """

    def __init__(self, var, buf_size=None):
        """Wrap `var`; the initial view covers every element of it."""
        self.var = var
        self.buf_size = buf_size

        # Per-dimension bounds of the current view, expressed as positions
        # in `var`.  Lists (not tuples) because `__getitem__` fills them in
        # element by element on the object it returns.
        ndim = len(var.shape)
        self.start = [0] * ndim
        self.stop = list(var.shape)
        self.step = [1] * ndim

    def __getattr__(self, attr):
        """
        Forward lookups of unknown attributes to the wrapped object.

        Raises
        ------
        AttributeError
            If `var` itself has not been set yet, or if the wrapped object
            does not have the attribute.

        """
        # Only called when normal lookup failed.  If `var` is the missing
        # name (bare instance while being copied or unpickled), reading
        # `self.var` below would re-enter this method without end.
        if attr == 'var':
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}")
        return getattr(self.var, attr)

    def __getitem__(self, index):
        """
        Return a new arrayterator restricted to `index`.

        Parameters
        ----------
        index : int, slice, Ellipsis or tuple of these
            Selection relative to the current view, one entry per
            dimension.  Missing trailing dimensions are taken whole.  An
            integer ``k`` selects the length-one range ``k:k+1`` (the
            dimension is kept); negative integers and negative slice
            bounds count from the end of the view; an out-of-range
            integer selects nothing.

        Returns
        -------
        out : Arrayterator
            A new object of the same class over the same `var` and
            `buf_size`, with `start`, `stop` and `step` narrowed.

        Raises
        ------
        ValueError
            If a slice step is zero or negative.

        """
        # Function-scope import: the module only imports `mul` from
        # `operator`.
        from operator import index as as_index

        # Fix index, handling ellipsis and incomplete slices.
        if not isinstance(index, tuple):
            index = (index,)
        fixed = []
        length, dims = len(index), self.ndim
        for item in index:
            if item is Ellipsis:
                fixed.extend([slice(None)] * (dims-length+1))
                length = len(fixed)
            elif isinstance(item, slice):
                fixed.append(item)
            else:
                try:
                    pos = as_index(item)
                except TypeError:
                    # Not an integer; keep it so that slice-like objects
                    # exposing start/stop/step continue to work.
                    fixed.append(item)
                else:
                    # ``-1:0`` would be empty, so the last position needs
                    # an open upper bound.
                    end = None if pos == -1 else pos + 1
                    fixed.append(slice(pos, end, 1))
        if len(fixed) < dims:
            fixed.extend([slice(None)] * (dims-len(fixed)))

        # Return a new arrayterator object.
        out = self.__class__(self.var, self.buf_size)
        for i, (start, stop, step, item) in enumerate(
                zip(self.start, self.stop, self.step, fixed)):
            if item.step is not None and item.step < 0:
                raise ValueError(
                    "Arrayterator does not support negative slice steps")
            # The current view along this dimension is exactly
            # range(start, stop, step).  Slicing that range composes the
            # two selections: it resolves None and negative bounds against
            # the view length, scales offsets by the view's step, and
            # rejects a zero step.
            picked = range(start, stop, step)[
                slice(item.start, item.stop, item.step)]
            out.start[i] = picked.start
            out.step[i] = picked.step
            # Never reach past the parent view, and never let stop fall
            # below start so that an empty selection has extent 0.
            out.stop[i] = max(picked.start, min(stop, picked.stop))
        return out

    def __array__(self, dtype=None, copy=None):
        """
        Return corresponding data.

        The whole current view is read from `var` with a single
        multidimensional slice.  `dtype` and `copy` are accepted but are
        not used by this implementation.

        """
        slice_ = tuple(slice(*t) for t in zip(
                self.start, self.stop, self.step))
        return self.var[slice_]

    @property
    def flat(self):
        """
        A 1-D flat iterator for Arrayterator objects.

        This iterator returns elements of the array to be iterated over in
        `~lib.Arrayterator` one by one.
        It is similar to `flatiter`.

        See Also
        --------
        lib.Arrayterator
        flatiter

        Examples
        --------
        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
        >>> a_itor = np.lib.Arrayterator(a, 2)

        >>> for subarr in a_itor.flat:
        ...     if not subarr:
        ...         print(subarr, type(subarr))
        ...
        0 <class 'numpy.int64'>

        """
        for block in self:
            yield from block.flat

    @property
    def shape(self):
        """
        The shape of the array to be iterated over.

        For an example, see `Arrayterator`.

        """
        return tuple(((stop-start-1)//step+1) for start, stop, step in
                zip(self.start, self.stop, self.step))

    def __iter__(self):
        """
        Yield consecutive blocks of the current view.

        Each block is ``self.var[...]`` for a tuple of slices chosen so
        that a block holds at most `buf_size` elements (or the whole view
        when `buf_size` is None or 0).  Nothing is yielded when any
        dimension of the view is empty.

        """
        # `shape` is a computed property; evaluate it once up front.
        shape = self.shape

        # Skip arrays with degenerate dimensions
        if any(dim <= 0 for dim in shape):
            return

        start = self.start[:]
        stop = self.stop[:]
        step = self.step[:]
        ndims = self.var.ndim

        while True:
            count = self.buf_size or reduce(mul, shape)

            # iterate over each dimension, looking for the
            # running dimension (ie, the dimension along which
            # the blocks will be built from)
            rundim = 0
            for i in range(ndims-1, -1, -1):
                # if count is zero we ran out of elements to read
                # along higher dimensions, so we read only a single position
                if count == 0:
                    stop[i] = start[i]+1
                elif count <= shape[i]:
                    # limit along this dimension
                    stop[i] = start[i] + count*step[i]
                    rundim = i
                else:
                    # read everything along this dimension
                    stop[i] = self.stop[i]
                stop[i] = min(self.stop[i], stop[i])
                count = count//shape[i]

            # yield a block
            slice_ = tuple(slice(*t) for t in zip(start, stop, step))
            yield self.var[slice_]

            # Update start position, taking care of overflow to
            # other dimensions
            start[rundim] = stop[rundim]  # start where we stopped
            for i in range(ndims-1, 0, -1):
                if start[i] >= self.stop[i]:
                    start[i] = self.start[i]
                    start[i-1] += self.step[i-1]
            if start[0] >= self.stop[0]:
                return