Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/lib/_arrayterator

1"""

2A buffered iterator for big arrays.

4This module solves the problem of iterating over a big file-based array

5without having to read it into memory. The `Arrayterator` class wraps

6an array object, and when iterated it will return sub-arrays with at most

7a user-specified number of elements.

9"""

10from operator import mul

11from functools import reduce

13__all__ = ['Arrayterator']

class Arrayterator:
    """
    Buffered iterator for big arrays.

    `Arrayterator` creates a buffered iterator for reading big arrays in small
    contiguous blocks. The class is useful for objects stored in the
    file system. It allows iteration over the object *without* reading
    everything in memory; instead, small blocks are read and iterated over.

    `Arrayterator` can be used with any object that supports multidimensional
    slices. This includes NumPy arrays, but also variables from
    Scientific.IO.NetCDF or pynetcdf for example.

    Parameters
    ----------
    var : array_like
        The object to iterate over.
    buf_size : int, optional
        The buffer size. If `buf_size` is supplied, the maximum amount of
        data that will be read into memory is `buf_size` elements.
        Default is None, which will read as many elements as possible
        into memory.

    Attributes
    ----------
    var
    buf_size
    start
    stop
    step
    shape
    flat

    See Also
    --------
    numpy.ndenumerate : Multidimensional array iterator.
    numpy.flatiter : Flat array iterator.
    numpy.memmap : Create a memory-map to an array stored
                   in a binary file on disk.

    Notes
    -----
    The algorithm works by first finding a "running dimension", along which
    the blocks will be extracted. Given an array of dimensions
    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
    first dimension will be used. If, on the other hand,
    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
    Blocks are extracted along this dimension, and when the last block is
    returned the process continues from the next dimension, until all
    elements have been read.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
    >>> a_itor = np.lib.Arrayterator(a, 2)
    >>> a_itor.shape
    (3, 4, 5, 6)

    Now we can iterate over ``a_itor``, and it will return arrays of size
    two. Since `buf_size` was smaller than any dimension, the first
    dimension will be iterated over first:

    >>> for subarr in a_itor:
    ...     if not subarr.all():
    ...         print(subarr, subarr.shape) # doctest: +SKIP
    >>> # [[[[0 1]]]] (1, 1, 1, 2)

    """

    __module__ = "numpy.lib"

    def __init__(self, var, buf_size=None):
        self.var = var
        self.buf_size = buf_size

        # Per-dimension window into ``var``; initially the whole object.
        self.start = [0 for dim in var.shape]
        self.stop = list(var.shape)
        self.step = [1 for dim in var.shape]

    def __getattr__(self, attr):
        """
        Delegate unknown attribute lookups to the wrapped object.

        Raises
        ------
        AttributeError
            If `attr` is ``'var'`` (the wrapped object has not been set on
            this instance) or if the wrapped object lacks the attribute.

        """
        # ``__getattr__`` only runs when normal lookup failed.  If ``var``
        # itself is the missing name (instance built via ``__new__``, e.g.
        # while being copied or unpickled), reading ``self.var`` below would
        # re-enter this method forever and end in ``RecursionError``.
        if attr == "var":
            raise AttributeError(attr)
        return getattr(self.var, attr)

    def __getitem__(self, index):
        """
        Return a new arrayterator restricted to `index`.

        Parameters
        ----------
        index : int, slice, Ellipsis, or tuple of these
            Selection expressed relative to the current window.  Integers
            select a single position but keep the dimension (length 1).
            Negative integers and negative slice bounds count from the end
            of the current extent.  Out-of-range values are clipped, which
            yields an empty selection rather than an error.

        Returns
        -------
        Arrayterator
            A new object of the same class sharing `var` and `buf_size`.

        Raises
        ------
        TypeError
            If an index entry is neither a slice, ``Ellipsis`` nor an
            integer-like object.

        """
        # Function-scope import keeps this block self-contained.
        from operator import index as _to_index

        # Fix index, handling ellipsis and incomplete slices.
        if not isinstance(index, tuple):
            index = (index,)
        fixed = []
        length, dims = len(index), self.ndim
        for item in index:
            if item is Ellipsis:
                fixed.extend([slice(None)] * (dims - length + 1))
                length = len(fixed)
            elif isinstance(item, slice):
                fixed.append(item)
            else:
                # Accept any integer-like object (including NumPy integer
                # scalars).  ``pos + 1 or None`` turns the stop for -1 into
                # "open ended" instead of 0, so the last element is selected.
                pos = _to_index(item)
                fixed.append(slice(pos, pos + 1 or None, 1))
        if len(fixed) < dims:
            fixed.extend([slice(None)] * (dims - len(fixed)))

        # Return a new arrayterator object.
        out = self.__class__(self.var, self.buf_size)
        for i, (start, stop, step, extent, sel) in enumerate(
                zip(self.start, self.stop, self.step, self.shape, fixed)):
            # Normalise start/stop against the *current* logical extent:
            # resolves None and negative values and clips to [0, extent].
            # An explicit stop of 0 therefore stays 0 instead of being
            # mistaken for "no stop".
            lo, hi, _ = slice(sel.start, sel.stop).indices(extent)
            # Logical position j lives at ``start + j*step`` in ``var``, so
            # offsets must be scaled by the step already in effect.
            out.start[i] = start + lo * step
            out.stop[i] = min(stop, start + hi * step)
            # NOTE(review): a step of 0/None is treated as 1; negative steps
            # are passed through but `shape` and iteration appear to assume
            # positive steps -- confirm before relying on them.
            out.step[i] = step * (sel.step or 1)
        return out

    def __array__(self, dtype=None, copy=None):
        """
        Return corresponding data.

        `dtype` and `copy` are accepted but not used by this method; the
        result is whatever slicing `var` with the current window returns.

        """
        slice_ = tuple(slice(*t) for t in zip(
            self.start, self.stop, self.step))
        return self.var[slice_]

    @property
    def flat(self):
        """
        A 1-D flat iterator for Arrayterator objects.

        This iterator returns elements of the array to be iterated over in
        `~lib.Arrayterator` one by one.
        It is similar to `flatiter`.

        See Also
        --------
        lib.Arrayterator
        flatiter

        Examples
        --------
        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
        >>> a_itor = np.lib.Arrayterator(a, 2)

        >>> for subarr in a_itor.flat:
        ...     if not subarr:
        ...         print(subarr, type(subarr))
        ...
        0 <class 'numpy.int64'>

        """
        for block in self:
            yield from block.flat

    @property
    def shape(self):
        """
        The shape of the array to be iterated over.

        Each extent is the number of positions in ``start:stop:step`` and is
        never negative; an empty selection reports 0.

        For an example, see `Arrayterator`.

        """
        return tuple(max(0, (stop - start - 1) // step + 1)
                     for start, stop, step in
                     zip(self.start, self.stop, self.step))

    def __iter__(self):
        """
        Yield successive blocks of `var` covering the current window.

        When `buf_size` is None or 0 the first block is sized to hold the
        whole window; otherwise `buf_size` is the starting element budget
        for each block.

        """
        # ``shape`` is a computed property; evaluate it once per iteration
        # run instead of once per use.
        shape = self.shape

        # Skip arrays with degenerate dimensions
        if any(dim <= 0 for dim in shape):
            return

        start = self.start[:]
        stop = self.stop[:]
        step = self.step[:]
        ndims = self.var.ndim

        while True:
            count = self.buf_size or reduce(mul, shape)

            # iterate over each dimension, looking for the
            # running dimension (ie, the dimension along which
            # the blocks will be built from)
            rundim = 0
            for i in range(ndims - 1, -1, -1):
                # if count is zero we ran out of elements to read
                # along higher dimensions, so we read only a single position
                if count == 0:
                    stop[i] = start[i] + 1
                elif count <= shape[i]:
                    # limit along this dimension
                    stop[i] = start[i] + count * step[i]
                    rundim = i
                else:
                    # read everything along this dimension
                    stop[i] = self.stop[i]
                stop[i] = min(self.stop[i], stop[i])
                count = count // shape[i]

            # yield a block
            slice_ = tuple(slice(*t) for t in zip(start, stop, step))
            yield self.var[slice_]

            # Update start position, taking care of overflow to
            # other dimensions
            start[rundim] = stop[rundim]  # start where we stopped
            for i in range(ndims - 1, 0, -1):
                if start[i] >= self.stop[i]:
                    start[i] = self.start[i]
                    start[i - 1] += self.step[i - 1]
            if start[0] >= self.stop[0]:
                return

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/lib/_arrayterator_impl.py: 19%

73 statements