Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/lib/_arrayterator_impl.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

73 statements  

1""" 

2A buffered iterator for big arrays. 

3 

4This module solves the problem of iterating over a big file-based array 

5without having to read it into memory. The `Arrayterator` class wraps 

6an array object, and when iterated it will return sub-arrays with at most 

7a user-specified number of elements. 

8 

9""" 

10from operator import mul 

11from functools import reduce 

12 

13__all__ = ['Arrayterator'] 

14 

15 

class Arrayterator:
    """
    Buffered iterator for big arrays.

    `Arrayterator` creates a buffered iterator for reading big arrays in small
    contiguous blocks. The class is useful for objects stored in the
    file system. It allows iteration over the object *without* reading
    everything in memory; instead, small blocks are read and iterated over.

    `Arrayterator` can be used with any object that supports multidimensional
    slices. This includes NumPy arrays, but also variables from
    Scientific.IO.NetCDF or pynetcdf for example.

    Parameters
    ----------
    var : array_like
        The object to iterate over.
    buf_size : int, optional
        The buffer size. If `buf_size` is supplied, the maximum amount of
        data that will be read into memory is `buf_size` elements.
        Default is None, which will read as many elements as possible
        into memory.

    Attributes
    ----------
    var
    buf_size
    start
    stop
    step
    shape
    flat

    See Also
    --------
    numpy.ndenumerate : Multidimensional array iterator.
    numpy.flatiter : Flat array iterator.
    numpy.memmap : Create a memory-map to an array stored
                   in a binary file on disk.

    Notes
    -----
    The algorithm works by first finding a "running dimension", along which
    the blocks will be extracted. Given an array of dimensions
    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
    first dimension will be used. If, on the other hand,
    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
    Blocks are extracted along this dimension, and when the last block is
    returned the process continues from the next dimension, until all
    elements have been read.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
    >>> a_itor = np.lib.Arrayterator(a, 2)
    >>> a_itor.shape
    (3, 4, 5, 6)

    Now we can iterate over ``a_itor``, and it will return arrays of size
    two. Since `buf_size` was smaller than any dimension, the first
    dimension will be iterated over first:

    >>> for subarr in a_itor:
    ...     if not subarr.all():
    ...         print(subarr, subarr.shape) # doctest: +SKIP
    >>> # [[[[0 1]]]] (1, 1, 1, 2)

    """

    __module__ = "numpy.lib"

    def __init__(self, var, buf_size=None):
        self.var = var
        self.buf_size = buf_size

        # Per-axis bounds of the current view, expressed in indices of `var`.
        ndim = len(var.shape)
        self.start = [0] * ndim
        self.stop = list(var.shape)
        self.step = [1] * ndim

    def __getattr__(self, attr):
        """
        Delegate unknown attributes to the wrapped object.

        Raises
        ------
        AttributeError
            If `attr` is ``'var'`` itself, i.e. the instance has no wrapped
            object yet, or if the wrapped object lacks `attr`.

        """
        # `__getattr__` only runs when normal lookup fails.  If `var` itself
        # is missing (a bare instance, e.g. while being copied or unpickled),
        # `self.var` below would re-enter this method without end.
        if attr == "var":
            raise AttributeError(attr)
        return getattr(self.var, attr)

    def __getitem__(self, index):
        """
        Return a new arrayterator restricted to `index`.

        Parameters
        ----------
        index : int, slice, Ellipsis or tuple of those
            Selection relative to the current view.  Negative integers and
            negative slice bounds count from the end of the axis.  An integer
            keeps its axis with length one.  Missing trailing axes are
            selected in full.

        Returns
        -------
        Arrayterator
            A new object of the same class wrapping the same `var` with the
            same `buf_size`.  An integer outside the axis, or an empty slice,
            gives a length-zero axis rather than an error.

        Raises
        ------
        ValueError
            If a slice has a negative step (blocks are only read in forward
            order) or a zero step.

        """
        # Fix index, handling ellipsis and incomplete slices.
        if not isinstance(index, tuple):
            index = (index,)
        fixed = []
        length, dims = len(index), self.ndim
        for item in index:
            if item is Ellipsis:
                fixed.extend([slice(None)] * (dims - length + 1))
                length = len(fixed)
            else:
                fixed.append(item)
        if len(fixed) < dims:
            fixed.extend([slice(None)] * (dims - len(fixed)))

        # Return a new arrayterator object.
        out = self.__class__(self.var, self.buf_size)
        for i, (start, stop, step, item) in enumerate(
                zip(self.start, self.stop, self.step, fixed)):
            # Number of elements the current view has along this axis.
            size = len(range(start, stop, step))

            if isinstance(item, int):
                pos = item + size if item < 0 else item
                if 0 <= pos < size:
                    first, last, stride = pos, pos + 1, 1
                else:
                    # Out of range: select nothing instead of raising, as
                    # this class has never raised for such indices.
                    first, last, stride = 0, 0, 1
            else:
                # `slice.indices` resolves None and negative bounds against
                # the axis length, so an explicit 0 is kept as 0.
                first, last, stride = item.indices(size)
                if stride < 0:
                    raise ValueError(
                        "Arrayterator does not support negative slice steps")

            # Positions in the view are `step` apart in `var`, so scale them.
            # Clamping keeps an empty selection at start == stop.
            new_start = min(stop, start + first * step)
            new_stop = max(new_start, min(stop, start + last * step))
            out.start[i] = new_start
            out.stop[i] = new_stop
            out.step[i] = step * stride
        return out

    def __array__(self, dtype=None, copy=None):
        """
        Return corresponding data.

        `dtype` and `copy` are accepted for compatibility with the array
        protocol signature but are not applied; the result is whatever
        slicing `var` returns.

        """
        slice_ = tuple(slice(*t) for t in zip(
                self.start, self.stop, self.step))
        return self.var[slice_]

    @property
    def flat(self):
        """
        A 1-D flat iterator for Arrayterator objects.

        This iterator returns elements of the array to be iterated over in
        `~lib.Arrayterator` one by one.
        It is similar to `flatiter`.

        See Also
        --------
        lib.Arrayterator
        flatiter

        Examples
        --------
        >>> import numpy as np
        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
        >>> a_itor = np.lib.Arrayterator(a, 2)

        >>> for subarr in a_itor.flat:
        ...     if not subarr:
        ...         print(subarr, type(subarr))
        ...
        0 <class 'numpy.int64'>

        """
        for block in self:
            yield from block.flat

    @property
    def shape(self):
        """
        The shape of the array to be iterated over.

        For an example, see `Arrayterator`.

        """
        return tuple(((stop - start - 1) // step + 1) for start, stop, step in
                     zip(self.start, self.stop, self.step))

    def __iter__(self):
        """
        Yield successive blocks of `var`, each holding at most `buf_size`
        elements (or everything at once when `buf_size` is not set).

        """
        # `shape` is a computed property; evaluate it once for the whole run.
        shape = self.shape

        # Skip arrays with degenerate dimensions
        if any(dim <= 0 for dim in shape):
            return

        ndims = self.var.ndim
        if ndims == 0:
            # A 0-d object has a single element and no axis to walk along.
            yield self.var[()]
            return

        lower = self.start[:]
        upper = self.stop[:]
        step = self.step[:]
        start = lower[:]
        stop = upper[:]
        total = reduce(mul, shape)

        while True:
            count = self.buf_size or total

            # iterate over each dimension, looking for the
            # running dimension (ie, the dimension along which
            # the blocks will be built from)
            rundim = 0
            for i in range(ndims - 1, -1, -1):
                # if count is zero we ran out of elements to read
                # along higher dimensions, so we read only a single position
                if count == 0:
                    stop[i] = start[i] + 1
                elif count <= shape[i]:
                    # limit along this dimension
                    stop[i] = start[i] + count * step[i]
                    rundim = i
                else:
                    # read everything along this dimension
                    stop[i] = upper[i]
                stop[i] = min(upper[i], stop[i])
                count = count // shape[i]

            # yield a block
            yield self.var[tuple(slice(*t) for t in zip(start, stop, step))]

            # Update start position, taking care of overflow to
            # other dimensions
            start[rundim] = stop[rundim]  # start where we stopped
            for i in range(ndims - 1, 0, -1):
                if start[i] >= upper[i]:
                    start[i] = lower[i]
                    start[i - 1] += step[i - 1]
            if start[0] >= upper[0]:
                return

218 start[rundim] = stop[rundim] # start where we stopped 

219 for i in range(ndims-1, 0, -1): 

220 if start[i] >= self.stop[i]: 

221 start[i] = self.start[i] 

222 start[i-1] += self.step[i-1] 

223 if start[0] >= self.stop[0]: 

224 return