Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_arrayterator_impl.py: 18%

71 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-09 06:12 +0000

1""" 

2A buffered iterator for big arrays. 

3 

4This module solves the problem of iterating over a big file-based array 

5without having to read it into memory. The `Arrayterator` class wraps 

6an array object, and when iterated it will return sub-arrays with at most 

7a user-specified number of elements. 

8 

9""" 

10from operator import mul 

11from functools import reduce 

12 

13__all__ = ['Arrayterator'] 

14 

15 

class Arrayterator:
    """
    Buffered iterator for big arrays.

    `Arrayterator` creates a buffered iterator for reading big arrays in small
    contiguous blocks. The class is useful for objects stored in the
    file system. It allows iteration over the object *without* reading
    everything in memory; instead, small blocks are read and iterated over.

    `Arrayterator` can be used with any object that supports multidimensional
    slices. This includes NumPy arrays, but also variables from
    Scientific.IO.NetCDF or pynetcdf for example.

    Parameters
    ----------
    var : array_like
        The object to iterate over.
    buf_size : int, optional
        The buffer size. If `buf_size` is supplied, the maximum amount of
        data that will be read into memory is `buf_size` elements.
        Default is None, which will read as many elements as possible
        into memory.

    Attributes
    ----------
    var
    buf_size
    start
    stop
    step
    shape
    flat

    See Also
    --------
    numpy.ndenumerate : Multidimensional array iterator.
    numpy.flatiter : Flat array iterator.
    numpy.memmap : Create a memory-map to an array stored
                   in a binary file on disk.

    Notes
    -----
    The algorithm works by first finding a "running dimension", along which
    the blocks will be extracted. Given an array of dimensions
    ``(d1, d2, ..., dn)``, e.g. if `buf_size` is smaller than ``d1``, the
    first dimension will be used. If, on the other hand,
    ``d1 < buf_size < d1*d2`` the second dimension will be used, and so on.
    Blocks are extracted along this dimension, and when the last block is
    returned the process continues from the next dimension, until all
    elements have been read.

    Examples
    --------
    >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
    >>> a_itor = np.lib.Arrayterator(a, 2)
    >>> a_itor.shape
    (3, 4, 5, 6)

    Now we can iterate over ``a_itor``, and it will return arrays of size
    two. Since `buf_size` was smaller than any dimension, the first
    dimension will be iterated over first:

    >>> for subarr in a_itor:
    ...     if not subarr.all():
    ...         print(subarr, subarr.shape) # doctest: +SKIP
    >>> # [[[[0 1]]]] (1, 1, 1, 2)

    """

    def __init__(self, var, buf_size=None):
        self.var = var
        self.buf_size = buf_size

        # Per-dimension window into ``var``; initially the whole object.
        shape = tuple(var.shape)
        self.start = [0] * len(shape)
        self.stop = list(shape)
        self.step = [1] * len(shape)

    def __getattr__(self, attr):
        """
        Delegate lookups of unknown attributes to the wrapped object.

        Raises
        ------
        AttributeError
            If the wrapped object does not have the attribute, or if the
            wrapped object itself has not been set on this instance yet.

        """
        # ``__getattr__`` only runs when normal lookup failed, so being asked
        # for 'var' means the instance is not initialised (e.g. the bare
        # object that ``copy``/``pickle`` create before restoring state).
        # Reading ``self.var`` here would re-enter this method forever.
        if attr == 'var':
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}")
        return getattr(self.var, attr)

    def __getitem__(self, index):
        """
        Return a new arrayterator.

        Parameters
        ----------
        index : int, slice, Ellipsis or tuple of those
            Selection to apply. Integers are turned into length-one slices,
            so the number of dimensions is never reduced. Missing trailing
            dimensions are selected in full.

        Returns
        -------
        out : Arrayterator
            A new object of the same class wrapping the same `var` and
            `buf_size`, restricted to the requested window.

        Notes
        -----
        Slice bounds are added to the current ``start`` as given: they are
        not scaled by the current ``step`` and negative values are not
        normalised.

        """
        # Fix index, handling ellipsis and incomplete slices.
        if not isinstance(index, tuple):
            index = (index,)
        fixed = []
        length, dims = len(index), self.ndim
        for slice_ in index:
            if slice_ is Ellipsis:
                fixed.extend([slice(None)] * (dims-length+1))
                length = len(fixed)
            elif isinstance(slice_, int):
                fixed.append(slice(slice_, slice_+1, 1))
            else:
                fixed.append(slice_)
        index = tuple(fixed)
        if len(index) < dims:
            index += (slice(None),) * (dims-len(index))

        # Return a new arrayterator object.
        out = self.__class__(self.var, self.buf_size)
        for i, (start, stop, step, slice_) in enumerate(
                zip(self.start, self.stop, self.step, index)):
            out.start[i] = start + (slice_.start or 0)
            out.step[i] = step * (slice_.step or 1)
            # Test for None explicitly: a stop of 0 is a valid (empty)
            # bound and must not be mistaken for "no stop given".
            if slice_.stop is None:
                extent = stop - start
            else:
                extent = slice_.stop
            # Never extend past the window of the parent arrayterator.
            out.stop[i] = min(stop, start + extent)
        return out

    def __array__(self, dtype=None, copy=None):
        """
        Return corresponding data.

        `dtype` and `copy` are accepted as part of the signature but are not
        used here; the result is whatever slicing `var` returns.

        """
        slice_ = tuple(slice(*t) for t in zip(
                self.start, self.stop, self.step))
        return self.var[slice_]

    @property
    def flat(self):
        """
        A 1-D flat iterator for Arrayterator objects.

        This iterator returns elements of the array to be iterated over in
        `~lib.Arrayterator` one by one.
        It is similar to `flatiter`.

        See Also
        --------
        lib.Arrayterator
        flatiter

        Examples
        --------
        >>> a = np.arange(3 * 4 * 5 * 6).reshape(3, 4, 5, 6)
        >>> a_itor = np.lib.Arrayterator(a, 2)

        >>> for subarr in a_itor.flat:
        ...     if not subarr:
        ...         print(subarr, type(subarr))
        ...
        0 <class 'numpy.int64'>

        """
        for block in self:
            yield from block.flat

    @property
    def shape(self):
        """
        The shape of the array to be iterated over.

        For an example, see `Arrayterator`.

        """
        return tuple(((stop-start-1)//step+1) for start, stop, step in
                zip(self.start, self.stop, self.step))

    def __iter__(self):
        """
        Yield successive blocks of `var` covering the current window.

        Each block is obtained by slicing `var`; when `buf_size` is set the
        block extents are chosen from it, otherwise a single block covering
        the whole window is requested.

        """
        # ``shape`` is a computed property; evaluate it once since the
        # window (self.start/stop/step) is not modified by iteration.
        shape = self.shape

        # Skip arrays with degenerate dimensions
        if any(dim <= 0 for dim in shape):
            return

        start = self.start[:]
        stop = self.stop[:]
        step = self.step[:]
        ndims = self.var.ndim

        while True:
            count = self.buf_size or reduce(mul, shape)

            # iterate over each dimension, looking for the
            # running dimension (ie, the dimension along which
            # the blocks will be built from)
            rundim = 0
            for i in range(ndims-1, -1, -1):
                # if count is zero we ran out of elements to read
                # along higher dimensions, so we read only a single position
                if count == 0:
                    stop[i] = start[i]+1
                elif count <= shape[i]:
                    # limit along this dimension
                    stop[i] = start[i] + count*step[i]
                    rundim = i
                else:
                    # read everything along this dimension
                    stop[i] = self.stop[i]
                stop[i] = min(self.stop[i], stop[i])
                count = count//shape[i]

            # yield a block
            slice_ = tuple(slice(*t) for t in zip(start, stop, step))
            yield self.var[slice_]

            # Update start position, taking care of overflow to
            # other dimensions
            start[rundim] = stop[rundim]  # start where we stopped
            for i in range(ndims-1, 0, -1):
                if start[i] >= self.stop[i]:
                    start[i] = self.start[i]
                    start[i-1] += self.step[i-1]
            if start[0] >= self.stop[0]:
                return