Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/_core/memmap.py: 19%

95 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-09 06:12 +0000

1from contextlib import nullcontext 

2import operator 

3import numpy as np 

4from .._utils import set_module 

5from .numeric import uint8, ndarray, dtype 

6 

# Names exported by ``from <this module> import *``.
__all__ = ['memmap']

# Module-level alias for the dtype constructor: ``memmap.__new__`` has a
# parameter that is itself called ``dtype`` and reaches the constructor
# through this name instead.
dtypedescr = dtype

# Short mode strings accepted by ``memmap``.  These stay lists because the
# error message in ``memmap.__new__`` concatenates ``valid_filemodes`` with
# another list.
valid_filemodes = ["r", "c", "r+", "w+"]
writeable_filemodes = ["r+", "w+"]

# Long, descriptive spellings and the short mode string each one stands for.
mode_equivalents = {
    "readonly": "r",
    "copyonwrite": "c",
    "readwrite": "r+",
    "write": "w+",
}

19 

20 

@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Flush the memmap instance to write the changes to the file. Currently there
    is no API to close the underlying ``mmap``. It is tricky to ensure the
    resource is actually closed, since it may be shared between different
    memmap instances.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        |      | If ``mode == 'w+'`` then `shape` must also be specified.    |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk. The file on disk is          |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.

        .. versionchanged:: 2.0
            The shape parameter can now be any integer sequence type,
            previously types were limited to tuple and int.

    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk.


    See also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    In the writeable modes (``'r+'`` and ``'w+'``) at least one byte is
    always mapped, so an empty array (a `shape` containing 0) can be
    created; the file is extended to one byte if it is shorter than that.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Flushes memory changes to disk in order to read them back

    >>> fp.flush()

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([  4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        """Open (or create) the file, map it, and wrap the map in an array.

        See the class docstring for the meaning of the parameters.

        Raises
        ------
        ValueError
            If `mode` is not a recognised mode string, if ``mode == 'w+'``
            and `shape` is None, or if `shape` is None and the number of
            bytes after `offset` is not a multiple of the item size.
        """
        # Import here to minimize 'import numpy' overhead
        import mmap
        import os.path

        # Translate long mode names ("readonly", ...) to the short form; a
        # mode that is already short is kept as long as it is a valid one.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError(
                    "mode must be one of {!r} (got {!r})"
                    .format(valid_filemodes + list(mode_equivalents.keys()), mode)
                ) from None

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given if mode == 'w+'")

        if hasattr(filename, 'read'):
            # Caller-supplied file object: use it, but do not close it.
            f_ctx = nullcontext(filename)
        else:
            # Copy-on-write never writes to the file, so open it read-only.
            f_ctx = open(
                os.fspath(filename),
                ('r' if mode == 'c' else mode)+'b'
            )

        with f_ctx as fid:
            # Seek to the end of the file to learn its current length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            _dbytes = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from whatever follows `offset`.
                nbytes = flen - offset
                if nbytes % _dbytes:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // _dbytes
                shape = (size,)
            else:
                if type(shape) not in (tuple, list):
                    # A single integer-like value becomes a 1-D shape; any
                    # other object is handed to tuple() unchanged.
                    try:
                        shape = [operator.index(shape)]
                    except TypeError:
                        pass
                shape = tuple(shape)
                # avoid default choice of np.int_, which might overflow
                size = np.intp(1)
                for k in shape:
                    size *= k

            # Last byte (exclusive) of the file region the array occupies.
            nbytes = int(offset + size*_dbytes)

            if mode in ('w+', 'r+'):
                # An empty array would otherwise ask for a zero-length map,
                # which mmap refuses on an empty file.  Keep at least one
                # byte so that empty memmaps can be created.
                nbytes = max(nbytes, 1)
                if flen < nbytes:
                    # Extend the file by writing its new last byte.
                    fid.seek(nbytes - 1, 0)
                    fid.write(b'\0')
                    fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # The map must start on a multiple of ALLOCATIONGRANULARITY:
            # round the start down and skip the remainder via the array
            # offset instead.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if isinstance(filename, os.PathLike):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                # NOTE(review): relies on the object providing ``resolve()``;
                # confirm for PathLike types other than pathlib paths.
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Set the memmap attributes of a newly created instance.

        The attributes are copied from `obj` when it has an ``_mmap``
        attribute and may share memory with this array; otherwise they are
        all set to None.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegates to the base object's ``flush``; nothing happens when
        # there is no base or the base has no such method.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None, return_scalar=False):
        """Wrap a result array, demoting plain memmap results.

        For instances of exactly this class a result other than `self` is
        returned as a scalar (when `return_scalar` is true) or as a
        ``np.ndarray`` view; subclasses and ``self is arr`` keep whatever
        the parent implementation returned.
        """
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr

        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None (note that subclasses will not reach here)
        if return_scalar:
            return arr[()]

        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array, returning an ndarray view when unmapped.

        A result that is exactly a ``memmap`` but whose ``_mmap`` is None
        is returned as a plain ``ndarray`` view instead.
        """
        res = super().__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res

349 if type(res) is memmap and res._mmap is None: 

350 return res.view(type=ndarray) 

351 return res