Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/numpy/_core/memmap.py: 18%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

101 statements  

1import operator 

2from contextlib import nullcontext 

3 

4import numpy as np 

5from numpy._utils import set_module 

6 

7from .numeric import dtype, ndarray, uint8 

8 

9__all__ = ['memmap'] 

10 

# Alias for the imported ``dtype`` class: ``memmap.__new__`` has a parameter
# that is also called ``dtype``, so the class is reached through this name.
dtypedescr = dtype

# Canonical short mode strings accepted by ``memmap``.
valid_filemodes = ["r", "c", "r+", "w+"]

# The subset of canonical modes that open the file for reading and writing.
writeable_filemodes = ["r+", "w+"]

# Long-form spellings of ``mode`` and the canonical string each one maps to.
mode_equivalents = {
    "readonly": "r",
    "copyonwrite": "c",
    "readwrite": "r+",
    "write": "w+",
}

21 

22 

23@set_module('numpy') 

24class memmap(ndarray): 

25 """Create a memory-map to an array stored in a *binary* file on disk. 

26 

27 Memory-mapped files are used for accessing small segments of large files 

28 on disk, without reading the entire file into memory. NumPy's 

29 memmap's are array-like objects. This differs from Python's ``mmap`` 

30 module, which uses file-like objects. 

31 

32 This subclass of ndarray has some unpleasant interactions with 

33 some operations, because it doesn't quite fit properly as a subclass. 

34 An alternative to using this subclass is to create the ``mmap`` 

35 object yourself, then create an ndarray with ndarray.__new__ directly, 

36 passing the object created in its 'buffer=' parameter. 

37 

38 This class may at some point be turned into a factory function 

39 which returns a view into an mmap buffer. 

40 

41 Flush the memmap instance to write the changes to the file. Currently there 

42 is no API to close the underlying ``mmap``. It is tricky to ensure the 

43 resource is actually closed, since it may be shared between different 

44 memmap instances. 

45 

46 

47 Parameters 

48 ---------- 

49 filename : str, file-like object, or pathlib.Path instance 

50 The file name or file object to be used as the array data buffer. 

51 dtype : data-type, optional 

52 The data-type used to interpret the file contents. 

53 Default is `uint8`. 

54 mode : {'r+', 'r', 'w+', 'c'}, optional 

55 The file is opened in this mode: 

56 

57 +------+-------------------------------------------------------------+ 

58 | 'r' | Open existing file for reading only. | 

59 +------+-------------------------------------------------------------+ 

60 | 'r+' | Open existing file for reading and writing. | 

61 +------+-------------------------------------------------------------+ 

62 | 'w+' | Create or overwrite existing file for reading and writing. | 

63 | | If ``mode == 'w+'`` then `shape` must also be specified. | 

64 +------+-------------------------------------------------------------+ 

65 | 'c' | Copy-on-write: assignments affect data in memory, but | 

66 | | changes are not saved to disk. The file on disk is | 

67 | | read-only. | 

68 +------+-------------------------------------------------------------+ 

69 

70 Default is 'r+'. 

71 offset : int, optional 

72 In the file, array data starts at this offset. Since `offset` is 

73 measured in bytes, it should normally be a multiple of the byte-size 

74 of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of 

75 file are valid; The file will be extended to accommodate the 

76 additional data. By default, ``memmap`` will start at the beginning of 

77 the file, even if ``filename`` is a file pointer ``fp`` and 

78 ``fp.tell() != 0``. 

79 shape : int or sequence of ints, optional 

80 The desired shape of the array. If ``mode == 'r'`` and the number 

81 of remaining bytes after `offset` is not a multiple of the byte-size 

82 of `dtype`, you must specify `shape`. By default, the returned array 

83 will be 1-D with the number of elements determined by file size 

84 and data-type. 

85 

86 .. versionchanged:: 2.0 

87 The shape parameter can now be any integer sequence type, previously 

88 types were limited to tuple and int. 

89 

90 order : {'C', 'F'}, optional 

91 Specify the order of the ndarray memory layout: 

92 :term:`row-major`, C-style or :term:`column-major`, 

93 Fortran-style. This only has an effect if the shape is 

94 greater than 1-D. The default order is 'C'. 

95 

96 Attributes 

97 ---------- 

98 filename : str or pathlib.Path instance 

99 Path to the mapped file. 

100 offset : int 

101 Offset position in the file. 

102 mode : str 

103 File mode. 

104 

105 Methods 

106 ------- 

107 flush 

108 Flush any changes in memory to file on disk. 

109 When you delete a memmap object, flush is called first to write 

110 changes to disk. 

111 

112 

113 See also 

114 -------- 

115 lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file. 

116 

117 Notes 

118 ----- 

119 The memmap object can be used anywhere an ndarray is accepted. 

120 Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns 

121 ``True``. 

122 

123 Memory-mapped files cannot be larger than 2GB on 32-bit systems. 

124 

125 When a memmap causes a file to be created or extended beyond its 

126 current size in the filesystem, the contents of the new part are 

127 unspecified. On systems with POSIX filesystem semantics, the extended 

128 part will be filled with zero bytes. 

129 

130 Examples 

131 -------- 

132 >>> import numpy as np 

133 >>> data = np.arange(12, dtype='float32') 

134 >>> data.resize((3,4)) 

135 

136 This example uses a temporary file so that doctest doesn't write 

137 files to your directory. You would use a 'normal' filename. 

138 

139 >>> from tempfile import mkdtemp 

140 >>> import os.path as path 

141 >>> filename = path.join(mkdtemp(), 'newfile.dat') 

142 

143 Create a memmap with dtype and shape that matches our data: 

144 

145 >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4)) 

146 >>> fp 

147 memmap([[0., 0., 0., 0.], 

148 [0., 0., 0., 0.], 

149 [0., 0., 0., 0.]], dtype=float32) 

150 

151 Write data to memmap array: 

152 

153 >>> fp[:] = data[:] 

154 >>> fp 

155 memmap([[ 0., 1., 2., 3.], 

156 [ 4., 5., 6., 7.], 

157 [ 8., 9., 10., 11.]], dtype=float32) 

158 

159 >>> fp.filename == path.abspath(filename) 

160 True 

161 

162 Flushes memory changes to disk in order to read them back 

163 

164 >>> fp.flush() 

165 

166 Load the memmap and verify data was stored: 

167 

168 >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4)) 

169 >>> newfp 

170 memmap([[ 0., 1., 2., 3.], 

171 [ 4., 5., 6., 7.], 

172 [ 8., 9., 10., 11.]], dtype=float32) 

173 

174 Read-only memmap: 

175 

176 >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4)) 

177 >>> fpr.flags.writeable 

178 False 

179 

180 Copy-on-write memmap: 

181 

182 >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4)) 

183 >>> fpc.flags.writeable 

184 True 

185 

186 It's possible to assign to copy-on-write array, but values are only 

187 written into the memory copy of the array, and not written to disk: 

188 

189 >>> fpc 

190 memmap([[ 0., 1., 2., 3.], 

191 [ 4., 5., 6., 7.], 

192 [ 8., 9., 10., 11.]], dtype=float32) 

193 >>> fpc[0,:] = 0 

194 >>> fpc 

195 memmap([[ 0., 0., 0., 0.], 

196 [ 4., 5., 6., 7.], 

197 [ 8., 9., 10., 11.]], dtype=float32) 

198 

199 File on disk is unchanged: 

200 

201 >>> fpr 

202 memmap([[ 0., 1., 2., 3.], 

203 [ 4., 5., 6., 7.], 

204 [ 8., 9., 10., 11.]], dtype=float32) 

205 

206 Offset into a memmap: 

207 

208 >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16) 

209 >>> fpo 

210 memmap([ 4., 5., 6., 7., 8., 9., 10., 11.], dtype=float32) 

211 

212 """ 

213 

214 __array_priority__ = -100.0 

215 

216 def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0, 

217 shape=None, order='C'): 

218 # Import here to minimize 'import numpy' overhead 

219 import mmap 

220 import os.path 

221 try: 

222 mode = mode_equivalents[mode] 

223 except KeyError as e: 

224 if mode not in valid_filemodes: 

225 all_modes = valid_filemodes + list(mode_equivalents.keys()) 

226 raise ValueError( 

227 f"mode must be one of {all_modes!r} (got {mode!r})" 

228 ) from None 

229 

230 if mode == 'w+' and shape is None: 

231 raise ValueError("shape must be given if mode == 'w+'") 

232 

233 if hasattr(filename, 'read'): 

234 f_ctx = nullcontext(filename) 

235 else: 

236 f_ctx = open( 

237 os.fspath(filename), 

238 ('r' if mode == 'c' else mode) + 'b' 

239 ) 

240 

241 with f_ctx as fid: 

242 fid.seek(0, 2) 

243 flen = fid.tell() 

244 descr = dtypedescr(dtype) 

245 _dbytes = descr.itemsize 

246 

247 if shape is None: 

248 bytes = flen - offset 

249 if bytes % _dbytes: 

250 raise ValueError("Size of available data is not a " 

251 "multiple of the data-type size.") 

252 size = bytes // _dbytes 

253 shape = (size,) 

254 else: 

255 if not isinstance(shape, (tuple, list)): 

256 try: 

257 shape = [operator.index(shape)] 

258 except TypeError: 

259 pass 

260 shape = tuple(shape) 

261 size = np.intp(1) # avoid overflows 

262 for k in shape: 

263 size *= k 

264 

265 bytes = int(offset + size * _dbytes) 

266 

267 if mode in ('w+', 'r+'): 

268 # gh-27723 

269 # if bytes == 0, we write out 1 byte to allow empty memmap. 

270 bytes = max(bytes, 1) 

271 if flen < bytes: 

272 fid.seek(bytes - 1, 0) 

273 fid.write(b'\0') 

274 fid.flush() 

275 

276 if mode == 'c': 

277 acc = mmap.ACCESS_COPY 

278 elif mode == 'r': 

279 acc = mmap.ACCESS_READ 

280 else: 

281 acc = mmap.ACCESS_WRITE 

282 

283 start = offset - offset % mmap.ALLOCATIONGRANULARITY 

284 bytes -= start 

285 # bytes == 0 is problematic as in mmap length=0 maps the full file. 

286 # See PR gh-27723 for a more detailed explanation. 

287 if bytes == 0 and start > 0: 

288 bytes += mmap.ALLOCATIONGRANULARITY 

289 start -= mmap.ALLOCATIONGRANULARITY 

290 array_offset = offset - start 

291 mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start) 

292 

293 self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm, 

294 offset=array_offset, order=order) 

295 self._mmap = mm 

296 self.offset = offset 

297 self.mode = mode 

298 

299 if isinstance(filename, os.PathLike): 

300 # special case - if we were constructed with a pathlib.path, 

301 # then filename is a path object, not a string 

302 self.filename = filename.resolve() 

303 elif hasattr(fid, "name") and isinstance(fid.name, str): 

304 # py3 returns int for TemporaryFile().name 

305 self.filename = os.path.abspath(fid.name) 

306 # same as memmap copies (e.g. memmap + 1) 

307 else: 

308 self.filename = None 

309 

310 return self 

311 

312 def __array_finalize__(self, obj): 

313 if hasattr(obj, '_mmap') and np.may_share_memory(self, obj): 

314 self._mmap = obj._mmap 

315 self.filename = obj.filename 

316 self.offset = obj.offset 

317 self.mode = obj.mode 

318 else: 

319 self._mmap = None 

320 self.filename = None 

321 self.offset = None 

322 self.mode = None 

323 

324 def flush(self): 

325 """ 

326 Write any changes in the array to the file on disk. 

327 

328 For further information, see `memmap`. 

329 

330 Parameters 

331 ---------- 

332 None 

333 

334 See Also 

335 -------- 

336 memmap 

337 

338 """ 

339 if self.base is not None and hasattr(self.base, 'flush'): 

340 self.base.flush() 

341 

342 def __array_wrap__(self, arr, context=None, return_scalar=False): 

343 arr = super().__array_wrap__(arr, context) 

344 

345 # Return a memmap if a memmap was given as the output of the 

346 # ufunc. Leave the arr class unchanged if self is not a memmap 

347 # to keep original memmap subclasses behavior 

348 if self is arr or type(self) is not memmap: 

349 return arr 

350 

351 # Return scalar instead of 0d memmap, e.g. for np.sum with 

352 # axis=None (note that subclasses will not reach here) 

353 if return_scalar: 

354 return arr[()] 

355 

356 # Return ndarray otherwise 

357 return arr.view(np.ndarray) 

358 

359 def __getitem__(self, index): 

360 res = super().__getitem__(index) 

361 if type(res) is memmap and res._mmap is None: 

362 return res.view(type=ndarray) 

363 return res