Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/_core/memmap.py: 18%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

100 statements  

1from contextlib import nullcontext 

2import operator 

3import numpy as np 

4from .._utils import set_module 

5from .numeric import uint8, ndarray, dtype 

6 

7__all__ = ['memmap'] 

8 

# Alias kept so that code whose parameter is named ``dtype`` can still reach
# the dtype constructor.
dtypedescr = dtype

# Short file-mode strings understood by ``memmap``.
valid_filemodes = ['r', 'c', 'r+', 'w+']

# The subset of modes that write through to the file on disk.
writeable_filemodes = ['r+', 'w+']

# Long, descriptive spellings and the short mode each one stands for.
mode_equivalents = dict(
    readonly='r',
    copyonwrite='c',
    readwrite='r+',
    write='w+',
)

19 

20 

@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory. NumPy's
    memmap's are array-like objects. This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Flush the memmap instance to write the changes to the file. Currently there
    is no API to close the underlying ``mmap``. It is tricky to ensure the
    resource is actually closed, since it may be shared between different
    memmap instances.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        |      | If ``mode == 'w+'`` then `shape` must also be specified.    |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk. The file on disk is          |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.

        .. versionchanged:: 2.0
         The shape parameter can now be any integer sequence type, previously
         types were limited to tuple and int.

    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style. This only has an effect if the shape is
        greater than 1-D. The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk.


    See Also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    Examples
    --------
    >>> import numpy as np
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Flushes memory changes to disk in order to read them back

    >>> fp.flush()

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[ 0.,  0.,  0.,  0.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([ 4.,  5.,  6.,  7.,  8.,  9., 10., 11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        """Map `filename` into memory and return it as a new array.

        The parameters are described in the class docstring.

        Raises
        ------
        ValueError
            If `mode` is not one of the accepted mode strings, if
            ``mode == 'w+'`` and `shape` is not given, or if `shape` is not
            given and the number of bytes after `offset` is not a multiple
            of the item size of `dtype`.
        """
        # Import here to minimize 'import numpy' overhead
        import mmap
        import os.path

        # Translate the long spellings ("readonly", ...) to the short file
        # modes; a value that is neither a long nor a short mode is an error.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                allowed = valid_filemodes + list(mode_equivalents.keys())
                raise ValueError(
                    f"mode must be one of {allowed!r} (got {mode!r})"
                ) from None

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given if mode == 'w+'")

        if hasattr(filename, 'read'):
            # An already-open file object was passed in: wrap it in a no-op
            # context manager so that leaving the ``with`` does not close it.
            f_ctx = nullcontext(filename)
        else:
            # Copy-on-write mode opens the file itself read-only.
            f_ctx = open(
                os.fspath(filename),
                ('r' if mode == 'c' else mode) + 'b'
            )

        with f_ctx as fid:
            # Seek to the end to learn the current file length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            _dbytes = descr.itemsize

            if shape is None:
                # No shape given: make a 1-D array out of everything that
                # follows `offset`.
                available = flen - offset
                if available % _dbytes:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = available // _dbytes
                shape = (size,)
            else:
                if type(shape) not in (tuple, list):
                    # A single integer-like value becomes a 1-D shape; any
                    # other sequence type is converted by ``tuple`` below.
                    try:
                        shape = [operator.index(shape)]
                    except TypeError:
                        pass
                shape = tuple(shape)
                # avoid default choice of np.int_, which might overflow
                size = np.intp(1)
                for k in shape:
                    size *= k

            nbytes = int(offset + size * _dbytes)

            if mode in ('w+', 'r+'):
                # gh-27723
                # if nbytes == 0, we write out 1 byte to allow empty memmap.
                nbytes = max(nbytes, 1)
                if flen < nbytes:
                    # Grow the file by writing one byte at its new last
                    # position.
                    fid.seek(nbytes - 1, 0)
                    fid.write(b'\0')
                    fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # The mmap offset is rounded down to a multiple of the
            # allocation granularity; the remainder becomes the array's
            # offset inside the mapped buffer.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            # nbytes == 0 is problematic as in mmap length=0 maps the full
            # file. See PR gh-27723 for a more detailed explanation.
            if nbytes == 0 and start > 0:
                nbytes += mmap.ALLOCATIONGRANULARITY
                start -= mmap.ALLOCATIONGRANULARITY
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if isinstance(filename, os.PathLike):
                # special case - if we were constructed with a pathlib.Path,
                # then filename is a path object, not a string
                resolve = getattr(filename, 'resolve', None)
                if callable(resolve):
                    self.filename = resolve()
                else:
                    # os.PathLike only guarantees __fspath__, so a
                    # path-like without resolve() is converted with
                    # os.fspath instead of raising AttributeError.
                    self.filename = os.path.abspath(os.fspath(filename))
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Set the memmap attributes on an array derived from `obj`.

        ``_mmap``, ``filename``, ``offset`` and ``mode`` are copied from
        `obj` when it has a ``_mmap`` attribute and may share memory with
        ``self``; otherwise all four are set to ``None``.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None, return_scalar=False):
        """Wrap a ufunc result, demoting plain memmaps to ndarray or scalar.

        `return_scalar` is deliberately not forwarded to the parent
        implementation; it is handled below.
        """
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr

        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None (note that subclasses will not reach here)
        if return_scalar:
            return arr[()]

        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array, returning an ndarray view when unmapped.

        A result whose type is exactly `memmap` but whose ``_mmap`` is
        ``None`` is returned as a plain ndarray view.
        """
        res = super().__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res

361 return res