Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/core/memmap.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

91 statements  

1from contextlib import nullcontext 

2 

3import numpy as np 

4from .._utils import set_module 

5from .numeric import uint8, ndarray, dtype 

6from numpy.compat import os_fspath, is_pathlib_path 

7 

8__all__ = ['memmap'] 

9 

# Alias for the dtype constructor: ``memmap.__new__`` takes a parameter that
# is itself called ``dtype``, which hides the module-level name inside it.
dtypedescr = dtype

# Canonical mode codes accepted by ``memmap``.
valid_filemodes = ["r", "c", "r+", "w+"]

# The subset of canonical modes that write through to the file on disk.
writeable_filemodes = ["r+", "w+"]

# Long-form spellings accepted for ``mode`` and the canonical code each
# one stands for.
mode_equivalents = dict(
    readonly="r",
    copyonwrite="c",
    readwrite="r+",
    write="w+",
)

20 

21 

@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Flush the memmap instance to write the changes to the file. Currently there
    is no API to close the underlying ``mmap``. It is tricky to ensure the
    resource is actually closed, since it may be shared between different
    memmap instances.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        |      | If ``mode == 'w+'`` then `shape` must also be specified.    |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int, tuple of ints, or list of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk.


    See Also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Flushes memory changes to disk in order to read them back

    >>> fp.flush()

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[ 0.,  0.,  0.,  0.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[ 0.,  1.,  2.,  3.],
            [ 4.,  5.,  6.,  7.],
            [ 8.,  9., 10., 11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([ 4.,  5.,  6.,  7.,  8.,  9., 10., 11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        """Open (or create) the file, map it, and wrap the map in an array.

        Parameters are documented on the class.

        Raises
        ------
        ValueError
            If `mode` is not a recognised mode or alias, if `shape` is
            omitted with ``mode == 'w+'``, or if `shape` is omitted and the
            bytes after `offset` are not a whole number of `dtype` items.
        """
        # Import here to minimize 'import numpy' overhead
        import mmap
        import os.path

        # Translate long-form aliases to their canonical code; a value that
        # is not an alias must already be one of the canonical codes.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError(
                    "mode must be one of {!r} (got {!r})"
                    .format(valid_filemodes + list(mode_equivalents.keys()), mode)
                ) from None

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given if mode == 'w+'")

        if hasattr(filename, 'read'):
            # Caller-supplied file object: nullcontext leaves it open on exit.
            f_ctx = nullcontext(filename)
        else:
            # Mode 'c' opens the file itself read-only ('rb').
            f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')

        with f_ctx as fid:
            # Seek to the end to learn the current file length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            _dbytes = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from whatever follows `offset`.
                nbytes = flen - offset
                if nbytes % _dbytes:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // _dbytes
                shape = (size,)
            else:
                if isinstance(shape, list):
                    # A list is a sequence of dimensions, not one dimension;
                    # wrapping it as ``(shape,)`` would break the size
                    # computation below.
                    shape = tuple(shape)
                elif not isinstance(shape, tuple):
                    shape = (shape,)
                size = np.intp(1)  # avoid default choice of np.int_, which might overflow
                for k in shape:
                    size *= k

            # Total bytes the file must hold: leading offset plus array data.
            nbytes = int(offset + size*_dbytes)

            if mode in writeable_filemodes and flen < nbytes:
                # Grow the file to the required length by writing its
                # final byte.
                fid.seek(nbytes - 1, 0)
                fid.write(b'\0')
                fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # Round the map start down to a multiple of ALLOCATIONGRANULARITY;
            # the remainder becomes the array's offset inside the map.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if is_pathlib_path(filename):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Set the memmap attributes on a newly created instance.

        The attributes are copied from `obj` when it has an ``_mmap``
        attribute and may share memory with ``self``; otherwise they are
        all set to None.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegate to the base object when it exposes ``flush``; do nothing
        # otherwise.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None):
        """Choose the return type for ufunc results involving a memmap.

        The result is returned as-is when it is ``self`` or when ``self`` is
        a memmap subclass; a 0-d result is returned as a scalar; anything
        else is returned as a plain ``ndarray`` view.
        """
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr
        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None
        if arr.shape == ():
            return arr[()]
        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array, demoting results that lost their mmap reference.

        A result that is exactly a ``memmap`` but whose ``_mmap`` is None is
        returned as a plain ``ndarray`` view.
        """
        res = super().__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res