Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/

1from contextlib import nullcontext

2import operator

3import numpy as np

4from .._utils import set_module

5from .numeric import uint8, ndarray, dtype

7__all__ = ['memmap']

# Alias kept so that code which takes a parameter named ``dtype`` (and thereby
# shadows the class) can still reach the dtype constructor.
dtypedescr = dtype

# Canonical mode strings accepted by ``memmap``.  This stays a list because
# the error path in ``memmap.__new__`` concatenates it with another list.
valid_filemodes = ["r", "c", "r+", "w+"]

# Canonical modes that permit writing to the file.
writeable_filemodes = ["r+", "w+"]

# Long-form spellings mapped onto their canonical mode string.
mode_equivalents = {
    "readonly": "r",
    "copyonwrite": "c",
    "readwrite": "r+",
    "write": "w+",
}

21@set_module('numpy')

22class memmap(ndarray):

23 """Create a memory-map to an array stored in a *binary* file on disk.

25 Memory-mapped files are used for accessing small segments of large files

26 on disk, without reading the entire file into memory. NumPy's

27 memmap's are array-like objects. This differs from Python's ``mmap``

28 module, which uses file-like objects.

30 This subclass of ndarray has some unpleasant interactions with

31 some operations, because it doesn't quite fit properly as a subclass.

32 An alternative to using this subclass is to create the ``mmap``

33 object yourself, then create an ndarray with ndarray.__new__ directly,

34 passing the object created in its 'buffer=' parameter.

36 This class may at some point be turned into a factory function

37 which returns a view into an mmap buffer.

39 Flush the memmap instance to write the changes to the file. Currently there

40 is no API to close the underlying ``mmap``. It is tricky to ensure the

41 resource is actually closed, since it may be shared between different

42 memmap instances.

45 Parameters

46 ----------

47 filename : str, file-like object, or pathlib.Path instance

48 The file name or file object to be used as the array data buffer.

49 dtype : data-type, optional

50 The data-type used to interpret the file contents.

51 Default is `uint8`.

52 mode : {'r+', 'r', 'w+', 'c'}, optional

53 The file is opened in this mode:

55 +------+-------------------------------------------------------------+

56 | 'r' | Open existing file for reading only. |

57 +------+-------------------------------------------------------------+

58 | 'r+' | Open existing file for reading and writing. |

59 +------+-------------------------------------------------------------+

60 | 'w+' | Create or overwrite existing file for reading and writing. |

61 | | If ``mode == 'w+'`` then `shape` must also be specified. |

62 +------+-------------------------------------------------------------+

63 | 'c' | Copy-on-write: assignments affect data in memory, but |

64 | | changes are not saved to disk. The file on disk is |

65 | | read-only. |

66 +------+-------------------------------------------------------------+

68 Default is 'r+'.

69 offset : int, optional

70 In the file, array data starts at this offset. Since `offset` is

71 measured in bytes, it should normally be a multiple of the byte-size

72 of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of

73 file are valid; The file will be extended to accommodate the

74 additional data. By default, ``memmap`` will start at the beginning of

75 the file, even if ``filename`` is a file pointer ``fp`` and

76 ``fp.tell() != 0``.

77 shape : int or sequence of ints, optional

78 The desired shape of the array. If ``mode == 'r'`` and the number

79 of remaining bytes after `offset` is not a multiple of the byte-size

80 of `dtype`, you must specify `shape`. By default, the returned array

81 will be 1-D with the number of elements determined by file size

82 and data-type.

84 .. versionchanged:: 2.0

85 The shape parameter can now be any integer sequence type, previously

86 types were limited to tuple and int.

88 order : {'C', 'F'}, optional

89 Specify the order of the ndarray memory layout:

90 :term:`row-major`, C-style or :term:`column-major`,

91 Fortran-style. This only has an effect if the shape is

92 greater than 1-D. The default order is 'C'.

94 Attributes

95 ----------

96 filename : str or pathlib.Path instance

97 Path to the mapped file.

98 offset : int

99 Offset position in the file.

100 mode : str

101 File mode.

102

103 Methods

104 -------

105 flush

106 Flush any changes in memory to file on disk.

107 When you delete a memmap object, flush is called first to write

108 changes to disk.

109

110

111 See also

112 --------

113 lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

114

115 Notes

116 -----

117 The memmap object can be used anywhere an ndarray is accepted.

118 Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns

119 ``True``.

120

121 Memory-mapped files cannot be larger than 2GB on 32-bit systems.

122

123 When a memmap causes a file to be created or extended beyond its

124 current size in the filesystem, the contents of the new part are

125 unspecified. On systems with POSIX filesystem semantics, the extended

126 part will be filled with zero bytes.

127

128 Examples

129 --------

130 >>> import numpy as np

131 >>> data = np.arange(12, dtype='float32')

132 >>> data.resize((3,4))

133

134 This example uses a temporary file so that doctest doesn't write

135 files to your directory. You would use a 'normal' filename.

136

137 >>> from tempfile import mkdtemp

138 >>> import os.path as path

139 >>> filename = path.join(mkdtemp(), 'newfile.dat')

140

141 Create a memmap with dtype and shape that matches our data:

142

143 >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))

144 >>> fp

145 memmap([[0., 0., 0., 0.],

146 [0., 0., 0., 0.],

147 [0., 0., 0., 0.]], dtype=float32)

148

149 Write data to memmap array:

150

151 >>> fp[:] = data[:]

152 >>> fp

153 memmap([[ 0., 1., 2., 3.],

154 [ 4., 5., 6., 7.],

155 [ 8., 9., 10., 11.]], dtype=float32)

156

157 >>> fp.filename == path.abspath(filename)

158 True

159

160 Flushes memory changes to disk in order to read them back

161

162 >>> fp.flush()

163

164 Load the memmap and verify data was stored:

165

166 >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))

167 >>> newfp

168 memmap([[ 0., 1., 2., 3.],

169 [ 4., 5., 6., 7.],

170 [ 8., 9., 10., 11.]], dtype=float32)

171

172 Read-only memmap:

173

174 >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))

175 >>> fpr.flags.writeable

176 False

177

178 Copy-on-write memmap:

179

180 >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))

181 >>> fpc.flags.writeable

182 True

183

184 It's possible to assign to copy-on-write array, but values are only

185 written into the memory copy of the array, and not written to disk:

186

187 >>> fpc

188 memmap([[ 0., 1., 2., 3.],

189 [ 4., 5., 6., 7.],

190 [ 8., 9., 10., 11.]], dtype=float32)

191 >>> fpc[0,:] = 0

192 >>> fpc

193 memmap([[ 0., 0., 0., 0.],

194 [ 4., 5., 6., 7.],

195 [ 8., 9., 10., 11.]], dtype=float32)

196

197 File on disk is unchanged:

198

199 >>> fpr

200 memmap([[ 0., 1., 2., 3.],

201 [ 4., 5., 6., 7.],

202 [ 8., 9., 10., 11.]], dtype=float32)

203

204 Offset into a memmap:

205

206 >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)

207 >>> fpo

208 memmap([ 4., 5., 6., 7., 8., 9., 10., 11.], dtype=float32)

209

210 """

211

212 __array_priority__ = -100.0

213

214 def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,

215 shape=None, order='C'):

216 # Import here to minimize 'import numpy' overhead

217 import mmap

218 import os.path

219 try:

220 mode = mode_equivalents[mode]

221 except KeyError as e:

222 if mode not in valid_filemodes:

223 raise ValueError(

224 "mode must be one of {!r} (got {!r})"

225 .format(valid_filemodes + list(mode_equivalents.keys()), mode)

226 ) from None

227

228 if mode == 'w+' and shape is None:

229 raise ValueError("shape must be given if mode == 'w+'")

230

231 if hasattr(filename, 'read'):

232 f_ctx = nullcontext(filename)

233 else:

234 f_ctx = open(

235 os.fspath(filename),

236 ('r' if mode == 'c' else mode)+'b'

237 )

238

239 with f_ctx as fid:

240 fid.seek(0, 2)

241 flen = fid.tell()

242 descr = dtypedescr(dtype)

243 _dbytes = descr.itemsize

244

245 if shape is None:

246 bytes = flen - offset

247 if bytes % _dbytes:

248 raise ValueError("Size of available data is not a "

249 "multiple of the data-type size.")

250 size = bytes // _dbytes

251 shape = (size,)

252 else:

253 if type(shape) not in (tuple, list):

254 try:

255 shape = [operator.index(shape)]

256 except TypeError:

257 pass

258 shape = tuple(shape)

259 size = np.intp(1) # avoid default choice of np.int_, which might overflow

260 for k in shape:

261 size *= k

262

263 bytes = int(offset + size*_dbytes)

264

265 if mode in ('w+', 'r+'):

266 # gh-27723

267 # if bytes == 0, we write out 1 byte to allow empty memmap.

268 bytes = max(bytes, 1)

269 if flen < bytes:

270 fid.seek(bytes - 1, 0)

271 fid.write(b'\0')

272 fid.flush()

273

274 if mode == 'c':

275 acc = mmap.ACCESS_COPY

276 elif mode == 'r':

277 acc = mmap.ACCESS_READ

278 else:

279 acc = mmap.ACCESS_WRITE

280

281 start = offset - offset % mmap.ALLOCATIONGRANULARITY

282 bytes -= start

283 # bytes == 0 is problematic as in mmap length=0 maps the full file.

284 # See PR gh-27723 for a more detailed explanation.

285 if bytes == 0 and start > 0:

286 bytes += mmap.ALLOCATIONGRANULARITY

287 start -= mmap.ALLOCATIONGRANULARITY

288 array_offset = offset - start

289 mm = mmap.mmap(fid.fileno(), bytes, access=acc, offset=start)

290

291 self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,

292 offset=array_offset, order=order)

293 self._mmap = mm

294 self.offset = offset

295 self.mode = mode

296

297 if isinstance(filename, os.PathLike):

298 # special case - if we were constructed with a pathlib.path,

299 # then filename is a path object, not a string

300 self.filename = filename.resolve()

301 elif hasattr(fid, "name") and isinstance(fid.name, str):

302 # py3 returns int for TemporaryFile().name

303 self.filename = os.path.abspath(fid.name)

304 # same as memmap copies (e.g. memmap + 1)

305 else:

306 self.filename = None

307

308 return self

309

310 def __array_finalize__(self, obj):

311 if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):

312 self._mmap = obj._mmap

313 self.filename = obj.filename

314 self.offset = obj.offset

315 self.mode = obj.mode

316 else:

317 self._mmap = None

318 self.filename = None

319 self.offset = None

320 self.mode = None

321

322 def flush(self):

323 """

324 Write any changes in the array to the file on disk.

325

326 For further information, see `memmap`.

327

328 Parameters

329 ----------

330 None

331

332 See Also

333 --------

334 memmap

335

336 """

337 if self.base is not None and hasattr(self.base, 'flush'):

338 self.base.flush()

339

340 def __array_wrap__(self, arr, context=None, return_scalar=False):

341 arr = super().__array_wrap__(arr, context)

342

343 # Return a memmap if a memmap was given as the output of the

344 # ufunc. Leave the arr class unchanged if self is not a memmap

345 # to keep original memmap subclasses behavior

346 if self is arr or type(self) is not memmap:

347 return arr

348

349 # Return scalar instead of 0d memmap, e.g. for np.sum with

350 # axis=None (note that subclasses will not reach here)

351 if return_scalar:

352 return arr[()]

353

354 # Return ndarray otherwise

355 return arr.view(np.ndarray)

356

357 def __getitem__(self, index):

358 res = super().__getitem__(index)

359 if type(res) is memmap and res._mmap is None:

360 return res.view(type=ndarray)

361 return res

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/_core/memmap.py: 18%

100 statements