Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/core/memmap.py: 20%
91 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
1from contextlib import nullcontext
3import numpy as np
4from .._utils import set_module
5from .numeric import uint8, ndarray, dtype
6from numpy.compat import os_fspath, is_pathlib_path
# Public API of this module: ``memmap`` is the only exported name.
__all__ = ['memmap']
# Alias for the dtype constructor.  ``memmap.__new__`` takes a parameter that
# is itself called ``dtype``, so it reaches the constructor through this name.
dtypedescr = dtype

# Short mode strings accepted by ``memmap``.
valid_filemodes = ["r", "c", "r+", "w+"]

# The short modes carrying a trailing "+", i.e. "r+" and "w+".
writeable_filemodes = [m for m in valid_filemodes if m.endswith("+")]

# Long, descriptive spellings mapped onto the short mode strings above.
mode_equivalents = dict(
    readonly="r",
    copyonwrite="c",
    readwrite="r+",
    write="w+",
)
@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Flush the memmap instance to write the changes to the file. Currently there
    is no API to close the underlying ``mmap``. It is tricky to ensure the
    resource is actually closed, since it may be shared between different
    memmap instances.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        |      | If ``mode == 'w+'`` then `shape` must also be specified.    |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; the file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk.


    See also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Flushes memory changes to disk in order to read them back

    >>> fp.flush()

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([ 4.,  5.,  6.,  7.,  8.,  9., 10., 11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        """Open (or create) the file, map it, and wrap the map in an array.

        See the class docstring for the meaning of each parameter.

        Raises
        ------
        ValueError
            If `mode` is not a recognised mode string, if ``mode == 'w+'``
            and `shape` is not given, or if `shape` is not given and the
            number of bytes after `offset` is not a multiple of the item
            size of `dtype`.
        """
        # Import here to minimize 'import numpy' overhead
        import mmap
        import operator
        import os.path

        # Translate a long mode name ("readonly", ...) to its short form.
        # A value that is neither a long nor a short name is rejected.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError(
                    "mode must be one of {!r} (got {!r})"
                    .format(valid_filemodes + list(mode_equivalents.keys()), mode)
                ) from None

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given if mode == 'w+'")

        if hasattr(filename, 'read'):
            # A file object supplied by the caller is used as-is and is not
            # closed when the ``with`` block below exits.
            f_ctx = nullcontext(filename)
        else:
            # 'c' is not an open() mode: open read-only here and request
            # copy-on-write access from mmap further down.
            f_ctx = open(os_fspath(filename), ('r' if mode == 'c' else mode)+'b')

        with f_ctx as fid:
            # Seek to the end to learn the current file length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            _dbytes = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from whatever follows `offset`.
                nbytes = flen - offset
                if nbytes % _dbytes:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // _dbytes
                shape = (size,)
            else:
                # Accept a single integer as well as any sequence of
                # integers.  Wrapping a list in a 1-tuple (as was done for
                # every non-tuple value) made ``size`` an array and failed
                # with an unrelated TypeError below.
                if not isinstance(shape, (tuple, list)):
                    try:
                        shape = [operator.index(shape)]
                    except TypeError:
                        pass
                shape = tuple(shape)
                size = np.intp(1)  # avoid default choice of np.int_, which might overflow
                for k in shape:
                    size *= k

            nbytes = int(offset + size*_dbytes)

            if mode in ('w+', 'r+') and flen < nbytes:
                # Grow the file by writing a single byte at its new last
                # position.
                fid.seek(nbytes - 1, 0)
                fid.write(b'\0')
                fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # The mapping is started at a multiple of ALLOCATIONGRANULARITY;
            # the remainder of `offset` is applied inside the buffer instead.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if is_pathlib_path(filename):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Inherit the mapping attributes from `obj` only if memory is shared.

        When `obj` has a ``_mmap`` attribute and ``np.may_share_memory``
        reports that `self` may overlap it, ``_mmap``, ``filename``,
        ``offset`` and ``mode`` are copied from `obj`.  Otherwise all four
        attributes are set to ``None``.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegate to the base object when it offers ``flush``.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None):
        """Wrap a ufunc result, downgrading it unless `self` was the output."""
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr
        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None
        if arr.shape == ():
            return arr[()]
        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array, returning a plain ndarray for unmapped results.

        A result whose type is exactly ``memmap`` but whose ``_mmap`` is
        ``None`` is returned as an ``ndarray`` view; anything else is
        returned unchanged.
        """
        res = super().__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res