Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio4.py: 2%
282 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
1''' Classes for read / write of matlab (TM) 4 files
2'''
3import sys
4import warnings
5import math
7import numpy as np
9import scipy.sparse
11from ._miobase import (MatFileReader, docfiller, matdims, read_dtype,
12 convert_dtypes, arr_to_chars, arr_dtype_number)
14from ._mio_utils import squeeze_element, chars_to_strings
15from functools import reduce
18__all__ = [
19 'MatFile4Reader', 'MatFile4Writer', 'SYS_LITTLE_ENDIAN',
20 'VarHeader4', 'VarReader4', 'VarWriter4', 'arr_to_2d', 'mclass_info',
21 'mdtypes_template', 'miDOUBLE', 'miINT16', 'miINT32', 'miSINGLE',
22 'miUINT16', 'miUINT8', 'mxCHAR_CLASS', 'mxFULL_CLASS', 'mxSPARSE_CLASS',
23 'np_to_mtypes', 'order_codes'
24]
# True when the interpreter is running on a little-endian machine
SYS_LITTLE_ENDIAN = sys.byteorder == 'little'

# Data type codes (the "P" digit of the MOPT header integer)
miDOUBLE = 0
miSINGLE = 1
miINT32 = 2
miINT16 = 3
miUINT16 = 4
miUINT8 = 5

# Template of dtype strings without byte order; the reader turns this into
# real dtypes for the byte order of the file being read
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    # fixed five-integer header that precedes every variable
    'header': [
        ('mopt', 'i4'),
        ('mrows', 'i4'),
        ('ncols', 'i4'),
        ('imagf', 'i4'),
        ('namlen', 'i4'),
    ],
    'U1': 'U1',
}

# NumPy dtype string (byte order character stripped) -> data type code
np_to_mtypes = {
    # double storage
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    # single storage
    'f4': miSINGLE,
    'c8': miSINGLE,
    # integer storage
    'i4': miINT32,
    'i2': miINT16,
    'u2': miUINT16,
    # byte storage
    'u1': miUINT8,
    'S1': miUINT8,
}

# matrix classes (the "T" digit of the MOPT header integer)
mxFULL_CLASS = 0
mxCHAR_CLASS = 1
mxSPARSE_CLASS = 2

# Byte order codes (the "M" digit of the MOPT header integer); only the
# first two are supported by the reader
order_codes = {
    0: '<',
    1: '>',
    2: 'VAX D-float',  # !
    3: 'VAX G-float',
    4: 'Cray',  # !!
}

# Human readable label for each matrix class
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
}
class VarHeader4:
    ''' Record holding the decoded header fields of one Mat4 variable

    Parameters
    ----------
    name : bytes
        variable name as read from the file
    dtype : dtype
        storage dtype of the variable data
    mclass : int
        matrix class code (full, char or sparse)
    dims : sequence of int
        stored (rows, columns) of the variable
    is_complex : bool
        True when an imaginary part follows the real part
    '''
    # Mat4 variables never logical or global
    is_global = False
    is_logical = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        self.name, self.dtype, self.mclass = name, dtype, mclass
        self.dims, self.is_complex = dims, is_complex
class VarReader4:
    ''' Class to read matlab 4 variables

    The reader shares the stream, dtypes and processing flags of the file
    reader passed to the constructor.
    '''

    def __init__(self, file_reader):
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance
            ``dims`` holds plain Python ints so that later size arithmetic
            cannot wrap around in 32-bit integer arithmetic.

        Raises
        ------
        ValueError
            If the MOPT integer is outside 0..5000 or its "O" digit is not 0.
        KeyError
            If the data type digit of MOPT has no entry in ``self.dtypes``.
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        mopt = int(data['mopt'])
        if mopt < 0 or mopt > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        M, rest = divmod(mopt, 1000)  # order code
        if M not in (0, 1):
            # mopt == 5000 gives M == 5, which has no entry in order_codes;
            # fall back to a placeholder instead of failing inside the
            # warning itself
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt"
                          % order_codes.get(M, 'unknown'),
                          UserWarning, stacklevel=3)
        O, rest = divmod(rest, 100)  # unused, should be 0
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        P, rest = divmod(rest, 10)  # data type code e.g miDOUBLE (see above)
        T = rest  # matrix type code e.g., mxFULL_CLASS (see above)
        dims = (int(data['mrows']), int(data['ncols']))
        is_complex = bool(data['imagf'] == 1)
        dtype = self.dtypes[P]
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read the variable described by `hdr` from the stream

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True, apply the ``chars_as_strings`` and ``squeeze_me``
            post-processing selected on this reader.

        Returns
        -------
        arr : ndarray or ``scipy.sparse.coo_matrix``
            Sparse results are returned without any post-processing.

        Raises
        ------
        TypeError
            If the matrix class code of `hdr` is not full, char or sparse.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            If a dimension is negative or the stream holds fewer bytes than
            the header promises.
        '''
        dt = hdr.dtype
        # Work with Python ints: the header fields are 32-bit values and
        # their product can exceed the int32 range for large matrices
        dims = tuple(int(d) for d in hdr.dims)
        short_read_msg = ("Not enough bytes to read matrix '%s'; is this "
                          "a badly-formed file? Consider listing matrices "
                          "with `whosmat` and loading named matrices with "
                          "`variable_names` kwarg to `loadmat`" % hdr.name)
        if any(d < 0 for d in dims):
            # a negative size passed to read() would slurp the whole stream
            raise ValueError(short_read_msg)
        num_bytes = dt.itemsize * math.prod(dims)
        buffer = self.mat_stream.read(num_bytes)
        if len(buffer) != num_bytes:
            raise ValueError(short_read_msg)
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # avoid array copy to save memory
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        arr = self.read_sub_array(hdr).astype(np.uint8)
        S = arr.tobytes().decode('latin-1')
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` and shape read from the sparse matrix data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data are saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns.
        '''
        res = self.read_sub_array(hdr)
        tmp = res[:-1, :]
        # All numbers are float64 in Matlab, but SciPy sparse expects int shape
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_matrix((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            # Python ints keep the seek offsets below from wrapping around
            dims = tuple(map(int, hdr.dims))

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()

            # Read only the row and column counts: they are the last entry
            # of the first and of the second stored column respectively
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            rows = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            cols = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError('No reader for class code %s' % mclass)

        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape
class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

    %(matstream_arg)s
    %(load_args)s
        '''
        super().__init__(mat_stream, *args, **kwargs)
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess the byte order of the file from its first MOPT integer

        The stream is rewound to position 0 before returning.

        Returns
        -------
        byte_order : {'<', '>'}
        '''
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            return '<'
        if mopt < 0 or mopt > 5000:
            # Number must have been byteswapped
            return '>' if SYS_LITTLE_ENDIAN else '<'
        # Not byteswapped
        return '<' if SYS_LITTLE_ENDIAN else '>'

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable

        Raises
        ------
        ValueError
            If the header declares a negative number of elements.
        '''
        hdr = self._matrix_reader.read_header()
        # The header fields are 32-bit values; multiply them as Python ints
        # so the byte count of a large matrix cannot wrap around
        n_elements = math.prod(int(d) for d in hdr.dims)
        if n_elements < 0:
            # would make next_position point backwards in the stream
            raise ValueError('Mat 4 header has negative dimensions, '
                             'wrong format?')
        remaining_bytes = hdr.dtype.itemsize * n_elements
        if hdr.is_complex and hdr.mclass != mxSPARSE_CLASS:
            # imaginary part is stored as a second block of the same size
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            maps variable name to the array read for it
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # private copy: names are removed from it as they are found
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    # everything requested has been read
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        var_info : list of tuple
            one ``(name, shape, class label)`` tuple per variable in the file
        '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        var_info = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            var_info.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return var_info
def arr_to_2d(arr, oned_as='row'):
    ''' Return ``arr`` reshaped to exactly two dimensions

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Whether to reshape 1-D vectors as row vectors or column vectors.
       See documentation for ``matdims`` for more detail

    Returns
    -------
    arr2d : array
       2-D version of the array

    Raises
    ------
    ValueError
       If ``matdims`` reports more than 2 dimensions for `arr`
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')
class VarWriter4:
    ''' Class to write matlab 4 variables

    Writes to the ``file_stream`` of the file writer passed to the
    constructor and uses its ``oned_as`` setting for 1-D input.
    '''

    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the raw bytes of `arr` in column-major order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write the bytes `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
            Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex
        '''
        header = np.empty((), mdtypes_template['header'])
        # data is always written in the byte order of this machine
        M = not SYS_LITTLE_ENDIAN
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        # stored name length includes the terminating NUL
        header['namlen'] = len(name) + 1
        self.write_bytes(header)
        data = name + '\0'
        self.write_string(data.encode('latin1'))

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
            array to write
        name : str
            name in matlab workspace

        Raises
        ------
        TypeError
            If `arr` converts to an object or void (structured) array.
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.str_, np.bytes_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write a full numeric matrix, real or complex

        Dtypes without a Mat4 storage code are converted to double
        precision. Complex data is written as the real block followed by
        the imaginary block.
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        try:
            P = np_to_mtypes[arr.dtype.str[1:]]
        except KeyError:
            # 'c16' is complex128; there is no 'c128' dtype string
            arr = arr.astype('c16' if imagf else 'f8')
            P = miDOUBLE
        else:
            if imagf and arr.dtype.itemsize > 16:
                # Extended precision complex is tagged miDOUBLE by the
                # lookup table, so its parts must really be 8 bytes wide
                arr = arr.astype('c16')
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write a char matrix, one latin-1 byte per character '''
        if arr.dtype.type == np.str_ and arr.dtype.itemsize != np.dtype('U1').itemsize:
            # split multi-character strings into single characters
            arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1
            n_chars = math.prod(dims)
            # view the whole character matrix as one string of n_chars
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2-D

        See docstring for VarReader4.read_sparse_array
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        # one row per non-zero plus a final row holding the matrix shape;
        # a fourth column carries the imaginary part of complex data
        ijv = np.zeros((A.nnz + 1, 3+imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)
class MatFile4Writer:
    ''' Class for writing matlab 4 format files '''

    def __init__(self, file_stream, oned_as=None):
        ''' Store the output stream and the 1-D orientation policy

        Parameters
        ----------
        file_stream : file-like
            stream the variables are written to
        oned_as : {None, 'row', 'column'}, optional
            how 1-D arrays are written; None means 'row'
        '''
        self.file_stream = file_stream
        self.oned_as = 'row' if oned_as is None else oned_as
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` return name, contents pairs
           where ``name`` which will appear in the matlab workspace in
           file load, and ``contents`` is something writeable to a
           matlab file, such as a NumPy array.
        write_header : {None, True, False}
           Accepted only for compatibility with the matlab 5 version of
           this method and otherwise ignored: a matlab 4 mat file has no
           file header to write.
        '''
        writer = VarWriter4(self)
        self._matrix_writer = writer
        for var_name, contents in mdict.items():
            writer.write(contents, var_name)