Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio4.py: 20%
280 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1''' Classes for read / write of matlab (TM) 4 files
2'''
3import sys
4import warnings
6import numpy as np
8import scipy.sparse
10from ._miobase import (MatFileReader, docfiller, matdims, read_dtype,
11 convert_dtypes, arr_to_chars, arr_dtype_number)
13from ._mio_utils import squeeze_element, chars_to_strings
14from functools import reduce
17__all__ = [
18 'MatFile4Reader', 'MatFile4Writer', 'SYS_LITTLE_ENDIAN',
19 'VarHeader4', 'VarReader4', 'VarWriter4', 'arr_to_2d', 'mclass_info',
20 'mdtypes_template', 'miDOUBLE', 'miINT16', 'miINT32', 'miSINGLE',
21 'miUINT16', 'miUINT8', 'mxCHAR_CLASS', 'mxFULL_CLASS', 'mxSPARSE_CLASS',
22 'np_to_mtypes', 'order_codes'
23]
# True when the running interpreter stores integers little-endian
SYS_LITTLE_ENDIAN = (sys.byteorder == 'little')

# Mat4 data type codes; these are the values looked up in
# ``mdtypes_template`` and produced by ``np_to_mtypes``
(miDOUBLE,
 miSINGLE,
 miINT32,
 miINT16,
 miUINT16,
 miUINT8) = range(6)

# Byte-order-free NumPy dtype codes, keyed by Mat4 data type code, plus the
# record layout of the five-integer variable header and the char dtype
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    'header': [
        ('mopt', 'i4'),
        ('mrows', 'i4'),
        ('ncols', 'i4'),
        ('imagf', 'i4'),
        ('namlen', 'i4'),
    ],
    'U1': 'U1',
}

# NumPy dtype string (without the byte order character) -> Mat4 data type
# code used when writing an array of that dtype
np_to_mtypes = {
    # stored as doubles
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    # stored as singles
    'f4': miSINGLE,
    'c8': miSINGLE,
    # integer types
    'i4': miINT32,
    'i2': miINT16,
    'u2': miUINT16,
    # single bytes, including one-character byte strings
    'u1': miUINT8,
    'S1': miUINT8,
}
# Matrix class codes
mxFULL_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS = range(3)

# Byte order / number format codes.  Only the first two map onto NumPy byte
# order characters; the others are descriptive names only.
order_codes = {
    0: '<',
    1: '>',
    2: 'VAX D-float',  # !
    3: 'VAX G-float',
    4: 'Cray',  # !!
}

# Human readable name for each matrix class code
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
}
class VarHeader4:
    ''' Plain record holding the decoded fields of a Mat4 variable header

    Attributes
    ----------
    name
        variable name as passed in by the creator of the header
    dtype
        dtype of the stored matrix data
    mclass
        matrix class code
    dims
        dimensions of the stored matrix
    is_complex
        flag telling whether the variable has an imaginary part
    '''
    # Mat4 variables never logical or global
    is_logical = False
    is_global = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        (self.name,
         self.dtype,
         self.mclass,
         self.dims,
         self.is_complex) = (name, dtype, mclass, dims, is_complex)
class VarReader4:
    ''' Class to read matlab 4 variables

    Decodes variable headers and the matrix data following them from the
    stream of the file reader passed to the constructor.
    '''

    def __init__(self, file_reader):
        ''' Take stream, dtypes and read options from `file_reader` '''
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance
            ``dims`` holds Python ints; ``mclass`` is the matrix class digit
            of the MOPT integer

        Raises
        ------
        ValueError
            if the MOPT integer is out of range, has a non-zero ``O`` digit,
            or names a data type code that has no dtype in ``self.dtypes``
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        # Work with Python ints from here on so later arithmetic on these
        # values cannot wrap around at 32 bits
        mopt = int(data['mopt'])
        if mopt < 0 or mopt > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        M, rest = divmod(mopt, 1000)  # order code
        if M not in (0, 1):
            # mopt == 5000 gives M == 5, which has no entry in order_codes
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt" % order_codes.get(M, 'unknown'),
                          UserWarning)
        O, rest = divmod(rest, 100)  # unused, should be 0
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        P, rest = divmod(rest, 10)  # data type code e.g miDOUBLE (see above)
        T = rest  # matrix type code e.g., mxFULL_CLASS (see above)
        dims = (int(data['mrows']), int(data['ncols']))
        is_complex = data['imagf'] == 1
        try:
            dtype = self.dtypes[P]
        except KeyError:
            # Report an unknown type digit like every other format problem
            raise ValueError('Unknown data type code %d in MOPT integer, '
                             'wrong format?' % P) from None
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read the array described by `hdr` from the stream

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True (default), apply the ``chars_as_strings`` and
            ``squeeze_me`` options to full and char arrays. Sparse matrices
            are always returned unprocessed.

        Returns
        -------
        arr : ndarray or sparse matrix

        Raises
        ------
        TypeError
            if ``hdr.mclass`` is not a known matrix class code
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            if the stream does not hold as many bytes as the header implies
        '''
        dt = hdr.dtype
        # Coerce to Python ints: the byte count of a large matrix does not
        # fit in the 32-bit integers the header values are read as
        dims = tuple(int(d) for d in hdr.dims)
        num_bytes = dt.itemsize
        for d in dims:
            num_bytes *= d
        buffer = self.mat_stream.read(num_bytes)
        if len(buffer) != num_bytes:
            raise ValueError("Not enough bytes to read matrix '%s'; is this "
                             "a badly-formed file? Consider listing matrices "
                             "with `whosmat` and loading named matrices with "
                             "`variable_names` kwarg to `loadmat`" % hdr.name)
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # The real block is followed by an equally sized imaginary block.
            # avoid array copy to save memory
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        arr = self.read_sub_array(hdr).astype(np.uint8)
        S = arr.tobytes().decode('latin-1')
        # Reinterpret the decoded string as one 'U1' element per character
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` and shape read from the sparse matrix data

        Raises
        ------
        ValueError
            if the stored matrix has no rows or fewer than three columns

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data are saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns.
        '''
        res = self.read_sub_array(hdr)
        if res.shape[0] < 1 or res.shape[1] < 3:
            # Without the trailing shape row or the value column the indexing
            # below would fail with an IndexError that hides the cause
            raise ValueError("Sparse matrix '%s' is not stored as an N+1 by "
                             "3 (or 4) array; is this a badly-formed file?"
                             % hdr.name)
        tmp = res[:-1, :]
        # All numbers are float64 in Matlab, but SciPy sparse expects int shape
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_matrix((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        shape : tuple of int
            For char matrices the last dimension is dropped when
            ``chars_as_strings`` is set; length-1 dimensions are dropped when
            ``squeeze_me`` is set. ``()`` for a sparse matrix whose stored
            dimensions are not both at least 1.

        Raises
        ------
        TypeError
            if ``hdr.mclass`` is not a known matrix class code
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            dims = hdr.dims

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()

            # Read only the row and column counts: the last element of the
            # first and of the second stored column (data are column-major)
            column_skip = dt.itemsize * (int(dims[0]) - 1)
            self.mat_stream.seek(column_skip, 1)
            rows = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(column_skip, 1)
            cols = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError('No reader for class code %s' % mclass)

        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape
class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

    %(matstream_arg)s
    %(load_args)s
        '''
        super().__init__(mat_stream, *args, **kwargs)
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess byte order code from the first integer in the stream

        The stream is rewound to position 0 before returning.

        Returns
        -------
        order : {'<', '>'}
            '<' when the first integer is 0; the non-native order when the
            integer read natively falls outside 0..5000; otherwise the
            native order
        '''
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            return '<'
        if mopt < 0 or mopt > 5000:
            # Number must have been byteswapped
            return '>' if SYS_LITTLE_ENDIAN else '<'
        # Not byteswapped
        return '<' if SYS_LITTLE_ENDIAN else '>'

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        # Multiply as Python ints: the header dimensions are read as 32-bit
        # integers and the byte count of a large matrix does not fit in 32
        # bits
        n = reduce(lambda x, y: x * int(y), hdr.dims, 1)
        remaining_bytes = hdr.dtype.itemsize * n
        # Full complex matrices store a second, imaginary block of the same
        # size; sparse matrices keep all of their data in one block
        if hdr.is_complex and hdr.mclass != mxSPARSE_CLASS:
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            maps variable name to the array read for it
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # private copy: names are removed from it as they are found
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if not variable_names:
                    # everything that was asked for has been read
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        found : list of tuple
            one ``(name, shape, info)`` entry per variable, where ``info`` is
            the class name from ``mclass_info`` or 'unknown'
        '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        found = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            found.append((name, shape, info))

            # shape_from_header may have moved the stream; go to next variable
            self.mat_stream.seek(next_position)
        return found
def arr_to_2d(arr, oned_as='row'):
    ''' Return ``arr`` reshaped to the shape ``matdims`` gives for it

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Passed on to ``matdims``; selects whether 1-D vectors become row
       vectors or column vectors. See documentation for ``matdims`` for
       more detail

    Returns
    -------
    arr2d : array
       2-D version of the array

    Raises
    ------
    ValueError
        if ``matdims`` reports more than 2 dimensions for `arr`
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')
class VarWriter4:
    ''' Class to write matlab 4 variables to the stream of a file writer '''

    def __init__(self, file_writer):
        ''' Take output stream and ``oned_as`` option from `file_writer` '''
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the bytes of array `arr` in Fortran (column-major) order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
            Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex
        '''
        header = np.empty((), mdtypes_template['header'])
        # The header is written in native byte order, so the order digit
        # records which order that is
        M = not SYS_LITTLE_ENDIAN
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        # the stored name carries a terminating null byte
        header['namlen'] = len(name) + 1
        self.write_bytes(header)
        data = name + '\0'
        self.write_string(data.encode('latin1'))

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
            array to write
        name : str
            name in matlab workspace

        Raises
        ------
        TypeError
            for object arrays and void (structured) arrays
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.str_, np.bytes_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write numeric array `arr` as a full matrix named `name`

        Dtypes without an entry in ``np_to_mtypes`` are converted to double
        precision (complex128 for complex input) before writing. Complex
        arrays are written as the real block followed by the imaginary
        block.
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        P = np_to_mtypes.get(arr.dtype.str[1:])
        # Extended precision complex is declared as miDOUBLE in np_to_mtypes,
        # so it has to be cast down as well; otherwise the real and imaginary
        # parts would be written wider than the 8 bytes the header promises
        too_wide = imagf and arr.dtype.itemsize > 16
        if P is None or too_wide:
            arr = arr.astype('c16' if imagf else 'f8')
            P = miDOUBLE
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write string array `arr` as a char matrix named `name`

        Characters are stored one byte each; unicode input is encoded as
        latin-1.
        '''
        arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1: view all characters as one string,
            # encode it, then view the bytes as an 'S1' array of `dims`
            n_chars = np.prod(dims)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2-D

        See docstring for VarReader4.read_sparse_array

        The matrix is written as an N+1 by 3 (real) or N+1 by 4 (complex)
        double array of 1-based row indices, 1-based column indices and
        values, whose last row holds the matrix shape. The header ``imagf``
        flag is left at 0 for complex data as well.
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        ijv = np.zeros((A.nnz + 1, 3 + imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)
class MatFile4Writer:
    ''' Class for writing matlab 4 format files '''

    def __init__(self, file_stream, oned_as=None):
        ''' Store output stream and 1-D handling option

        Parameters
        ----------
        file_stream : file-like
            stream the variables are written to
        oned_as : {None, 'row', 'column'}, optional
            how 1-D arrays are reshaped on writing; None means 'row'
        '''
        self.file_stream = file_stream
        self.oned_as = 'row' if oned_as is None else oned_as
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping with method ``items`` return name, contents pairs
           where ``name`` which will appear in the matlab workspace in
           file load, and ``contents`` is something writeable to a
           matlab file, such as a NumPy array.
        write_header : {None, True, False}
           Accepted only for compatibility with the matlab 5 version of
           this method and otherwise ignored: a matlab 4 mat file has no
           file header to write.
        '''
        # A fresh variable writer is created on every call
        writer = VarWriter4(self)
        self._matrix_writer = writer
        for var_name, contents in mdict.items():
            writer.write(contents, var_name)