Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_miobase.py: 26%
108 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-03 06:39 +0000
1# Authors: Travis Oliphant, Matthew Brett
3"""
4Base classes for MATLAB file stream reading.
6MATLAB is a registered trademark of the Mathworks inc.
7"""
9import numpy as np
10from scipy._lib import doccer
12from . import _byteordercodes as boc
# Names exported by a star import of this module.
__all__ = [
    'MatFileReader',
    'MatReadError',
    'MatReadWarning',
    'MatVarReader',
    'MatWriteError',
    'arr_dtype_number',
    'arr_to_chars',
    'convert_dtypes',
    'doc_dict',
    'docfiller',
    'get_matfile_version',
    'matdims',
    'read_dtype',
]
class MatReadError(Exception):
    """Error raised when reading a MAT-file fails."""
class MatWriteError(Exception):
    """Error raised when writing a MAT-file fails."""
class MatReadWarning(UserWarning):
    """Warning category for problems met while reading a MAT-file."""
# Reusable parameter descriptions, keyed by placeholder name.  Docstrings in
# this module reference them as ``%(key)s`` (for example ``%(load_args)s``)
# and are decorated with ``docfiller``, which is built from this dict below.
doc_dict = \
    {'file_arg':
         '''file_name : str
 Name of the mat file (do not need .mat extension if
 appendmat==True) Can also pass open file-like object.''',
     'append_arg':
         '''appendmat : bool, optional
 True to append the .mat extension to the end of the given
 filename, if not already present. Default is True.''',
     # Several related loading options share a single placeholder.
     'load_args':
         '''byte_order : str or None, optional
 None by default, implying byte order guessed from mat
 file. Otherwise can be one of ('native', '=', 'little', '<',
 'BIG', '>').
mat_dtype : bool, optional
 If True, return arrays in same dtype as would be loaded into
 MATLAB (instead of the dtype with which they are saved).
squeeze_me : bool, optional
 Whether to squeeze unit matrix dimensions or not.
chars_as_strings : bool, optional
 Whether to convert char arrays to string arrays.
matlab_compatible : bool, optional
 Returns matrices as would be loaded by MATLAB (implies
 squeeze_me=False, chars_as_strings=False, mat_dtype=True,
 struct_as_record=True).''',
     'struct_arg':
         '''struct_as_record : bool, optional
 Whether to load MATLAB structs as NumPy record arrays, or as
 old-style NumPy arrays with dtype=object. Setting this flag to
 False replicates the behavior of SciPy version 0.7.x (returning
 numpy object arrays). The default setting is True, because it
 allows easier round-trip load and save of MATLAB files.''',
     'matstream_arg':
         '''mat_stream : file-like
 Object with file API, open for reading.''',
     'long_fields':
         '''long_field_names : bool, optional
 * False - maximum field name length in a structure is 31 characters
 which is the documented maximum length. This is the default.
 * True - maximum field name length in a structure is 63 characters
 which works for MATLAB 7.6''',
     'do_compression':
         '''do_compression : bool, optional
 Whether to compress matrices on write. Default is False.''',
     'oned_as':
         '''oned_as : {'row', 'column'}, optional
 If 'column', write 1-D NumPy arrays as column vectors.
 If 'row', write 1D NumPy arrays as row vectors.''',
     'unicode_strings':
         '''unicode_strings : bool, optional
 If True, write strings as Unicode, else MATLAB usual encoding.'''}

# Decorator built from ``doc_dict``; applied to functions whose docstrings
# contain ``%(key)s`` placeholders (see ``MatFileReader.__init__``).
docfiller = doccer.filldoc(doc_dict)
88'''
90 Note on architecture
91======================
93There are three sets of parameters relevant for reading files. The
94first are *file read parameters* - containing options that are common
95for reading the whole file, and therefore every variable within that
96file. At the moment these are:
98* mat_stream
99* dtypes (derived from byte code)
100* byte_order
101* chars_as_strings
102* squeeze_me
103* struct_as_record (MATLAB 5 files)
104* class_dtypes (derived from order code, MATLAB 5 files)
105* codecs (MATLAB 5 files)
106* uint16_codec (MATLAB 5 files)
108Another set of parameters are those that apply only to the current
109variable being read - the *header*:
111* header related variables (different for v4 and v5 mat files)
112* is_complex
113* mclass
114* var_stream
116With the header, we need ``next_position`` to tell us where the next
117variable in the stream is.
119Then, for each element in a matrix, there can be *element read
120parameters*. An element is, for example, one element in a MATLAB cell
121array. At the moment, these are:
123* mat_dtype
125The file-reading object contains the *file read parameters*. The
126*header* is passed around as a data object, or may be read and discarded
in a single function. The *element read parameters* - just the mat_dtype
in this instance - are passed into a general post-processing function -
see ``mio_utils`` for details.
130'''
def convert_dtypes(dtype_template, order_code):
    """Build a copy of a dtype mapping with every dtype in a given byte order.

    Parameters
    ----------
    dtype_template : mapping
        Mapping whose values are acceptable to ``np.dtype(val)``.  It is
        left unmodified.
    order_code : str
        Byte-order code understood by ``dtype.newbyteorder()``.

    Returns
    -------
    dtypes : mapping
        Copy of `dtype_template` (made with its ``copy`` method) in which
        each value is ``np.dtype(val).newbyteorder(order_code)``.
    """
    converted = dtype_template.copy()
    for key, spec in dtype_template.items():
        converted[key] = np.dtype(spec).newbyteorder(order_code)
    return converted
def read_dtype(mat_stream, a_dtype):
    """
    Read a single item of a known dtype from a byte stream.

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream; ``a_dtype.itemsize`` bytes are read
        from its current position.
    a_dtype : dtype
        dtype of the item to read.  Its endianness is assumed to be
        correct already.

    Returns
    -------
    arr : ndarray
        0-D array of dtype `a_dtype` backed by the bytes just read.
    """
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')
def matfile_version(file_name, *, appendmat=True):
    """
    Identify the MAT-file format version of a file.

    The result is a ``(major, minor)`` tuple, where:

    #. 0,x -> version 4 format mat files
    #. 1,x -> version 5 format mat files
    #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    file_name : str
       Name of the mat file (do not need .mat extension if
       appendmat==True). Can also pass open file-like object.
    appendmat : bool, optional
       True to append the .mat extension to the end of the given
       filename, if not already present. Default is True.

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty.
    ValueError
        The matfile version is unknown.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    # Imported here rather than at module level, as in the original code.
    from ._mio import _open_file_context

    with _open_file_context(file_name, appendmat=appendmat) as stream:
        version = _get_matfile_version(stream)
    return version


# Alias: the name under which this function is listed in ``__all__``.
get_matfile_version = matfile_version
def _get_matfile_version(fileobj):
    """Return the ``(major, minor)`` MAT-file version of an open stream.

    Parameters
    ----------
    fileobj : file-like
        Binary stream supporting ``seek`` and ``read``.

    Returns
    -------
    version : tuple of int
        ``(0, 0)`` when any of the first four bytes is zero (taken to be a
        version 4 file).  Otherwise ``(major, minor)`` from the two version
        bytes at offset 124, where ``major`` is 1 or 2.

    Raises
    ------
    MatReadError
        If the stream is empty, or ends before the bytes needed to identify
        the format could be read.
    ValueError
        If the major version found is neither 1 nor 2.

    Notes
    -----
    The stream is rewound to position 0 before a version is returned.
    """
    # Mat4 files have a zero somewhere in first 4 bytes
    fileobj.seek(0)
    mopt_bytes = fileobj.read(4)
    if len(mopt_bytes) == 0:
        raise MatReadError("Mat file appears to be empty")
    if len(mopt_bytes) < 4:
        # Without this check np.ndarray below fails with an unhelpful
        # "buffer is too small" TypeError.
        raise MatReadError("Mat file appears to be truncated")
    mopt_ints = np.ndarray(shape=(4,), dtype=np.uint8, buffer=mopt_bytes)
    if 0 in mopt_ints:
        fileobj.seek(0)
        return (0, 0)
    # For 5 format or 7.3 format we need to read an integer in the
    # header. Bytes 124 through 128 contain a version integer and an
    # endian test string
    fileobj.seek(124)
    tst_str = fileobj.read(4)
    fileobj.seek(0)
    if len(tst_str) < 4:
        # Header ends early; indexing below would raise IndexError.
        raise MatReadError("Mat file appears to be truncated")
    # The third byte of the endian test string tells which of the two
    # version bytes holds the major number.
    maj_ind = int(tst_str[2] == b'I'[0])
    maj_val = int(tst_str[maj_ind])
    min_val = int(tst_str[1 - maj_ind])
    ret = (maj_val, min_val)
    if maj_val in (1, 2):
        return ret
    raise ValueError('Unknown mat file type, version {}, {}'.format(*ret))
def matdims(arr, oned_as='column'):
    """
    Determine equivalent MATLAB dimensions for given array

    Parameters
    ----------
    arr : ndarray
        Input array
    oned_as : {'column', 'row'}, optional
        Whether 1-D arrays are returned as MATLAB row or column matrices.
        Default is 'column'.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Raises
    ------
    ValueError
        If `arr` is a non-empty 1-D array and `oned_as` is neither
        'column' nor 'row'.

    Notes
    -----
    We had to decide what shape a 1 dimensional array would be by
    default. ``np.atleast_2d`` thinks it is a row vector. The
    default for a vector in MATLAB (e.g., ``>> 1:12``) is a row vector.

    Versions of scipy up to and including 0.11 resulted (accidentally)
    in 1-D arrays being read as column vectors. For the moment, we
    maintain the same tradition here.

    Arrays with two or more dimensions keep their shape unchanged, even
    when one of the dimensions is zero.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.io.matlab._miobase import matdims
    >>> matdims(np.array(1)) # NumPy scalar
    (1, 1)
    >>> matdims(np.array([1])) # 1-D array, 1 element
    (1, 1)
    >>> matdims(np.array([1,2])) # 1-D array, 2 elements
    (2, 1)
    >>> matdims(np.array([[2],[3]])) # 2-D array, column vector
    (2, 1)
    >>> matdims(np.array([[2,3]])) # 2-D array, row vector
    (1, 2)
    >>> matdims(np.array([[[2,3]]])) # 3-D array, rowish vector
    (1, 1, 2)
    >>> matdims(np.array([])) # empty 1-D array
    (0, 0)
    >>> matdims(np.array([[]])) # empty 2-D array
    (1, 0)
    >>> matdims(np.array([[[]]])) # empty 3-D array
    (1, 1, 0)

    Optional argument flips 1-D shape behavior.

    >>> matdims(np.array([1,2]), 'row') # 1-D array, 2 elements
    (1, 2)

    The argument has to make sense though

    >>> matdims(np.array([1,2]), 'bizarre')
    Traceback (most recent call last):
       ...
    ValueError: 1-D option "bizarre" is strange

    """
    shape = arr.shape
    if shape == ():  # scalar
        return (1, 1)
    if len(shape) != 1:  # 2-D and above: already in MATLAB form
        return shape
    # 1-D from here on.  Emptiness is checked before `oned_as` is
    # validated, so an empty vector never raises.
    if shape[0] == 0:
        return (0, 0)
    if oned_as == 'column':
        return shape + (1,)
    if oned_as == 'row':
        return (1,) + shape
    raise ValueError('1-D option "%s" is strange' % oned_as)
class MatVarReader:
    """Interface expected of variable readers.

    Every method here is a do-nothing placeholder that returns None.
    """

    def __init__(self, file_reader):
        pass

    def read_header(self):
        """Return the header (placeholder: returns None)."""

    def array_from_header(self, header):
        """Read the array described by `header` (placeholder: returns None)."""
class MatFileReader:
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory - gives object to fetch next matrix from stream
    guess_byte_order - guesses file byte order from file
    """

    @docfiller
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 simplify_cells=False):
        '''
        Initializer for mat file reader

        Parameters
        ----------
        mat_stream : file-like
            object with file API, open for reading
        %(load_args)s
        %(struct_arg)s
        verify_compressed_data_integrity : bool, optional
            Stored on the instance as given; this base class does not
            otherwise use it.
        simplify_cells : bool, optional
            Stored on the instance. If true, ``squeeze_me`` is forced to
            True and ``struct_as_record`` to False, whatever values were
            passed for them.
        '''
        # Initialize stream.  This must be set before guess_byte_order() is
        # called: that method is meant to be overridden (see class docstring)
        # and is documented as guessing the byte order from the file.
        self.mat_stream = mat_stream
        self.dtypes = {}
        if not byte_order:
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        if matlab_compatible:
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity
        self.simplify_cells = simplify_cells
        if simplify_cells:
            # Applied last, so these win over matlab_compatible and over the
            # explicit squeeze_me / struct_as_record arguments.
            self.squeeze_me = True
            self.struct_as_record = False

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        ''' Return True if no more bytes can be read from ``mat_stream``

        One byte is read as a probe and the stream is then put back where
        it was, so the call leaves the read position unchanged.
        '''
        probe = self.mat_stream.read(1)
        if probe:
            # Un-read the probe byte.  At end of stream nothing was read,
            # so the position is left alone: seeking back would rewind one
            # byte, or seek to -1 on an empty stream.
            curpos = self.mat_stream.tell()
            self.mat_stream.seek(curpos - len(probe))
        return len(probe) == 0
def arr_dtype_number(arr, num):
    """Return a dtype like that of `arr` but with `num` items per element.

    The first two characters of ``arr.dtype.str`` are kept and `num` is
    appended to them to form the new dtype string.
    """
    prefix = arr.dtype.str[:2]
    return np.dtype(f"{prefix}{num!s}")
def arr_to_chars(arr):
    """Convert a string array into an array of single characters.

    The result has one extra trailing axis whose length is the item count
    taken from ``arr.dtype.str``; a 0-D input is treated as having shape
    ``(1,)``.  Character slots that compare equal to the empty string are
    filled with a space, on a copy.  When there are no such slots, the
    returned array is a view onto the buffer of `arr`.
    """
    char_shape = list(arr.shape) or [1]
    char_shape.append(int(arr.dtype.str[2:]))
    # Same leading dtype characters as the input, one item per element.
    one_char = np.dtype(arr.dtype.str[:2] + str(1))
    chars = np.ndarray(shape=char_shape, dtype=one_char, buffer=arr)
    blank = chars == np.array('', dtype=chars.dtype)
    if not np.any(blank):
        return chars
    chars = chars.copy()
    chars[blank] = ' '
    return chars
429 return arr