Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio5.py: 25%
403 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
1''' Classes for read / write of matlab (TM) 5 files
3The matfile specification last found here:
5https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf
7(as of December 5 2008)
9=================================
10 Note on functions and mat files
11=================================
13The document above does not give any hints as to the storage of matlab
14function handles, or anonymous function handles. I had, therefore, to
15guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and
16``mxOPAQUE_CLASS`` by looking at example mat files.
``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to
contain a struct matrix with a set pattern of fields. For anonymous
functions, a sub-field of one of these fields seems to contain the
well-named ``mxOPAQUE_CLASS``. This seems to contain:
23* array flags as for any matlab matrix
24* 3 int8 strings
25* a matrix
27It seems that whenever the mat file contains a ``mxOPAQUE_CLASS``
28instance, there is also an un-named matrix (name == '') at the end of
29the mat file. I'll call this the ``__function_workspace__`` matrix.
31When I saved two anonymous functions in a mat file, or appended another
32anonymous function to the mat file, there was still only one
33``__function_workspace__`` un-named matrix at the end, but larger than
34that for a mat file with a single anonymous function, suggesting that
35the workspaces for the two functions had been merged.
37The ``__function_workspace__`` matrix appears to be of double class
38(``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in
39the format of a mini .mat file, without the first 124 bytes of the file
40header (the description and the subsystem_offset), but with the version
41U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes,
42presumably for 8 byte padding, and then a series of ``miMATRIX``
43entries, as in a standard mat file. The ``miMATRIX`` entries appear to
44be series of un-named (name == '') matrices, and may also contain arrays
45of this same mini-mat format.
47I guess that:
49* saving an anonymous function back to a mat file will need the
50 associated ``__function_workspace__`` matrix saved as well for the
51 anonymous function to work correctly.
52* appending to a mat file that has a ``__function_workspace__`` would
53 involve first pulling off this workspace, appending, checking whether
54 there were any more anonymous functions appended, and then somehow
55 merging the relevant workspaces, and saving at the end of the mat
56 file.
58The mat files I was playing with are in ``tests/data``:
60* sqr.mat
61* parabola.mat
62* some_functions.mat
64See ``tests/test_mio.py:test_mio_funcs.py`` for the debugging
65script I was working with.
67Small fragments of current code adapted from matfile.py by Heiko
68Henkelmann; parts of the code for simplify_cells=True adapted from
69http://blog.nephics.com/2019/08/28/better-loadmat-for-scipy/.
70'''
72import os
73import time
74import sys
75import zlib
77from io import BytesIO
79import warnings
81import numpy as np
83import scipy.sparse
85from ._byteordercodes import native_code, swapped_code
87from ._miobase import (MatFileReader, docfiller, matdims, read_dtype,
88 arr_to_chars, arr_dtype_number, MatWriteError,
89 MatReadError, MatReadWarning)
91# Reader object for matlab 5 format variables
92from ._mio5_utils import VarReader5
94# Constants and helper objects
95from ._mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES,
96 NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8,
97 miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS,
98 mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS,
99 mxDOUBLE_CLASS, mclass_info, mat_struct)
101from ._streams import ZlibInputStream
def _has_struct(elem):
    """Tell whether `elem` is a non-empty ndarray led by a ``mat_struct``.

    Only the first item along the first axis (``elem[0]``) is inspected.
    Anything that is not an ndarray, is empty, or is zero-dimensional
    yields False.
    """
    if not isinstance(elem, np.ndarray):
        return False
    # Indexing is only attempted once we know there is a first item.
    if elem.size <= 0 or elem.ndim <= 0:
        return False
    return isinstance(elem[0], mat_struct)
def _inspect_cell_array(ndarray):
    """Turn a cell array (loaded as an ndarray) into a plain list.

    Items that are ``mat_struct`` instances become nested dicts, items that
    are themselves struct-bearing arrays are converted recursively, and every
    other item is passed through unchanged.
    """
    def _convert(item):
        if isinstance(item, mat_struct):
            return _matstruct_to_dict(item)
        if _has_struct(item):
            return _inspect_cell_array(item)
        return item

    return [_convert(item) for item in ndarray]
def _matstruct_to_dict(matobj):
    """Build a (possibly nested) dict from a ``mat_struct`` object.

    The keys are the names listed in ``matobj._fieldnames``; the values are
    read from the instance ``__dict__``. Nested structs become dicts and
    struct-bearing arrays become lists; other values are stored as they are.
    """
    converted = {}
    for fieldname in matobj._fieldnames:
        value = matobj.__dict__[fieldname]
        if isinstance(value, mat_struct):
            value = _matstruct_to_dict(value)
        elif _has_struct(value):
            value = _inspect_cell_array(value)
        converted[fieldname] = value
    return converted
def _simplify_cells(d):
    """Replace mat objects held in dict `d` by nested dicts / lists.

    `d` is modified in place (only values of existing keys are replaced)
    and is also returned.
    """
    for key, value in d.items():
        if isinstance(value, mat_struct):
            d[key] = _matstruct_to_dict(value)
        elif _has_struct(value):
            d[key] = _inspect_cell_array(value)
    return d
class MatFile5Reader(MatFileReader):
    ''' Reader for Mat 5 mat files

    Adds the following attribute to the base class:

    uint16_codec - char codec to use for uint16 char arrays
        (defaults to system default codec)

    Relies on a variable reader offering the standard interface (see the
    abstract class in ``miobase``)::

       __init__(self, file_reader)
       read_header(self)
       array_from_header(self)

    plus the additional interface::

       set_stream(self, stream)
       read_full_tag(self)

    '''
    # NOTE: the docstring of ``__init__`` is a template filled in by
    # ``docfiller``; its ``%(...)s`` placeholders must be left as they are.
    @docfiller
    def __init__(self,
                 mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 uint16_codec=None,
                 simplify_cells=False):
        '''Initializer for matlab 5 file format reader

    %(matstream_arg)s
    %(load_args)s
    %(struct_arg)s
    uint16_codec : {None, string}
        Set codec to use for uint16 char arrays (e.g., 'utf-8').
        Use system default codec if None
        '''
        super().__init__(
            mat_stream,
            byte_order,
            mat_dtype,
            squeeze_me,
            chars_as_strings,
            matlab_compatible,
            struct_as_record,
            verify_compressed_data_integrity,
            simplify_cells)
        # Fall back on the interpreter default when no codec was given
        self.uint16_codec = uint16_codec or sys.getdefaultencoding()
        # The two readers are created lazily by ``initialize_read``
        self._file_reader = None
        self._matrix_reader = None

    @staticmethod
    def _decode_var_name(hdr):
        ''' Return variable name of `hdr` as str

        A header whose ``name`` is None gives the string ``'None'``;
        otherwise the name bytes are decoded as latin1.
        '''
        if hdr.name is None:
            return 'None'
        return hdr.name.decode('latin1')

    def guess_byte_order(self):
        ''' Guess byte order from the two bytes at offset 126.

        Returns ``'<'`` when those bytes are ``b'IM'`` and ``'>'``
        otherwise. Leaves the stream pointer at 0.
        '''
        self.mat_stream.seek(126)
        endian_marker = self.mat_stream.read(2)
        self.mat_stream.seek(0)
        return '<' if endian_marker == b'IM' else '>'

    def read_file_header(self):
        ''' Read the mat 5 file header

        Returns
        -------
        hdict : dict
            dict with keys ``'__header__'`` (description bytes with
            surrounding whitespace / nulls stripped) and ``'__version__'``
            (``'major.minor'`` string).
        '''
        header_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
        raw_header = read_dtype(self.mat_stream, header_dtype)
        version = raw_header['version']
        # High byte is the major version, low byte the minor version
        return {
            '__header__': raw_header['description'].item().strip(b' \t\n\000'),
            '__version__': '%d.%d' % (version >> 8, version & 0xFF),
        }

    def initialize_read(self):
        ''' Create the variable readers; run before reading variables

        Readers are configured from the parameters stored on `self`.
        '''
        # One reader for the top-level stream, and a separate one for the
        # matrices, because a compressed matrix lives in a stream of its own.
        self._file_reader = VarReader5(self)
        self._matrix_reader = VarReader5(self)

    def read_var_header(self):
        ''' Read the header of the next variable in the stream

        Returns
        -------
        header : object
            object that can be passed to self.read_var_array, and that
            has attributes .name and .is_global
        next_position : int
            position in stream of next variable

        Raises
        ------
        ValueError
            If the tag announces no bytes.
        TypeError
            If the (possibly decompressed) element is not of miMATRIX type.
        '''
        mdtype, byte_count = self._file_reader.read_full_tag()
        if byte_count <= 0:
            raise ValueError("Did not read any bytes")
        next_position = self.mat_stream.tell() + byte_count
        compressed = mdtype == miCOMPRESSED
        if compressed:
            # Wrap the compressed bytes in their own stream and read the
            # tag of the matrix found inside it
            self._matrix_reader.set_stream(
                ZlibInputStream(self.mat_stream, byte_count))
            check_stream_limit = self.verify_compressed_data_integrity
            mdtype, byte_count = self._matrix_reader.read_full_tag()
        else:
            check_stream_limit = False
            self._matrix_reader.set_stream(self.mat_stream)
        if mdtype != miMATRIX:
            raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
        header = self._matrix_reader.read_header(check_stream_limit)
        return header, next_position

    def read_var_array(self, header, process=True):
        ''' Read the array described by `header`

        Parameters
        ----------
        header : header object
            object with fields defining variable header
        process : {True, False} bool, optional
            If True, apply recursive post-processing during loading of
            array.

        Returns
        -------
        arr : array
            array with post-processing applied or not according to
            `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' Get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            Names of the variables to read. If None, read every variable
            in the file. Reading stops early once all requested names have
            been found.

        Returns
        -------
        mdict : dict
            Header entries, ``'__globals__'`` and the variables read. An
            un-named variable is stored, unprocessed, under
            ``'__function_workspace__'``. A variable that raises
            MatReadError is replaced by a ``"Read error: ..."`` string, with
            a warning.
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # Private copy: names are removed from it as they are found
            variable_names = list(variable_names)

        self.mat_stream.seek(0)
        # The reading objects take their parameters from self
        self.initialize_read()
        mdict = self.read_file_header()
        mdict['__globals__'] = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = self._decode_var_name(hdr)
            if name in mdict:
                warnings.warn('Duplicate variable name "%s" in stream'
                              ' - replacing previous with new\n'
                              'Consider mio5.varmats_from_mat to split '
                              'file into single variable files' % name,
                              MatReadWarning, stacklevel=2)
            # An empty name can only be a matlab 7 function workspace. It is
            # kept raw because mat_dtype processing would break the format
            # (uint8 data stored as mxDOUBLE_CLASS).
            is_workspace = name == ''
            if is_workspace:
                name = '__function_workspace__'
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            try:
                value = self.read_var_array(hdr, not is_workspace)
            except MatReadError as err:
                warnings.warn(
                    f'Unreadable variable "{name}", because "{err}"',
                    Warning, stacklevel=2)
                value = "Read error: %s" % err
            self.mat_stream.seek(next_position)
            mdict[name] = value
            if hdr.is_global:
                mdict['__globals__'].append(name)
            if variable_names is not None:
                variable_names.remove(name)
                if not variable_names:
                    break
        return _simplify_cells(mdict) if self.simplify_cells else mdict

    def list_variables(self):
        ''' List variables from stream

        Returns
        -------
        found : list of tuple
            One ``(name, shape, info)`` tuple per variable, where `info` is
            ``'logical'`` for logical arrays and otherwise the matlab class
            description (``'unknown'`` if the class is not recognized).
        '''
        self.mat_stream.seek(0)
        # The reading objects take their parameters from self
        self.initialize_read()
        self.read_file_header()
        found = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            # An empty name can only be a matlab 7 function workspace
            name = self._decode_var_name(hdr) or '__function_workspace__'
            shape = self._matrix_reader.shape_from_header(hdr)
            if hdr.is_logical:
                info = 'logical'
            else:
                info = mclass_info.get(hdr.mclass, 'unknown')
            found.append((name, shape, info))
            # Skip the array data and move on to the next variable
            self.mat_stream.seek(next_position)
        return found
def varmats_from_mat(file_obj):
    """ Pull variables out of mat 5 file as a sequence of mat file objects

    Each variable is copied, unread, into its own in-memory mat file made of
    the original file header followed by the raw bytes of that one variable.
    This helps with a difficult mat file containing unreadable variables, and
    with the pathological case of several variables sharing one name: all of
    them are returned here, whereas the standard reader keeps only the last
    one in its dictionary.

    The file pointer in `file_obj` will be undefined. File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable. The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io
    >>> import numpy as np
    >>> from io import BytesIO
    >>> from scipy.io.matlab._mio5 import varmats_from_mat
    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']

    """
    reader = MatFile5Reader(file_obj)
    # Keep a raw copy of the top-level file header to prepend to each output
    file_obj.seek(0)
    header_size = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    raw_header = file_obj.read(header_size)
    # Rewind and let the reader consume the header before the variables
    file_obj.seek(0)
    reader.initialize_read()
    reader.read_file_header()
    next_position = file_obj.tell()
    named_mats = []
    while not reader.end_of_stream():
        start_position = next_position
        var_header, next_position = reader.read_var_header()
        if var_header.name is None:
            name = 'None'
        else:
            name = var_header.name.decode('latin1')
        # Go back and grab the whole variable (tag included) as raw bytes
        file_obj.seek(start_position)
        raw_variable = file_obj.read(next_position - start_position)
        # Assemble a single-variable mat file in memory
        single_mat = BytesIO()
        single_mat.write(raw_header)
        single_mat.write(raw_variable)
        single_mat.seek(0)
        named_mats.append((name, single_mat))
    return named_mats
class EmptyStructMarker:
    """ Sentinel standing for an empty matlab struct on output.

    The class object itself is used as the marker; it is never instantiated
    by the code in this module.
    """
def to_writeable(source):
    ''' Convert input object ``source`` to something we can write

    Parameters
    ----------
    source : object

    Returns
    -------
    arr : None or ndarray or EmptyStructMarker
        If `source` cannot be converted to something we can write to a matfile,
        return None. If `source` is equivalent to an empty dictionary, return
        ``EmptyStructMarker``. Otherwise return `source` converted to an
        ndarray with contents for writing to matfile.

    Notes
    -----
    For mappings (and objects converted via their ``__dict__``), only string
    keys that are non-empty and do not start with an underscore or a digit
    become struct fields; every other key is silently dropped.
    '''
    if isinstance(source, np.ndarray):
        return source
    if source is None:
        return None
    if hasattr(source, "__array__"):
        return np.asarray(source)
    # Objects that implement mappings
    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
                  hasattr(source, 'items'))
    # Objects that don't implement mappings, but do have dicts
    if isinstance(source, np.generic):
        # NumPy scalars are never mappings (PyPy issue workaround)
        pass
    elif not is_mapping and hasattr(source, '__dict__'):
        source = {key: value for key, value in source.__dict__.items()
                  if not key.startswith('_')}
        is_mapping = True
    if is_mapping:
        dtype = []
        values = []
        for field, value in source.items():
            # The emptiness check comes first: indexing ``field[0]`` on an
            # empty key would raise IndexError, and an empty name is not a
            # usable field name anyway.
            if (isinstance(field, str) and field and
                    field[0] not in '_0123456789'):
                dtype.append((str(field), object))
                values.append(value)
        if dtype:
            return np.array([tuple(values)], dtype)
        else:
            return EmptyStructMarker
    # Next try and convert to an array
    try:
        narr = np.asanyarray(source)
    except ValueError:
        # e.g. ragged nested sequences; fall back on an object array
        narr = np.asanyarray(source, dtype=object)
    if narr.dtype.type in (object, np.object_) and \
       narr.shape == () and narr == source:
        # No interesting conversion possible
        return None
    return narr
# Native byte ordered dtypes, looked up once for the convenience of the writers
(NDT_FILE_HDR, NDT_TAG_FULL, NDT_TAG_SMALL, NDT_ARRAY_FLAGS) = (
    MDTYPES[native_code]['dtypes'][dtype_key]
    for dtype_key in ('file_header', 'tag_full', 'tag_smalldata',
                      'array_flags'))
class VarWriter5:
    ''' Generic matlab matrix writing class

    Writes one variable at a time to ``file_stream``. The options
    (``unicode_strings``, ``long_field_names``, ``oned_as``) are copied from
    the file writer passed to the constructor.
    '''
    # Template tag for miMATRIX elements. It is a class attribute, so the
    # byte count stored by ``update_matrix_tag`` is shared by all instances.
    mat_tag = np.zeros((), NDT_TAG_FULL)
    mat_tag['mdtype'] = miMATRIX

    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.unicode_strings = file_writer.unicode_strings
        self.long_field_names = file_writer.long_field_names
        self.oned_as = file_writer.oned_as
        # These are used for top level writes, and unset after
        self._var_name = None
        self._var_is_global = False

    def write_bytes(self, arr):
        ''' Write raw bytes of array `arr` in Fortran order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_element(self, arr, mdtype=None):
        ''' Write tag and data for array `arr`

        Parameters
        ----------
        arr : ndarray
            data to write
        mdtype : None or int, optional
            mat data type code; if None it is looked up from the dtype of
            `arr` in ``NP_TO_MTYPES``.
        '''
        if mdtype is None:
            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
        # Array needs to be in native byte order
        if arr.dtype.byteorder == swapped_code:
            arr = arr.byteswap().view(arr.dtype.newbyteorder())
        byte_count = arr.size*arr.itemsize
        # Data of at most 4 bytes fits inside the tag itself
        if byte_count <= 4:
            self.write_smalldata_element(arr, mdtype, byte_count)
        else:
            self.write_regular_element(arr, mdtype, byte_count)

    def write_smalldata_element(self, arr, mdtype, byte_count):
        ''' Write `arr` as a small data element (tag with embedded data) '''
        tag = np.zeros((), NDT_TAG_SMALL)
        # byte count in the upper 16 bits, data type in the lower 16 bits
        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
        # if arr.tobytes is < 4, the element will be zero-padded as needed.
        tag['data'] = arr.tobytes(order='F')
        self.write_bytes(tag)

    def write_regular_element(self, arr, mdtype, byte_count):
        ''' Write full tag, then data of `arr`, then padding '''
        tag = np.zeros((), NDT_TAG_FULL)
        tag['mdtype'] = mdtype
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.write_bytes(arr)
        # pad to next 64-bit boundary
        bc_mod_8 = byte_count % 8
        if bc_mod_8:
            self.file_stream.write(b'\x00' * (8-bc_mod_8))

    def write_header(self,
                     shape,
                     mclass,
                     is_complex=False,
                     is_logical=False,
                     nzmax=0):
        ''' Write header for given data options

        Parameters
        ----------
        shape : sequence
            array shape
        mclass : int
            mat5 matrix class
        is_complex : bool, optional
            True if matrix is complex
        is_logical : bool, optional
            True if matrix is logical
        nzmax : int, optional
            max non zero elements for sparse arrays

        Notes
        -----
        We get the name and the global flag from the object, and reset
        them to defaults after we've used them. The stream position of the
        matrix tag is stored in ``self._mat_tag_pos``.
        '''
        # get name and is_global from one-shot object store
        name = self._var_name
        is_global = self._var_is_global
        # initialize the top-level matrix tag, store position
        self._mat_tag_pos = self.file_stream.tell()
        self.write_bytes(self.mat_tag)
        # write array flags (complex, global, logical, class, nzmax)
        af = np.zeros((), NDT_ARRAY_FLAGS)
        af['data_type'] = miUINT32
        af['byte_count'] = 8
        flags = is_complex << 3 | is_global << 2 | is_logical << 1
        # class in the low byte, flag bits in the byte above it
        af['flags_class'] = mclass | flags << 8
        af['nzmax'] = nzmax
        self.write_bytes(af)
        # shape
        self.write_element(np.array(shape, dtype='i4'))
        # write name
        name = np.asarray(name)
        if name == '':  # empty string zero-terminated
            self.write_smalldata_element(name, miINT8, 0)
        else:
            self.write_element(name, miINT8)
        # reset the one-shot store to defaults
        self._var_name = ''
        self._var_is_global = False

    def update_matrix_tag(self, start_pos):
        ''' Go back to `start_pos` and fill in the matrix byte count

        The byte count is the number of bytes written since `start_pos`,
        excluding the 8 bytes of the tag itself. The stream position is
        restored afterwards.

        Raises
        ------
        MatWriteError
            If the byte count does not fit in 32 bits.
        '''
        curr_pos = self.file_stream.tell()
        self.file_stream.seek(start_pos)
        byte_count = curr_pos - start_pos - 8
        if byte_count >= 2**32:
            raise MatWriteError("Matrix too large to save with Matlab "
                                "5 format")
        self.mat_tag['byte_count'] = byte_count
        self.write_bytes(self.mat_tag)
        self.file_stream.seek(curr_pos)

    def write_top(self, arr, name, is_global):
        """ Write variable at top level of mat file

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        name : str, optional
            name as it will appear in matlab workspace
            default is empty string
        is_global : {False, True}, optional
            whether variable will be global on load into matlab
        """
        # these are set before the top-level header write, and unset at
        # the end of the same write, because they do not apply for lower levels
        self._var_is_global = is_global
        self._var_name = name
        # write the header and data
        self.write(arr)

    def write(self, arr):
        ''' Write `arr` to stream at top and sub levels

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for

        Raises
        ------
        TypeError
            If `arr` cannot be converted to something writeable.
        MatWriteError
            If `arr` is a matlab function.
        '''
        # store position, so we can update the matrix tag
        mat_tag_pos = self.file_stream.tell()
        # First check if these are sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr)
            self.update_matrix_tag(mat_tag_pos)
            return
        # Try to convert things that aren't arrays
        narr = to_writeable(arr)
        if narr is None:
            raise TypeError(f'Could not convert {arr} (type {type(arr)}) to array')
        if isinstance(narr, MatlabObject):
            self.write_object(narr)
        elif isinstance(narr, MatlabFunction):
            raise MatWriteError('Cannot write matlab functions')
        elif narr is EmptyStructMarker:  # empty struct array
            self.write_empty_struct()
        elif narr.dtype.fields:  # struct array
            self.write_struct(narr)
        elif narr.dtype.hasobject:  # cell array
            self.write_cells(narr)
        elif narr.dtype.kind in ('U', 'S'):
            if self.unicode_strings:
                codec = 'UTF8'
            else:
                codec = 'ascii'
            self.write_char(narr, codec)
        else:
            self.write_numeric(narr)
        self.update_matrix_tag(mat_tag_pos)

    def write_numeric(self, arr):
        ''' Write numeric (real, complex or logical) array `arr`

        A dtype with no entry in ``NP_TO_MXTYPES`` is cast to a type that
        can be written, and the array is stored as ``mxDOUBLE_CLASS``.
        '''
        imagf = arr.dtype.kind == 'c'
        logif = arr.dtype.kind == 'b'
        try:
            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
        except KeyError:
            # No matching matlab type, probably complex256 / float128 / float96
            # Cast data to complex128 / float64.
            if imagf:
                # The dtype string 'c128' is not valid in NumPy (complex128
                # is 'c16'), so name the type explicitly.
                arr = arr.astype(np.complex128)
            elif logif:
                arr = arr.astype('i1')  # Should only contain 0/1
            else:
                arr = arr.astype('f8')
            mclass = mxDOUBLE_CLASS
        self.write_header(matdims(arr, self.oned_as),
                          mclass,
                          is_complex=imagf,
                          is_logical=logif)
        if imagf:
            # real and imaginary parts are stored as two separate elements
            self.write_element(arr.real)
            self.write_element(arr.imag)
        else:
            self.write_element(arr)

    def write_char(self, arr, codec='ascii'):
        ''' Write string array `arr` with given `codec`
        '''
        if arr.size == 0 or np.all(arr == ''):
            # This an empty string array or a string array containing
            # only empty strings. Matlab cannot distinguish between a
            # string array that is empty, and a string array containing
            # only empty strings, because it stores strings as arrays of
            # char. There is no way of having an array of char that is
            # not empty, but contains an empty string. We have to
            # special-case the array-with-empty-strings because even
            # empty strings have zero padding, which would otherwise
            # appear in matlab as a string with a space.
            shape = (0,) * max(arr.ndim, 2)
            self.write_header(shape, mxCHAR_CLASS)
            self.write_smalldata_element(arr, miUTF8, 0)
            return
        # non-empty string.
        #
        # Convert to char array
        arr = arr_to_chars(arr)
        # We have to write the shape directly, because we are going
        # recode the characters, and the resulting stream of chars
        # may have a different length
        shape = arr.shape
        self.write_header(shape, mxCHAR_CLASS)
        if arr.dtype.kind == 'U' and arr.size:
            # Make one long string from all the characters. We need to
            # transpose here, because we're flattening the array, before
            # we write the bytes. The bytes have to be written in
            # Fortran order.
            n_chars = np.prod(shape)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr.T.copy())  # Fortran order
            # Recode with codec to give byte string
            st = st_arr.item().encode(codec)
            # Reconstruct as 1-D byte array
            arr = np.ndarray(shape=(len(st),),
                             dtype='S1',
                             buffer=st)
        self.write_element(arr, mdtype=miUTF8)

    def write_sparse(self, arr):
        ''' Write sparse matrix `arr`; sparse matrices are 2D
        '''
        A = arr.tocsc()  # convert to sparse CSC format
        A.sort_indices()  # MATLAB expects sorted row indices
        is_complex = (A.dtype.kind == 'c')
        is_logical = (A.dtype.kind == 'b')
        nz = A.nnz
        self.write_header(matdims(arr, self.oned_as),
                          mxSPARSE_CLASS,
                          is_complex=is_complex,
                          is_logical=is_logical,
                          # matlab won't load file with 0 nzmax
                          nzmax=1 if nz == 0 else nz)
        self.write_element(A.indices.astype('i4'))
        self.write_element(A.indptr.astype('i4'))
        self.write_element(A.data.real)
        if is_complex:
            self.write_element(A.data.imag)

    def write_cells(self, arr):
        ''' Write object array `arr` as a cell array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxCELL_CLASS)
        # loop over data, column major
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            self.write(el)

    def write_empty_struct(self):
        ''' Write a 1x1 struct that has no fields '''
        self.write_header((1, 1), mxSTRUCT_CLASS)
        # max field name length set to 1 in an example matlab struct
        self.write_element(np.array(1, dtype=np.int32))
        # Field names element is empty
        self.write_element(np.array([], dtype=np.int8))

    def write_struct(self, arr):
        ''' Write structured array `arr` as a matlab struct '''
        self.write_header(matdims(arr, self.oned_as),
                          mxSTRUCT_CLASS)
        self._write_items(arr)

    def _write_items(self, arr):
        ''' Write field names of `arr`, then every field of every item

        Raises
        ------
        ValueError
            If a field name is longer than allowed (31 characters, or 63
            when ``long_field_names`` is set).
        '''
        # write fieldnames
        fieldnames = [f[0] for f in arr.dtype.descr]
        # +1 so that every name is stored with at least one trailing null
        length = max(len(fieldname) for fieldname in fieldnames) + 1
        max_length = 64 if self.long_field_names else 32
        if length > max_length:
            raise ValueError("Field names are restricted to %d characters" %
                             (max_length-1))
        self.write_element(np.array([length], dtype='i4'))
        self.write_element(
            np.array(fieldnames, dtype='S%d' % (length)),
            mdtype=miINT8)
        # items in column-major order, fields in declaration order
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            for f in fieldnames:
                self.write(el[f])

    def write_object(self, arr):
        '''Same as writing structs, except different mx class, and extra
        classname element after header
        '''
        self.write_header(matdims(arr, self.oned_as),
                          mxOBJECT_CLASS)
        self.write_element(np.array(arr.classname, dtype='S'),
                           mdtype=miINT8)
        self._write_items(arr)
class MatFile5Writer:
    ''' Writer producing mat5 format files '''

    # NOTE: the docstring of ``__init__`` is a template filled in by
    # ``docfiller``; its ``%(...)s`` placeholders must be left as they are.
    @docfiller
    def __init__(self, file_stream,
                 do_compression=False,
                 unicode_strings=False,
                 global_vars=None,
                 long_field_names=False,
                 oned_as='row'):
        ''' Initialize writer for matlab 5 format files

        Parameters
        ----------
        %(do_compression)s
        %(unicode_strings)s
        global_vars : None or sequence of strings, optional
            Names of variables to be marked as global for matlab
        %(long_fields)s
        %(oned_as)s
        '''
        self.file_stream = file_stream
        self.do_compression = do_compression
        self.unicode_strings = unicode_strings
        # Any falsy value (None, empty sequence) means "no globals"
        self.global_vars = global_vars if global_vars else []
        self.long_field_names = long_field_names
        self.oned_as = oned_as
        # Created afresh on each call to ``put_variables``
        self._matrix_writer = None

    def write_file_header(self):
        ''' Write the mat 5 file header at the current stream position '''
        description = (f'MATLAB 5.0 MAT-file Platform: {os.name}, '
                       f'Created on: {time.asctime()}')
        # uint16 0x4d49 viewed as two bytes, in native byte order
        endian_test = np.ndarray(shape=(),
                                 dtype='S2',
                                 buffer=np.uint16(0x4d49))
        header = np.zeros((), NDT_FILE_HDR)
        header['description'] = description
        header['version'] = 0x0100
        header['endian_test'] = endian_test
        self.file_stream.write(header.tobytes())

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Names starting with an underscore are skipped.

        Parameters
        ----------
        mdict : mapping
            mapping whose ``items`` method yields (name, contents) pairs.
            ``name`` is the variable name that will appear in the matlab
            workspace on load; ``contents`` is something writeable to a
            matlab file, such as a NumPy array.
        write_header : {None, True, False}, optional
            If True, then write the matlab file header before writing the
            variables. If None (the default) then write the file header
            if we are at position 0 in the stream. By setting False
            here, and setting the stream position to the end of the file,
            you can append variables to a matlab file
        '''
        # None means: write the header only when at the start of the stream
        if write_header is None:
            write_header = self.file_stream.tell() == 0
        if write_header:
            self.write_file_header()
        writer = self._matrix_writer = VarWriter5(self)
        for name, var in mdict.items():
            if name[0] == '_':
                continue
            is_global = name in self.global_vars
            if not self.do_compression:
                writer.write_top(var, name.encode('latin1'), is_global)
                continue
            # Write the variable to a scratch buffer, then store the
            # zlib-compressed bytes behind an miCOMPRESSED tag
            scratch = BytesIO()
            writer.file_stream = scratch
            writer.write_top(var, name.encode('latin1'), is_global)
            compressed = zlib.compress(scratch.getvalue())
            tag = np.empty((), NDT_TAG_FULL)
            tag['mdtype'] = miCOMPRESSED
            tag['byte_count'] = len(compressed)
            self.file_stream.write(tag.tobytes())
            self.file_stream.write(compressed)