Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio5.py: 25%
403 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1''' Classes for read / write of matlab (TM) 5 files
3The matfile specification last found here:
5https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf
7(as of December 5 2008)
9=================================
10 Note on functions and mat files
11=================================
13The document above does not give any hints as to the storage of matlab
14function handles, or anonymous function handles. I had, therefore, to
15guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and
16``mxOPAQUE_CLASS`` by looking at example mat files.
``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to
contain a struct matrix with a set pattern of fields. For anonymous
functions, a sub-field of one of these fields seems to contain the
well-named ``mxOPAQUE_CLASS``. This seems to contain:
23* array flags as for any matlab matrix
24* 3 int8 strings
25* a matrix
27It seems that whenever the mat file contains a ``mxOPAQUE_CLASS``
28instance, there is also an un-named matrix (name == '') at the end of
29the mat file. I'll call this the ``__function_workspace__`` matrix.
31When I saved two anonymous functions in a mat file, or appended another
32anonymous function to the mat file, there was still only one
33``__function_workspace__`` un-named matrix at the end, but larger than
34that for a mat file with a single anonymous function, suggesting that
35the workspaces for the two functions had been merged.
37The ``__function_workspace__`` matrix appears to be of double class
38(``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in
39the format of a mini .mat file, without the first 124 bytes of the file
40header (the description and the subsystem_offset), but with the version
41U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes,
42presumably for 8 byte padding, and then a series of ``miMATRIX``
43entries, as in a standard mat file. The ``miMATRIX`` entries appear to
44be series of un-named (name == '') matrices, and may also contain arrays
45of this same mini-mat format.
47I guess that:
49* saving an anonymous function back to a mat file will need the
50 associated ``__function_workspace__`` matrix saved as well for the
51 anonymous function to work correctly.
52* appending to a mat file that has a ``__function_workspace__`` would
53 involve first pulling off this workspace, appending, checking whether
54 there were any more anonymous functions appended, and then somehow
55 merging the relevant workspaces, and saving at the end of the mat
56 file.
58The mat files I was playing with are in ``tests/data``:
60* sqr.mat
61* parabola.mat
62* some_functions.mat
64See ``tests/test_mio.py:test_mio_funcs.py`` for the debugging
65script I was working with.
67Small fragments of current code adapted from matfile.py by Heiko
68Henkelmann; parts of the code for simplify_cells=True adapted from
69http://blog.nephics.com/2019/08/28/better-loadmat-for-scipy/.
70'''
72import os
73import time
74import sys
75import zlib
77from io import BytesIO
79import warnings
81import numpy as np
83import scipy.sparse
85from ._byteordercodes import native_code, swapped_code
87from ._miobase import (MatFileReader, docfiller, matdims, read_dtype,
88 arr_to_chars, arr_dtype_number, MatWriteError,
89 MatReadError, MatReadWarning)
91# Reader object for matlab 5 format variables
92from ._mio5_utils import VarReader5
94# Constants and helper objects
95from ._mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES,
96 NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8,
97 miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS,
98 mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS,
99 mxDOUBLE_CLASS, mclass_info, mat_struct)
101from ._streams import ZlibInputStream
104def _has_struct(elem):
105 """Determine if elem is an array and if first array item is a struct."""
106 return (isinstance(elem, np.ndarray) and (elem.size > 0) and (elem.ndim > 0) and
107 isinstance(elem[0], mat_struct))
110def _inspect_cell_array(ndarray):
111 """Construct lists from cell arrays (loaded as numpy ndarrays), recursing
112 into items if they contain mat_struct objects."""
113 elem_list = []
114 for sub_elem in ndarray:
115 if isinstance(sub_elem, mat_struct):
116 elem_list.append(_matstruct_to_dict(sub_elem))
117 elif _has_struct(sub_elem):
118 elem_list.append(_inspect_cell_array(sub_elem))
119 else:
120 elem_list.append(sub_elem)
121 return elem_list
124def _matstruct_to_dict(matobj):
125 """Construct nested dicts from mat_struct objects."""
126 d = {}
127 for f in matobj._fieldnames:
128 elem = matobj.__dict__[f]
129 if isinstance(elem, mat_struct):
130 d[f] = _matstruct_to_dict(elem)
131 elif _has_struct(elem):
132 d[f] = _inspect_cell_array(elem)
133 else:
134 d[f] = elem
135 return d
138def _simplify_cells(d):
139 """Convert mat objects in dict to nested dicts."""
140 for key in d:
141 if isinstance(d[key], mat_struct):
142 d[key] = _matstruct_to_dict(d[key])
143 elif _has_struct(d[key]):
144 d[key] = _inspect_cell_array(d[key])
145 return d
class MatFile5Reader(MatFileReader):
    ''' Reader for Mat 5 mat files

    Adds the following attribute to base class

    uint16_codec - char codec to use for uint16 char arrays
        (defaults to system default codec)

    Uses variable reader that has the following standard interface (see
    abstract class in ``miobase``)::

       __init__(self, file_reader)
       read_header(self)
       array_from_header(self)

    and added interface::

       set_stream(self, stream)
       read_full_tag(self)

    '''
    @docfiller
    def __init__(self,
                 mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 uint16_codec=None,
                 simplify_cells=False):
        '''Initializer for matlab 5 file format reader

    %(matstream_arg)s
    %(load_args)s
    %(struct_arg)s
    uint16_codec : {None, string}
        Set codec to use for uint16 char arrays (e.g., 'utf-8').
        Use system default codec if None
        '''
        super().__init__(
            mat_stream,
            byte_order,
            mat_dtype,
            squeeze_me,
            chars_as_strings,
            matlab_compatible,
            struct_as_record,
            verify_compressed_data_integrity,
            simplify_cells)
        # Set uint16 codec
        if not uint16_codec:
            uint16_codec = sys.getdefaultencoding()
        self.uint16_codec = uint16_codec
        # placeholders for readers - see initialize_read method
        self._file_reader = None
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess byte order.

        Reads the two bytes at offset 126 of the stream and returns ``'<'``
        when they are ``b'IM'``, otherwise ``'>'``.

        Sets stream pointer to 0'''
        self.mat_stream.seek(126)
        mi = self.mat_stream.read(2)
        self.mat_stream.seek(0)
        return '<' if mi == b'IM' else '>'

    def read_file_header(self):
        ''' Read in mat 5 file header

        Returns
        -------
        hdict : dict
            dict with keys ``'__header__'`` (the stripped description bytes)
            and ``'__version__'`` (a ``'major.minor'`` string).
        '''
        hdict = {}
        hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
        hdr = read_dtype(self.mat_stream, hdr_dtype)
        hdict['__header__'] = hdr['description'].item().strip(b' \t\n\000')
        # version field: high byte is the major, low byte the minor number
        v_major = hdr['version'] >> 8
        v_minor = hdr['version'] & 0xFF
        hdict['__version__'] = '%d.%d' % (v_major, v_minor)
        return hdict

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        # reader for top level stream. We need this extra top-level
        # reader because we use the matrix_reader object to contain
        # compressed matrices (so they have their own stream)
        self._file_reader = VarReader5(self)
        # reader for matrix streams
        self._matrix_reader = VarReader5(self)

    def read_var_header(self):
        ''' Read header, return header, next position

        Header has to define at least .name and .is_global

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes .name and .is_global
        next_position : int
           position in stream of next variable

        Raises
        ------
        ValueError
            If the tag read from the stream reports a non-positive byte
            count.
        TypeError
            If the element (after any decompression) is not ``miMATRIX``.
        '''
        mdtype, byte_count = self._file_reader.read_full_tag()
        if not byte_count > 0:
            raise ValueError("Did not read any bytes")
        next_pos = self.mat_stream.tell() + byte_count
        if mdtype == miCOMPRESSED:
            # Make new stream from compressed data
            stream = ZlibInputStream(self.mat_stream, byte_count)
            self._matrix_reader.set_stream(stream)
            check_stream_limit = self.verify_compressed_data_integrity
            mdtype, byte_count = self._matrix_reader.read_full_tag()
        else:
            check_stream_limit = False
            self._matrix_reader.set_stream(self.mat_stream)
        if not mdtype == miMATRIX:
            raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
        header = self._matrix_reader.read_header(check_stream_limit)
        return header, next_pos

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False} bool, optional
           If True, apply recursive post-processing during loading of
           array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            Names of the variables to get. A single string is treated as a
            one-element list. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            File header entries, a ``'__globals__'`` list, and one entry per
            variable read. If ``self.simplify_cells`` is set, the dict is
            passed through ``_simplify_cells`` first.
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # private copy: names are removed from it as they are found
            variable_names = list(variable_names)

        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        mdict = self.read_file_header()
        mdict['__globals__'] = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if name in mdict:
                warnings.warn('Duplicate variable name "%s" in stream'
                              ' - replacing previous with new\n'
                              'Consider mio5.varmats_from_mat to split '
                              'file into single variable files' % name,
                              MatReadWarning, stacklevel=2)
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'
                # We want to keep this raw because mat_dtype processing
                # will break the format (uint8 as mxDOUBLE_CLASS)
                process = False
            else:
                process = True
            if variable_names is not None and name not in variable_names:
                # not requested: skip straight to the next variable
                self.mat_stream.seek(next_position)
                continue
            try:
                res = self.read_var_array(hdr, process)
            except MatReadError as err:
                # keep going: store the error text in place of the variable
                warnings.warn(
                    'Unreadable variable "%s", because "%s"' %
                    (name, err),
                    Warning, stacklevel=2)
                res = "Read error: %s" % err
            self.mat_stream.seek(next_position)
            mdict[name] = res
            if hdr.is_global:
                mdict['__globals__'].append(name)
            if variable_names is not None:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    # everything requested has been read
                    break
        if self.simplify_cells:
            return _simplify_cells(mdict)
        else:
            return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        var_info : list of tuple
            One ``(name, shape, info)`` tuple per variable, where `info` is
            ``'logical'`` for logical arrays and otherwise the entry of
            ``mclass_info`` for the variable class (``'unknown'`` if absent).
        '''
        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        self.read_file_header()
        var_info = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'

            shape = self._matrix_reader.shape_from_header(hdr)
            if hdr.is_logical:
                info = 'logical'
            else:
                info = mclass_info.get(hdr.mclass, 'unknown')
            var_info.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return var_info
def varmats_from_mat(file_obj):
    """ Split a mat 5 file into one in-memory mat file per variable

    Handy for a difficult mat file that contains unreadable variables: every
    variable is copied out in raw, undecoded form and placed behind a copy of
    the original file header, ready for saving or reading. It also covers the
    pathological case of several variables sharing one name; all duplicates
    are returned here, whereas the standard reader overwrites duplicates in
    the dictionary it returns.

    The file pointer in `file_obj` will be undefined. File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable. The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io
    >>> import numpy as np
    >>> from io import BytesIO
    >>> from scipy.io.matlab._mio5 import varmats_from_mat
    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    reader = MatFile5Reader(file_obj)
    # Grab the raw bytes of the top-level file header
    file_obj.seek(0)
    header_size = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    header_bytes = file_obj.read(header_size)
    # Rewind and let the reader consume the header itself
    file_obj.seek(0)
    reader.initialize_read()
    reader.read_file_header()
    var_end = file_obj.tell()
    named_mats = []
    while not reader.end_of_stream():
        var_start = var_end
        var_hdr, var_end = reader.read_var_header()
        if var_hdr.name is None:
            name = 'None'
        else:
            name = var_hdr.name.decode('latin1')
        # Copy the variable's raw bytes, tag included
        file_obj.seek(var_start)
        var_bytes = file_obj.read(var_end - var_start)
        # file header followed by the single variable, positioned at 0
        named_mats.append((name, BytesIO(header_bytes + var_bytes)))
    return named_mats
class EmptyStructMarker:
    """ Marker class signalling that an empty matlab struct is to be output """
def to_writeable(source):
    ''' Convert input object ``source`` to something we can write

    Parameters
    ----------
    source : object

    Returns
    -------
    arr : None or ndarray or EmptyStructMarker
        If `source` cannot be converted to something we can write to a matfile,
        return None. If `source` is equivalent to an empty dictionary, return
        ``EmptyStructMarker``. Otherwise return `source` converted to an
        ndarray with contents for writing to matfile.

    Notes
    -----
    For mappings (and objects converted through their ``__dict__``), only
    keys that are non-empty strings not starting with an underscore or a
    digit become struct fields; all other keys are silently dropped.
    '''
    if isinstance(source, np.ndarray):
        return source
    if source is None:
        return None
    if hasattr(source, "__array__"):
        return np.asarray(source)
    # Objects that implement mappings
    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
                  hasattr(source, 'items'))
    # Objects that don't implement mappings, but do have dicts
    if isinstance(source, np.generic):
        # NumPy scalars are never mappings (PyPy issue workaround)
        pass
    elif not is_mapping and hasattr(source, '__dict__'):
        source = {key: value for key, value in source.__dict__.items()
                  if not key.startswith('_')}
        is_mapping = True
    if is_mapping:
        dtype = []
        values = []
        for field, value in source.items():
            # The truthiness test skips the empty string, for which
            # ``field[0]`` would raise IndexError.
            if (isinstance(field, str) and field and
                    field[0] not in '_0123456789'):
                dtype.append((str(field), object))
                values.append(value)
        if dtype:
            # one-element struct array with one object field per key
            return np.array([tuple(values)], dtype)
        else:
            return EmptyStructMarker
    # Next try and convert to an array
    try:
        narr = np.asanyarray(source)
    except ValueError:
        narr = np.asanyarray(source, dtype=object)
    if narr.dtype.type in (object, np.object_) and \
            narr.shape == () and narr == source:
        # No interesting conversion possible
        return None
    return narr
# Native byte ordered dtypes, looked up once here so the writers below can
# use them directly
(NDT_FILE_HDR,
 NDT_TAG_FULL,
 NDT_TAG_SMALL,
 NDT_ARRAY_FLAGS) = (
    MDTYPES[native_code]['dtypes'][_key]
    for _key in ('file_header', 'tag_full', 'tag_smalldata', 'array_flags'))
class VarWriter5:
    ''' Generic matlab matrix writing class '''
    # Template miMATRIX tag with a zero byte count. It is shared by every
    # instance, so it is only ever read; ``update_matrix_tag`` fills in the
    # real byte count on a private copy.
    mat_tag = np.zeros((), NDT_TAG_FULL)
    mat_tag['mdtype'] = miMATRIX

    def __init__(self, file_writer):
        # Copy the output stream and the write options from the file writer
        self.file_stream = file_writer.file_stream
        self.unicode_strings = file_writer.unicode_strings
        self.long_field_names = file_writer.long_field_names
        self.oned_as = file_writer.oned_as
        # These are used for top level writes, and unset after
        self._var_name = None
        self._var_is_global = False

    def write_bytes(self, arr):
        ''' Write the bytes of array `arr` to the stream in Fortran order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_element(self, arr, mdtype=None):
        ''' write tag and data

        Parameters
        ----------
        arr : ndarray
            data to write
        mdtype : None or int, optional
            matlab data type code for the tag. If None, it is looked up in
            ``NP_TO_MTYPES`` from the dtype of `arr`.
        '''
        if mdtype is None:
            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
        # Array needs to be in native byte order
        if arr.dtype.byteorder == swapped_code:
            arr = arr.byteswap().view(arr.dtype.newbyteorder())
        byte_count = arr.size*arr.itemsize
        # data of at most 4 bytes is embedded in the tag itself
        if byte_count <= 4:
            self.write_smalldata_element(arr, mdtype, byte_count)
        else:
            self.write_regular_element(arr, mdtype, byte_count)

    def write_smalldata_element(self, arr, mdtype, byte_count):
        ''' Write `arr` as a small data element: tag with embedded data '''
        # write tag with embedded data
        tag = np.zeros((), NDT_TAG_SMALL)
        # byte count goes in the upper 16 bits, mdtype in the lower 16
        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
        # if arr.tobytes is < 4, the element will be zero-padded as needed.
        tag['data'] = arr.tobytes(order='F')
        self.write_bytes(tag)

    def write_regular_element(self, arr, mdtype, byte_count):
        ''' Write `arr` as a full tag followed by the data and padding '''
        # write tag, data
        tag = np.zeros((), NDT_TAG_FULL)
        tag['mdtype'] = mdtype
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.write_bytes(arr)
        # pad to next 64-bit boundary
        bc_mod_8 = byte_count % 8
        if bc_mod_8:
            self.file_stream.write(b'\x00' * (8-bc_mod_8))

    def write_header(self,
                     shape,
                     mclass,
                     is_complex=False,
                     is_logical=False,
                     nzmax=0):
        ''' Write header for given data options

        Parameters
        ----------
        shape : sequence
           array shape
        mclass : int
           mat5 matrix class
        is_complex : bool, optional
           True if matrix is complex
        is_logical : bool, optional
           True if matrix is logical
        nzmax : int, optional
           max non zero elements for sparse arrays

        We get the name and the global flag from the object, and reset
        them to defaults after we've used them
        '''
        # get name and is_global from one-shot object store
        name = self._var_name
        is_global = self._var_is_global
        # initialize the top-level matrix tag, store position
        self._mat_tag_pos = self.file_stream.tell()
        # placeholder tag; the byte count is patched in update_matrix_tag
        self.write_bytes(self.mat_tag)
        # write array flags (complex, global, logical, class, nzmax)
        af = np.zeros((), NDT_ARRAY_FLAGS)
        af['data_type'] = miUINT32
        af['byte_count'] = 8
        flags = is_complex << 3 | is_global << 2 | is_logical << 1
        af['flags_class'] = mclass | flags << 8
        af['nzmax'] = nzmax
        self.write_bytes(af)
        # shape
        self.write_element(np.array(shape, dtype='i4'))
        # write name
        name = np.asarray(name)
        if name == '':  # empty string zero-terminated
            self.write_smalldata_element(name, miINT8, 0)
        else:
            self.write_element(name, miINT8)
        # reset the one-shot store to defaults
        self._var_name = ''
        self._var_is_global = False

    def update_matrix_tag(self, start_pos):
        ''' Rewrite the matrix tag at `start_pos` with the final byte count

        The byte count is the number of bytes written since `start_pos`,
        minus the 8 bytes of the tag itself. The stream position is restored
        afterwards.

        Raises
        ------
        MatWriteError
            If the byte count does not fit in 32 bits.
        '''
        curr_pos = self.file_stream.tell()
        self.file_stream.seek(start_pos)
        byte_count = curr_pos - start_pos - 8
        if byte_count >= 2**32:
            raise MatWriteError("Matrix too large to save with Matlab "
                                "5 format")
        # Work on a copy so the class-level template shared by all
        # instances is never modified.
        tag = self.mat_tag.copy()
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.file_stream.seek(curr_pos)

    def write_top(self, arr, name, is_global):
        """ Write variable at top level of mat file

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        name : str or bytes
            name as it will appear in matlab workspace
        is_global : {False, True}
            whether variable will be global on load into matlab
        """
        # these are set before the top-level header write, and unset at
        # the end of the same write, because they do not apply for lower levels
        self._var_is_global = is_global
        self._var_name = name
        # write the header and data
        self.write(arr)

    def write(self, arr):
        ''' Write `arr` to stream at top and sub levels

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for

        Raises
        ------
        TypeError
            If `arr` cannot be converted to something writeable.
        MatWriteError
            If `arr` is a matlab function.
        '''
        # store position, so we can update the matrix tag
        mat_tag_pos = self.file_stream.tell()
        # First check if these are sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr)
            self.update_matrix_tag(mat_tag_pos)
            return
        # Try to convert things that aren't arrays
        narr = to_writeable(arr)
        if narr is None:
            raise TypeError('Could not convert %s (type %s) to array'
                            % (arr, type(arr)))
        if isinstance(narr, MatlabObject):
            self.write_object(narr)
        elif isinstance(narr, MatlabFunction):
            raise MatWriteError('Cannot write matlab functions')
        elif narr is EmptyStructMarker:  # empty struct array
            self.write_empty_struct()
        elif narr.dtype.fields:  # struct array
            self.write_struct(narr)
        elif narr.dtype.hasobject:  # cell array
            self.write_cells(narr)
        elif narr.dtype.kind in ('U', 'S'):
            if self.unicode_strings:
                codec = 'UTF8'
            else:
                codec = 'ascii'
            self.write_char(narr, codec)
        else:
            self.write_numeric(narr)
        self.update_matrix_tag(mat_tag_pos)

    def write_numeric(self, arr):
        ''' Write numeric (real, complex or boolean) array `arr` '''
        imagf = arr.dtype.kind == 'c'
        logif = arr.dtype.kind == 'b'
        try:
            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
        except KeyError:
            # No matching matlab type, probably complex256 / float128 / float96
            # Cast data to complex128 / float64.
            if imagf:
                # 'c128' is not a valid NumPy type code (the number counts
                # bytes, so complex128 is 'c16'); use the explicit type.
                arr = arr.astype(np.complex128)
            elif logif:
                arr = arr.astype('i1')  # Should only contain 0/1
            else:
                arr = arr.astype('f8')
            mclass = mxDOUBLE_CLASS
        self.write_header(matdims(arr, self.oned_as),
                          mclass,
                          is_complex=imagf,
                          is_logical=logif)
        if imagf:
            # complex data is stored as separate real and imaginary elements
            self.write_element(arr.real)
            self.write_element(arr.imag)
        else:
            self.write_element(arr)

    def write_char(self, arr, codec='ascii'):
        ''' Write string array `arr` with given `codec`
        '''
        if arr.size == 0 or np.all(arr == ''):
            # This an empty string array or a string array containing
            # only empty strings. Matlab cannot distinguish between a
            # string array that is empty, and a string array containing
            # only empty strings, because it stores strings as arrays of
            # char. There is no way of having an array of char that is
            # not empty, but contains an empty string. We have to
            # special-case the array-with-empty-strings because even
            # empty strings have zero padding, which would otherwise
            # appear in matlab as a string with a space.
            shape = (0,) * np.max([arr.ndim, 2])
            self.write_header(shape, mxCHAR_CLASS)
            self.write_smalldata_element(arr, miUTF8, 0)
            return
        # non-empty string.
        #
        # Convert to char array
        arr = arr_to_chars(arr)
        # We have to write the shape directly, because we are going
        # recode the characters, and the resulting stream of chars
        # may have a different length
        shape = arr.shape
        self.write_header(shape, mxCHAR_CLASS)
        if arr.dtype.kind == 'U' and arr.size:
            # Make one long string from all the characters. We need to
            # transpose here, because we're flattening the array, before
            # we write the bytes. The bytes have to be written in
            # Fortran order.
            n_chars = np.prod(shape)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr.T.copy())  # Fortran order
            # Recode with codec to give byte string
            st = st_arr.item().encode(codec)
            # Reconstruct as 1-D byte array
            arr = np.ndarray(shape=(len(st),),
                             dtype='S1',
                             buffer=st)
        self.write_element(arr, mdtype=miUTF8)

    def write_sparse(self, arr):
        ''' Sparse matrices are 2D
        '''
        A = arr.tocsc()  # convert to sparse CSC format
        A.sort_indices()  # MATLAB expects sorted row indices
        is_complex = (A.dtype.kind == 'c')
        is_logical = (A.dtype.kind == 'b')
        nz = A.nnz
        self.write_header(matdims(arr, self.oned_as),
                          mxSPARSE_CLASS,
                          is_complex=is_complex,
                          is_logical=is_logical,
                          # matlab won't load file with 0 nzmax
                          nzmax=1 if nz == 0 else nz)
        self.write_element(A.indices.astype('i4'))
        self.write_element(A.indptr.astype('i4'))
        self.write_element(A.data.real)
        if is_complex:
            self.write_element(A.data.imag)

    def write_cells(self, arr):
        ''' Write cell array `arr`: header, then each item in turn '''
        self.write_header(matdims(arr, self.oned_as),
                          mxCELL_CLASS)
        # loop over data, column major
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            self.write(el)

    def write_empty_struct(self):
        ''' Write a 1x1 struct that has no fields '''
        self.write_header((1, 1), mxSTRUCT_CLASS)
        # max field name length set to 1 in an example matlab struct
        self.write_element(np.array(1, dtype=np.int32))
        # Field names element is empty
        self.write_element(np.array([], dtype=np.int8))

    def write_struct(self, arr):
        ''' Write struct array `arr`: header, then field names and values '''
        self.write_header(matdims(arr, self.oned_as),
                          mxSTRUCT_CLASS)
        self._write_items(arr)

    def _write_items(self, arr):
        ''' Write field names of `arr`, then every field of every item

        Raises
        ------
        ValueError
            If a field name is longer than 31 characters (63 when
            ``long_field_names`` is set).
        '''
        # write fieldnames
        fieldnames = [f[0] for f in arr.dtype.descr]
        # each name is stored in `length` bytes, one more than the longest
        length = max(len(fieldname) for fieldname in fieldnames) + 1
        max_length = 64 if self.long_field_names else 32
        if length > max_length:
            raise ValueError("Field names are restricted to %d characters" %
                             (max_length-1))
        self.write_element(np.array([length], dtype='i4'))
        self.write_element(
            np.array(fieldnames, dtype='S%d' % (length)),
            mdtype=miINT8)
        # items in column major order, fields in declaration order
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            for f in fieldnames:
                self.write(el[f])

    def write_object(self, arr):
        '''Same as writing structs, except different mx class, and extra
        classname element after header
        '''
        self.write_header(matdims(arr, self.oned_as),
                          mxOBJECT_CLASS)
        self.write_element(np.array(arr.classname, dtype='S'),
                           mdtype=miINT8)
        self._write_items(arr)
813class MatFile5Writer:
814 ''' Class for writing mat5 files '''
816 @docfiller
817 def __init__(self, file_stream,
818 do_compression=False,
819 unicode_strings=False,
820 global_vars=None,
821 long_field_names=False,
822 oned_as='row'):
823 ''' Initialize writer for matlab 5 format files
825 Parameters
826 ----------
827 %(do_compression)s
828 %(unicode_strings)s
829 global_vars : None or sequence of strings, optional
830 Names of variables to be marked as global for matlab
831 %(long_fields)s
832 %(oned_as)s
833 '''
834 self.file_stream = file_stream
835 self.do_compression = do_compression
836 self.unicode_strings = unicode_strings
837 if global_vars:
838 self.global_vars = global_vars
839 else:
840 self.global_vars = []
841 self.long_field_names = long_field_names
842 self.oned_as = oned_as
843 self._matrix_writer = None
845 def write_file_header(self):
846 # write header
847 hdr = np.zeros((), NDT_FILE_HDR)
848 hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \
849 % (os.name,time.asctime())
850 hdr['version'] = 0x0100
851 hdr['endian_test'] = np.ndarray(shape=(),
852 dtype='S2',
853 buffer=np.uint16(0x4d49))
854 self.file_stream.write(hdr.tobytes())
856 def put_variables(self, mdict, write_header=None):
857 ''' Write variables in `mdict` to stream
859 Parameters
860 ----------
861 mdict : mapping
862 mapping with method ``items`` returns name, contents pairs where
863 ``name`` which will appear in the matlab workspace in file load, and
864 ``contents`` is something writeable to a matlab file, such as a NumPy
865 array.
866 write_header : {None, True, False}, optional
867 If True, then write the matlab file header before writing the
868 variables. If None (the default) then write the file header
869 if we are at position 0 in the stream. By setting False
870 here, and setting the stream position to the end of the file,
871 you can append variables to a matlab file
872 '''
873 # write header if requested, or None and start of file
874 if write_header is None:
875 write_header = self.file_stream.tell() == 0
876 if write_header:
877 self.write_file_header()
878 self._matrix_writer = VarWriter5(self)
879 for name, var in mdict.items():
880 if name[0] == '_':
881 continue
882 is_global = name in self.global_vars
883 if self.do_compression:
884 stream = BytesIO()
885 self._matrix_writer.file_stream = stream
886 self._matrix_writer.write_top(var, name.encode('latin1'), is_global)
887 out_str = zlib.compress(stream.getvalue())
888 tag = np.empty((), NDT_TAG_FULL)
889 tag['mdtype'] = miCOMPRESSED
890 tag['byte_count'] = len(out_str)
891 self.file_stream.write(tag.tobytes())
892 self.file_stream.write(out_str)
893 else: # not compressing
894 self._matrix_writer.write_top(var, name.encode('latin1'), is_global)