Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio5.py: 25%

403 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1''' Classes for read / write of matlab (TM) 5 files 

2 

3The matfile specification last found here: 

4 

5https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf 

6 

7(as of December 5 2008) 

8 

9================================= 

10 Note on functions and mat files 

11================================= 

12 

13The document above does not give any hints as to the storage of matlab 

14function handles, or anonymous function handles. I had, therefore, to 

15guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and 

16``mxOPAQUE_CLASS`` by looking at example mat files. 

17 

18``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to 

19contain a struct matrix with a set pattern of fields. For anonymous 

20functions, a sub-field of one of these fields seems to contain the 

21well-named ``mxOPAQUE_CLASS``. This seems to contain: 

22 

23* array flags as for any matlab matrix 

24* 3 int8 strings 

25* a matrix 

26 

27It seems that whenever the mat file contains a ``mxOPAQUE_CLASS`` 

28instance, there is also an un-named matrix (name == '') at the end of 

29the mat file. I'll call this the ``__function_workspace__`` matrix. 

30 

31When I saved two anonymous functions in a mat file, or appended another 

32anonymous function to the mat file, there was still only one 

33``__function_workspace__`` un-named matrix at the end, but larger than 

34that for a mat file with a single anonymous function, suggesting that 

35the workspaces for the two functions had been merged. 

36 

37The ``__function_workspace__`` matrix appears to be of double class 

38(``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in 

39the format of a mini .mat file, without the first 124 bytes of the file 

40header (the description and the subsystem_offset), but with the version 

41U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes, 

42presumably for 8 byte padding, and then a series of ``miMATRIX`` 

43entries, as in a standard mat file. The ``miMATRIX`` entries appear to 

44be series of un-named (name == '') matrices, and may also contain arrays 

45of this same mini-mat format. 

46 

47I guess that: 

48 

49* saving an anonymous function back to a mat file will need the 

50 associated ``__function_workspace__`` matrix saved as well for the 

51 anonymous function to work correctly. 

52* appending to a mat file that has a ``__function_workspace__`` would 

53 involve first pulling off this workspace, appending, checking whether 

54 there were any more anonymous functions appended, and then somehow 

55 merging the relevant workspaces, and saving at the end of the mat 

56 file. 

57 

58The mat files I was playing with are in ``tests/data``: 

59 

60* sqr.mat 

61* parabola.mat 

62* some_functions.mat 

63 

64See ``tests/test_mio.py:test_mio_funcs.py`` for the debugging 

65script I was working with. 

66 

67Small fragments of current code adapted from matfile.py by Heiko 

68Henkelmann; parts of the code for simplify_cells=True adapted from 

69http://blog.nephics.com/2019/08/28/better-loadmat-for-scipy/. 

70''' 

71 

72import os 

73import time 

74import sys 

75import zlib 

76 

77from io import BytesIO 

78 

79import warnings 

80 

81import numpy as np 

82 

83import scipy.sparse 

84 

85from ._byteordercodes import native_code, swapped_code 

86 

87from ._miobase import (MatFileReader, docfiller, matdims, read_dtype, 

88 arr_to_chars, arr_dtype_number, MatWriteError, 

89 MatReadError, MatReadWarning) 

90 

91# Reader object for matlab 5 format variables 

92from ._mio5_utils import VarReader5 

93 

94# Constants and helper objects 

95from ._mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES, 

96 NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8, 

97 miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS, 

98 mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS, 

99 mxDOUBLE_CLASS, mclass_info, mat_struct) 

100 

101from ._streams import ZlibInputStream 

102 

103 

104def _has_struct(elem): 

105 """Determine if elem is an array and if first array item is a struct.""" 

106 return (isinstance(elem, np.ndarray) and (elem.size > 0) and (elem.ndim > 0) and 

107 isinstance(elem[0], mat_struct)) 

108 

109 

110def _inspect_cell_array(ndarray): 

111 """Construct lists from cell arrays (loaded as numpy ndarrays), recursing 

112 into items if they contain mat_struct objects.""" 

113 elem_list = [] 

114 for sub_elem in ndarray: 

115 if isinstance(sub_elem, mat_struct): 

116 elem_list.append(_matstruct_to_dict(sub_elem)) 

117 elif _has_struct(sub_elem): 

118 elem_list.append(_inspect_cell_array(sub_elem)) 

119 else: 

120 elem_list.append(sub_elem) 

121 return elem_list 

122 

123 

124def _matstruct_to_dict(matobj): 

125 """Construct nested dicts from mat_struct objects.""" 

126 d = {} 

127 for f in matobj._fieldnames: 

128 elem = matobj.__dict__[f] 

129 if isinstance(elem, mat_struct): 

130 d[f] = _matstruct_to_dict(elem) 

131 elif _has_struct(elem): 

132 d[f] = _inspect_cell_array(elem) 

133 else: 

134 d[f] = elem 

135 return d 

136 

137 

138def _simplify_cells(d): 

139 """Convert mat objects in dict to nested dicts.""" 

140 for key in d: 

141 if isinstance(d[key], mat_struct): 

142 d[key] = _matstruct_to_dict(d[key]) 

143 elif _has_struct(d[key]): 

144 d[key] = _inspect_cell_array(d[key]) 

145 return d 

146 

147 

class MatFile5Reader(MatFileReader):
    ''' Reader for Mat 5 mat files

    Adds the following attribute to base class

    uint16_codec - char codec to use for uint16 char arrays
        (defaults to system default codec)

    Uses variable reader that has the following standard interface (see
    abstract class in ``miobase``)::

       __init__(self, file_reader)
       read_header(self)
       array_from_header(self)

    and added interface::

       set_stream(self, stream)
       read_full_tag(self)

    '''
    @docfiller
    def __init__(self,
                 mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 uint16_codec=None,
                 simplify_cells=False):
        '''Initializer for matlab 5 file format reader

    %(matstream_arg)s
    %(load_args)s
    %(struct_arg)s
    uint16_codec : {None, string}
        Set codec to use for uint16 char arrays (e.g., 'utf-8').
        Use system default codec if None
        '''
        super().__init__(
            mat_stream,
            byte_order,
            mat_dtype,
            squeeze_me,
            chars_as_strings,
            matlab_compatible,
            struct_as_record,
            verify_compressed_data_integrity,
            simplify_cells)
        # Any falsy codec (None, '') falls back to the interpreter default
        self.uint16_codec = uint16_codec or sys.getdefaultencoding()
        # placeholders for readers - see initialize_read method
        self._file_reader = None
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess byte order from the two endian-test bytes at offset 126.

        Returns '<' when the bytes read ``b'IM'``, otherwise '>'.
        Sets stream pointer to 0'''
        self.mat_stream.seek(126)
        endian_mark = self.mat_stream.read(2)
        self.mat_stream.seek(0)
        return '<' if endian_mark == b'IM' else '>'

    def read_file_header(self):
        ''' Read in mat 5 file header

        Returns a dict with keys ``__header__`` (stripped description bytes)
        and ``__version__`` ('major.minor' string).
        '''
        header_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
        raw = read_dtype(self.mat_stream, header_dtype)
        description = raw['description'].item().strip(b' \t\n\000')
        # High byte of the version field is the major, low byte the minor
        major = raw['version'] >> 8
        minor = raw['version'] & 0xFF
        return {
            '__header__': description,
            '__version__': '%d.%d' % (major, minor),
        }

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        # reader for top level stream. We need this extra top-level
        # reader because we use the matrix_reader object to contain
        # compressed matrices (so they have their own stream)
        self._file_reader = VarReader5(self)
        # reader for matrix streams
        self._matrix_reader = VarReader5(self)

    def read_var_header(self):
        ''' Read header, return header, next position

        Header has to define at least .name and .is_global

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes .name and .is_global
        next_position : int
           position in stream of next variable

        Raises
        ------
        ValueError
           if the tag reports a byte count that is not positive
        TypeError
           if the (possibly decompressed) element is not of miMATRIX type
        '''
        mdtype, byte_count = self._file_reader.read_full_tag()
        if not byte_count > 0:
            raise ValueError("Did not read any bytes")
        next_pos = self.mat_stream.tell() + byte_count
        if mdtype == miCOMPRESSED:
            # Wrap the compressed bytes in a decompressing stream and read
            # the real tag from there
            check_stream_limit = self.verify_compressed_data_integrity
            self._matrix_reader.set_stream(
                ZlibInputStream(self.mat_stream, byte_count))
            mdtype, byte_count = self._matrix_reader.read_full_tag()
        else:
            check_stream_limit = False
            self._matrix_reader.set_stream(self.mat_stream)
        if mdtype != miMATRIX:
            raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
        header = self._matrix_reader.read_header(check_stream_limit)
        return header, next_pos

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False} bool, optional
           If True, apply recursive post-processing during loading of
           array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        variable_names - optional list of variable names to get

        If variable_names is None, then get all variables in file
        '''
        # Normalise the request to None or a private, mutable list
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            variable_names = list(variable_names)

        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        mdict = self.read_file_header()
        mdict['__globals__'] = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if name in mdict:
                warnings.warn('Duplicate variable name "%s" in stream'
                              ' - replacing previous with new\n'
                              'Consider mio5.varmats_from_mat to split '
                              'file into single variable files' % name,
                              MatReadWarning, stacklevel=2)
            # An empty name can only be a matlab 7 function workspace.
            # We want to keep this raw because mat_dtype processing
            # will break the format (uint8 as mxDOUBLE_CLASS)
            process = name != ''
            if not process:
                name = '__function_workspace__'
            if variable_names is not None and name not in variable_names:
                # Not requested: jump straight to the next variable
                self.mat_stream.seek(next_position)
                continue
            try:
                res = self.read_var_array(hdr, process)
            except MatReadError as err:
                warnings.warn(
                    'Unreadable variable "%s", because "%s"' %
                    (name, err),
                    Warning, stacklevel=2)
                res = "Read error: %s" % err
            self.mat_stream.seek(next_position)
            mdict[name] = res
            if hdr.is_global:
                mdict['__globals__'].append(name)
            if variable_names is not None:
                # Stop early once every requested variable has been read
                variable_names.remove(name)
                if len(variable_names) == 0:
                    break
        if self.simplify_cells:
            return _simplify_cells(mdict)
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns a list of ``(name, shape, info)`` tuples, one per variable,
        where `info` is 'logical' or the matlab class description.
        '''
        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        self.read_file_header()
        found = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'

            shape = self._matrix_reader.shape_from_header(hdr)
            if hdr.is_logical:
                info = 'logical'
            else:
                info = mclass_info.get(hdr.mclass, 'unknown')
            found.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return found

373 

374 

def varmats_from_mat(file_obj):
    """ Pull variables out of mat 5 file as a sequence of mat file objects

    This can be useful with a difficult mat file, containing unreadable
    variables. This routine pulls the variables out in raw form and puts them,
    unread, back into a file stream for saving or reading. Another use is the
    pathological case where there is more than one variable of the same name in
    the file; this routine returns the duplicates, whereas the standard reader
    will overwrite duplicates in the returned dictionary.

    The file pointer in `file_obj` will be undefined. File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable. The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io
    >>> import numpy as np
    >>> from io import BytesIO
    >>> from scipy.io.matlab._mio5 import varmats_from_mat
    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    rdr = MatFile5Reader(file_obj)
    # Grab the raw bytes of the top-level file header so they can be
    # prepended to every single-variable stream
    file_obj.seek(0)
    header_size = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    raw_hdr = file_obj.read(header_size)
    # Rewind and let the reader consume the header itself
    file_obj.seek(0)
    rdr.initialize_read()
    rdr.read_file_header()
    next_position = file_obj.tell()
    named_mats = []
    while not rdr.end_of_stream():
        start_position = next_position
        hdr, next_position = rdr.read_var_header()
        name = 'None' if hdr.name is None else hdr.name.decode('latin1')
        # Re-read the whole variable (tag included) as raw bytes
        file_obj.seek(start_position)
        var_bytes = file_obj.read(next_position - start_position)
        # A BytesIO built from initial bytes is already positioned at 0
        named_mats.append((name, BytesIO(raw_hdr + var_bytes)))
    return named_mats

441 

442 

class EmptyStructMarker:
    """ Marker class signalling an empty matlab struct on output.

    The class object itself is used as the sentinel.
    """

445 

446 

def to_writeable(source):
    ''' Convert input object ``source`` to something we can write

    Parameters
    ----------
    source : object

    Returns
    -------
    arr : None or ndarray or EmptyStructMarker
        If `source` cannot be converted to something we can write to a matfile,
        return None. If `source` is equivalent to an empty dictionary, return
        ``EmptyStructMarker``. Otherwise return `source` converted to an
        ndarray with contents for writing to matfile.

    Notes
    -----
    For mappings (and objects exposing ``__dict__``) only string keys that do
    not start with an underscore or a digit become struct fields. Empty-string
    keys are skipped as well.
    '''
    if isinstance(source, np.ndarray):
        return source
    if source is None:
        return None
    if hasattr(source, "__array__"):
        return np.asarray(source)
    # Objects that implement mappings
    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
                  hasattr(source, 'items'))
    # Objects that don't implement mappings, but do have dicts
    if isinstance(source, np.generic):
        # NumPy scalars are never mappings (PyPy issue workaround)
        pass
    elif not is_mapping and hasattr(source, '__dict__'):
        source = {key: value for key, value in source.__dict__.items()
                  if not key.startswith('_')}
        is_mapping = True
    if is_mapping:
        dtype = []
        values = []
        for field, value in source.items():
            # ``field[:1]`` instead of ``field[0]``: an empty key yields ''
            # (which is "in" any string) and is therefore skipped rather
            # than raising IndexError.
            if (isinstance(field, str) and
                    field[:1] not in '_0123456789'):
                dtype.append((str(field), object))
                values.append(value)
        if dtype:
            return np.array([tuple(values)], dtype)
        else:
            return EmptyStructMarker
    # Next try and convert to an array
    try:
        narr = np.asanyarray(source)
    except ValueError:
        # e.g. ragged sequences; fall back to an object array
        narr = np.asanyarray(source, dtype=object)
    if narr.dtype.type in (object, np.object_) and \
       narr.shape == () and narr == source:
        # No interesting conversion possible
        return None
    return narr

501 

502 

# Native byte ordered dtypes for convenience for writers
_NATIVE_DTYPES = MDTYPES[native_code]['dtypes']
NDT_FILE_HDR = _NATIVE_DTYPES['file_header']
NDT_TAG_FULL = _NATIVE_DTYPES['tag_full']
NDT_TAG_SMALL = _NATIVE_DTYPES['tag_smalldata']
NDT_ARRAY_FLAGS = _NATIVE_DTYPES['array_flags']

508 

509 

class VarWriter5:
    ''' Generic matlab matrix writing class

    Writes a single variable (and, recursively, its sub-elements) to the
    stream of the file writer it was created from.
    '''
    # Template for the enclosing miMATRIX tag. NOTE: this is a class-level
    # array and ``update_matrix_tag`` writes the byte count into it in place.
    mat_tag = np.zeros((), NDT_TAG_FULL)
    mat_tag['mdtype'] = miMATRIX

    def __init__(self, file_writer):
        ''' Copy stream and write options from `file_writer` '''
        self.file_stream = file_writer.file_stream
        self.unicode_strings = file_writer.unicode_strings
        self.long_field_names = file_writer.long_field_names
        self.oned_as = file_writer.oned_as
        # These are used for top level writes, and unset after
        self._var_name = None
        self._var_is_global = False

    def write_bytes(self, arr):
        ''' Write the bytes of array `arr` in Fortran order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_element(self, arr, mdtype=None):
        ''' write tag and data

        If `mdtype` is None it is looked up from the dtype of `arr`. Data of
        at most 4 bytes use the small data element format.
        '''
        if mdtype is None:
            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
        # Array needs to be in native byte order
        if arr.dtype.byteorder == swapped_code:
            arr = arr.byteswap().view(arr.dtype.newbyteorder())
        byte_count = arr.size*arr.itemsize
        if byte_count <= 4:
            self.write_smalldata_element(arr, mdtype, byte_count)
        else:
            self.write_regular_element(arr, mdtype, byte_count)

    def write_smalldata_element(self, arr, mdtype, byte_count):
        ''' Write tag with the data embedded in it '''
        tag = np.zeros((), NDT_TAG_SMALL)
        # byte count lives in the upper 16 bits, the type in the lower ones
        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
        # if arr.tobytes is < 4, the element will be zero-padded as needed.
        tag['data'] = arr.tobytes(order='F')
        self.write_bytes(tag)

    def write_regular_element(self, arr, mdtype, byte_count):
        ''' Write full tag, then data, then padding to an 8 byte boundary '''
        tag = np.zeros((), NDT_TAG_FULL)
        tag['mdtype'] = mdtype
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.write_bytes(arr)
        # pad to next 64-bit boundary
        bc_mod_8 = byte_count % 8
        if bc_mod_8:
            self.file_stream.write(b'\x00' * (8-bc_mod_8))

    def write_header(self,
                     shape,
                     mclass,
                     is_complex=False,
                     is_logical=False,
                     nzmax=0):
        ''' Write header for given data options

        shape : sequence
           array shape
        mclass      - mat5 matrix class
        is_complex  - True if matrix is complex
        is_logical  - True if matrix is logical
        nzmax        - max non zero elements for sparse arrays

        We get the name and the global flag from the object, and reset
        them to defaults after we've used them
        '''
        # get name and is_global from one-shot object store
        name = self._var_name
        is_global = self._var_is_global
        # initialize the top-level matrix tag, store position
        self._mat_tag_pos = self.file_stream.tell()
        self.write_bytes(self.mat_tag)
        # write array flags (complex, global, logical, class, nzmax)
        af = np.zeros((), NDT_ARRAY_FLAGS)
        af['data_type'] = miUINT32
        af['byte_count'] = 8
        flags = is_complex << 3 | is_global << 2 | is_logical << 1
        af['flags_class'] = mclass | flags << 8
        af['nzmax'] = nzmax
        self.write_bytes(af)
        # shape
        self.write_element(np.array(shape, dtype='i4'))
        # write name
        name = np.asarray(name)
        if name == '':  # empty string zero-terminated
            self.write_smalldata_element(name, miINT8, 0)
        else:
            self.write_element(name, miINT8)
        # reset the one-shot store to defaults
        self._var_name = ''
        self._var_is_global = False

    def update_matrix_tag(self, start_pos):
        ''' Go back to the matrix tag at `start_pos` and fill in its size

        Raises MatWriteError if the element does not fit a 32 bit byte count.
        The stream position is restored afterwards.
        '''
        curr_pos = self.file_stream.tell()
        self.file_stream.seek(start_pos)
        # the 8 bytes of the tag itself are not part of the count
        byte_count = curr_pos - start_pos - 8
        if byte_count >= 2**32:
            raise MatWriteError("Matrix too large to save with Matlab "
                                "5 format")
        self.mat_tag['byte_count'] = byte_count
        self.write_bytes(self.mat_tag)
        self.file_stream.seek(curr_pos)

    def write_top(self, arr, name, is_global):
        """ Write variable at top level of mat file

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        name : str, optional
            name as it will appear in matlab workspace
            default is empty string
        is_global : {False, True}, optional
            whether variable will be global on load into matlab
        """
        # these are set before the top-level header write, and unset at
        # the end of the same write, because they do not apply for lower levels
        self._var_is_global = is_global
        self._var_name = name
        # write the header and data
        self.write(arr)

    def write(self, arr):
        ''' Write `arr` to stream at top and sub levels

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for

        Raises
        ------
        TypeError
            if `arr` cannot be converted to something writeable
        MatWriteError
            if `arr` is a matlab function
        '''
        # store position, so we can update the matrix tag
        mat_tag_pos = self.file_stream.tell()
        # First check if these are sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr)
            self.update_matrix_tag(mat_tag_pos)
            return
        # Try to convert things that aren't arrays
        narr = to_writeable(arr)
        if narr is None:
            raise TypeError('Could not convert %s (type %s) to array'
                            % (arr, type(arr)))
        if isinstance(narr, MatlabObject):
            self.write_object(narr)
        elif isinstance(narr, MatlabFunction):
            raise MatWriteError('Cannot write matlab functions')
        elif narr is EmptyStructMarker:  # empty struct array
            self.write_empty_struct()
        elif narr.dtype.fields:  # struct array
            self.write_struct(narr)
        elif narr.dtype.hasobject:  # cell array
            self.write_cells(narr)
        elif narr.dtype.kind in ('U', 'S'):
            if self.unicode_strings:
                codec = 'UTF8'
            else:
                codec = 'ascii'
            self.write_char(narr, codec)
        else:
            self.write_numeric(narr)
        self.update_matrix_tag(mat_tag_pos)

    def write_numeric(self, arr):
        ''' Write numeric (including complex and logical) array `arr` '''
        imagf = arr.dtype.kind == 'c'
        logif = arr.dtype.kind == 'b'
        try:
            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
        except KeyError:
            # No matching matlab type, probably complex256 / float128 / float96
            # Cast data to complex128 / float64.
            if imagf:
                # 'c16' is the dtype string for complex128 (16 bytes);
                # 'c128' is not a valid dtype and would raise TypeError
                arr = arr.astype('c16')
            elif logif:
                arr = arr.astype('i1')  # Should only contain 0/1
            else:
                arr = arr.astype('f8')
            mclass = mxDOUBLE_CLASS
        self.write_header(matdims(arr, self.oned_as),
                          mclass,
                          is_complex=imagf,
                          is_logical=logif)
        if imagf:
            # real and imaginary parts are stored as separate elements
            self.write_element(arr.real)
            self.write_element(arr.imag)
        else:
            self.write_element(arr)

    def write_char(self, arr, codec='ascii'):
        ''' Write string array `arr` with given `codec`
        '''
        if arr.size == 0 or np.all(arr == ''):
            # This an empty string array or a string array containing
            # only empty strings. Matlab cannot distinguish between a
            # string array that is empty, and a string array containing
            # only empty strings, because it stores strings as arrays of
            # char. There is no way of having an array of char that is
            # not empty, but contains an empty string. We have to
            # special-case the array-with-empty-strings because even
            # empty strings have zero padding, which would otherwise
            # appear in matlab as a string with a space.
            shape = (0,) * np.max([arr.ndim, 2])
            self.write_header(shape, mxCHAR_CLASS)
            self.write_smalldata_element(arr, miUTF8, 0)
            return
        # non-empty string.
        #
        # Convert to char array
        arr = arr_to_chars(arr)
        # We have to write the shape directly, because we are going
        # recode the characters, and the resulting stream of chars
        # may have a different length
        shape = arr.shape
        self.write_header(shape, mxCHAR_CLASS)
        if arr.dtype.kind == 'U' and arr.size:
            # Make one long string from all the characters. We need to
            # transpose here, because we're flattening the array, before
            # we write the bytes. The bytes have to be written in
            # Fortran order.
            n_chars = np.prod(shape)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr.T.copy())  # Fortran order
            # Recode with codec to give byte string
            st = st_arr.item().encode(codec)
            # Reconstruct as 1-D byte array
            arr = np.ndarray(shape=(len(st),),
                             dtype='S1',
                             buffer=st)
        self.write_element(arr, mdtype=miUTF8)

    def write_sparse(self, arr):
        ''' Sparse matrices are 2D
        '''
        A = arr.tocsc()  # convert to sparse CSC format
        A.sort_indices()  # MATLAB expects sorted row indices
        is_complex = (A.dtype.kind == 'c')
        is_logical = (A.dtype.kind == 'b')
        nz = A.nnz
        self.write_header(matdims(arr, self.oned_as),
                          mxSPARSE_CLASS,
                          is_complex=is_complex,
                          is_logical=is_logical,
                          # matlab won't load file with 0 nzmax
                          nzmax=1 if nz == 0 else nz)
        self.write_element(A.indices.astype('i4'))
        self.write_element(A.indptr.astype('i4'))
        self.write_element(A.data.real)
        if is_complex:
            self.write_element(A.data.imag)

    def write_cells(self, arr):
        ''' Write object array `arr` as a cell array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxCELL_CLASS)
        # loop over data, column major
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            self.write(el)

    def write_empty_struct(self):
        ''' Write a 1x1 struct without any fields '''
        self.write_header((1, 1), mxSTRUCT_CLASS)
        # max field name length set to 1 in an example matlab struct
        self.write_element(np.array(1, dtype=np.int32))
        # Field names element is empty
        self.write_element(np.array([], dtype=np.int8))

    def write_struct(self, arr):
        ''' Write structured array `arr` as a struct array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxSTRUCT_CLASS)
        self._write_items(arr)

    def _write_items(self, arr):
        ''' Write field names and then all field values of `arr`

        Raises ValueError if a field name exceeds the allowed length.
        '''
        # write fieldnames
        fieldnames = [f[0] for f in arr.dtype.descr]
        # +1 leaves room for the terminating zero byte
        length = max(len(fieldname) for fieldname in fieldnames) + 1
        max_length = 64 if self.long_field_names else 32
        if length > max_length:
            raise ValueError("Field names are restricted to %d characters" %
                             (max_length-1))
        self.write_element(np.array([length], dtype='i4'))
        self.write_element(
            np.array(fieldnames, dtype='S%d' % (length)),
            mdtype=miINT8)
        # values are written element by element in column major order
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            for f in fieldnames:
                self.write(el[f])

    def write_object(self, arr):
        '''Same as writing structs, except different mx class, and extra
        classname element after header
        '''
        self.write_header(matdims(arr, self.oned_as),
                          mxOBJECT_CLASS)
        self.write_element(np.array(arr.classname, dtype='S'),
                           mdtype=miINT8)
        self._write_items(arr)

811 

812 

813class MatFile5Writer: 

814 ''' Class for writing mat5 files ''' 

815 

816 @docfiller 

817 def __init__(self, file_stream, 

818 do_compression=False, 

819 unicode_strings=False, 

820 global_vars=None, 

821 long_field_names=False, 

822 oned_as='row'): 

823 ''' Initialize writer for matlab 5 format files 

824 

825 Parameters 

826 ---------- 

827 %(do_compression)s 

828 %(unicode_strings)s 

829 global_vars : None or sequence of strings, optional 

830 Names of variables to be marked as global for matlab 

831 %(long_fields)s 

832 %(oned_as)s 

833 ''' 

834 self.file_stream = file_stream 

835 self.do_compression = do_compression 

836 self.unicode_strings = unicode_strings 

837 if global_vars: 

838 self.global_vars = global_vars 

839 else: 

840 self.global_vars = [] 

841 self.long_field_names = long_field_names 

842 self.oned_as = oned_as 

843 self._matrix_writer = None 

844 

845 def write_file_header(self): 

846 # write header 

847 hdr = np.zeros((), NDT_FILE_HDR) 

848 hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \ 

849 % (os.name,time.asctime()) 

850 hdr['version'] = 0x0100 

851 hdr['endian_test'] = np.ndarray(shape=(), 

852 dtype='S2', 

853 buffer=np.uint16(0x4d49)) 

854 self.file_stream.write(hdr.tobytes()) 

855 

856 def put_variables(self, mdict, write_header=None): 

857 ''' Write variables in `mdict` to stream 

858 

859 Parameters 

860 ---------- 

861 mdict : mapping 

862 mapping with method ``items`` returns name, contents pairs where 

863 ``name`` which will appear in the matlab workspace in file load, and 

864 ``contents`` is something writeable to a matlab file, such as a NumPy 

865 array. 

866 write_header : {None, True, False}, optional 

867 If True, then write the matlab file header before writing the 

868 variables. If None (the default) then write the file header 

869 if we are at position 0 in the stream. By setting False 

870 here, and setting the stream position to the end of the file, 

871 you can append variables to a matlab file 

872 ''' 

873 # write header if requested, or None and start of file 

874 if write_header is None: 

875 write_header = self.file_stream.tell() == 0 

876 if write_header: 

877 self.write_file_header() 

878 self._matrix_writer = VarWriter5(self) 

879 for name, var in mdict.items(): 

880 if name[0] == '_': 

881 continue 

882 is_global = name in self.global_vars 

883 if self.do_compression: 

884 stream = BytesIO() 

885 self._matrix_writer.file_stream = stream 

886 self._matrix_writer.write_top(var, name.encode('latin1'), is_global) 

887 out_str = zlib.compress(stream.getvalue()) 

888 tag = np.empty((), NDT_TAG_FULL) 

889 tag['mdtype'] = miCOMPRESSED 

890 tag['byte_count'] = len(out_str) 

891 self.file_stream.write(tag.tobytes()) 

892 self.file_stream.write(out_str) 

893 else: # not compressing 

894 self._matrix_writer.write_top(var, name.encode('latin1'), is_global)