Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio5.py: 15%

403 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-14 06:37 +0000

1''' Classes for read / write of matlab (TM) 5 files 

2 

3The matfile specification last found here: 

4 

5https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf 

6 

7(as of December 5 2008) 

8 

9================================= 

10 Note on functions and mat files 

11================================= 

12 

13The document above does not give any hints as to the storage of matlab 

14function handles, or anonymous function handles. I had, therefore, to 

15guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and 

16``mxOPAQUE_CLASS`` by looking at example mat files. 

17 

18``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to 

19contain a struct matrix with a set pattern of fields. For anonymous 

20functions, a sub-field of one of these fields seems to contain the 

21well-named ``mxOPAQUE_CLASS``. This seems to contain: 

22 

23* array flags as for any matlab matrix 

24* 3 int8 strings 

25* a matrix 

26 

27It seems that whenever the mat file contains a ``mxOPAQUE_CLASS`` 

28instance, there is also an un-named matrix (name == '') at the end of 

29the mat file. I'll call this the ``__function_workspace__`` matrix. 

30 

31When I saved two anonymous functions in a mat file, or appended another 

32anonymous function to the mat file, there was still only one 

33``__function_workspace__`` un-named matrix at the end, but larger than 

34that for a mat file with a single anonymous function, suggesting that 

35the workspaces for the two functions had been merged. 

36 

37The ``__function_workspace__`` matrix appears to be of double class 

38(``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in 

39the format of a mini .mat file, without the first 124 bytes of the file 

40header (the description and the subsystem_offset), but with the version 

41U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes, 

42presumably for 8 byte padding, and then a series of ``miMATRIX`` 

43entries, as in a standard mat file. The ``miMATRIX`` entries appear to 

44be series of un-named (name == '') matrices, and may also contain arrays 

45of this same mini-mat format. 

46 

47I guess that: 

48 

49* saving an anonymous function back to a mat file will need the 

50 associated ``__function_workspace__`` matrix saved as well for the 

51 anonymous function to work correctly. 

52* appending to a mat file that has a ``__function_workspace__`` would 

53 involve first pulling off this workspace, appending, checking whether 

54 there were any more anonymous functions appended, and then somehow 

55 merging the relevant workspaces, and saving at the end of the mat 

56 file. 

57 

58The mat files I was playing with are in ``tests/data``: 

59 

60* sqr.mat 

61* parabola.mat 

62* some_functions.mat 

63 

64See ``tests/test_mio.py:test_mio_funcs.py`` for the debugging 

65script I was working with. 

66 

67Small fragments of current code adapted from matfile.py by Heiko 

68Henkelmann; parts of the code for simplify_cells=True adapted from 

69http://blog.nephics.com/2019/08/28/better-loadmat-for-scipy/. 

70''' 

71 

72import os 

73import time 

74import sys 

75import zlib 

76 

77from io import BytesIO 

78 

79import warnings 

80 

81import numpy as np 

82 

83import scipy.sparse 

84 

85from ._byteordercodes import native_code, swapped_code 

86 

87from ._miobase import (MatFileReader, docfiller, matdims, read_dtype, 

88 arr_to_chars, arr_dtype_number, MatWriteError, 

89 MatReadError, MatReadWarning) 

90 

91# Reader object for matlab 5 format variables 

92from ._mio5_utils import VarReader5 

93 

94# Constants and helper objects 

95from ._mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES, 

96 NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8, 

97 miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS, 

98 mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS, 

99 mxDOUBLE_CLASS, mclass_info, mat_struct) 

100 

101from ._streams import ZlibInputStream 

102 

103 

104def _has_struct(elem): 

105 """Determine if elem is an array and if first array item is a struct.""" 

106 return (isinstance(elem, np.ndarray) and (elem.size > 0) and (elem.ndim > 0) and 

107 isinstance(elem[0], mat_struct)) 

108 

109 

110def _inspect_cell_array(ndarray): 

111 """Construct lists from cell arrays (loaded as numpy ndarrays), recursing 

112 into items if they contain mat_struct objects.""" 

113 elem_list = [] 

114 for sub_elem in ndarray: 

115 if isinstance(sub_elem, mat_struct): 

116 elem_list.append(_matstruct_to_dict(sub_elem)) 

117 elif _has_struct(sub_elem): 

118 elem_list.append(_inspect_cell_array(sub_elem)) 

119 else: 

120 elem_list.append(sub_elem) 

121 return elem_list 

122 

123 

124def _matstruct_to_dict(matobj): 

125 """Construct nested dicts from mat_struct objects.""" 

126 d = {} 

127 for f in matobj._fieldnames: 

128 elem = matobj.__dict__[f] 

129 if isinstance(elem, mat_struct): 

130 d[f] = _matstruct_to_dict(elem) 

131 elif _has_struct(elem): 

132 d[f] = _inspect_cell_array(elem) 

133 else: 

134 d[f] = elem 

135 return d 

136 

137 

138def _simplify_cells(d): 

139 """Convert mat objects in dict to nested dicts.""" 

140 for key in d: 

141 if isinstance(d[key], mat_struct): 

142 d[key] = _matstruct_to_dict(d[key]) 

143 elif _has_struct(d[key]): 

144 d[key] = _inspect_cell_array(d[key]) 

145 return d 

146 

147 

class MatFile5Reader(MatFileReader):
    ''' Reader for Mat 5 mat files

    Adds the following attribute to base class

    uint16_codec - char codec to use for uint16 char arrays
        (defaults to system default codec)

    Uses variable reader that has the following standard interface (see
    abstract class in ``miobase``)::

       __init__(self, file_reader)
       read_header(self)
       array_from_header(self)

    and added interface::

       set_stream(self, stream)
       read_full_tag(self)

    '''
    @docfiller
    def __init__(self,
                 mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 uint16_codec=None,
                 simplify_cells=False):
        '''Initializer for matlab 5 file format reader

        %(matstream_arg)s
        %(load_args)s
        %(struct_arg)s
        uint16_codec : {None, string}
            Set codec to use for uint16 char arrays (e.g., 'utf-8').
            Use system default codec if None
        '''
        super().__init__(
            mat_stream,
            byte_order,
            mat_dtype,
            squeeze_me,
            chars_as_strings,
            matlab_compatible,
            struct_as_record,
            verify_compressed_data_integrity,
            simplify_cells)
        # Set uint16 codec
        if not uint16_codec:
            uint16_codec = sys.getdefaultencoding()
        self.uint16_codec = uint16_codec
        # placeholders for readers - see initialize_read method
        self._file_reader = None
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess byte order.

        Reads the two bytes at offset 126 of the stream and then sets the
        stream pointer back to 0.

        Returns
        -------
        byte_order : str
            ``'<'`` if the two bytes read are ``b'IM'``, ``'>'`` otherwise.
        '''
        self.mat_stream.seek(126)
        mi = self.mat_stream.read(2)
        self.mat_stream.seek(0)
        return '<' if mi == b'IM' else '>'

    def read_file_header(self):
        ''' Read in mat 5 file header

        Returns
        -------
        hdict : dict
            Dictionary with key ``'__header__'`` (the description bytes with
            surrounding spaces, tabs, newlines and NULs stripped) and key
            ``'__version__'`` (string of form ``'major.minor'``).
        '''
        hdict = {}
        hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
        hdr = read_dtype(self.mat_stream, hdr_dtype)
        hdict['__header__'] = hdr['description'].item().strip(b' \t\n\000')
        # High byte of the version field is the major version, low byte the
        # minor version
        v_major = hdr['version'] >> 8
        v_minor = hdr['version'] & 0xFF
        hdict['__version__'] = '%d.%d' % (v_major, v_minor)
        return hdict

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        # reader for top level stream. We need this extra top-level
        # reader because we use the matrix_reader object to contain
        # compressed matrices (so they have their own stream)
        self._file_reader = VarReader5(self)
        # reader for matrix streams
        self._matrix_reader = VarReader5(self)

    def read_var_header(self):
        ''' Read header, return header, next position

        Header has to define at least .name and .is_global

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes .name and .is_global
        next_position : int
           position in stream of next variable

        Raises
        ------
        ValueError
            If the tag read from the stream reports a byte count that is not
            greater than zero.
        TypeError
            If the element (after unwrapping compression) is not of type
            ``miMATRIX``.
        '''
        mdtype, byte_count = self._file_reader.read_full_tag()
        if not byte_count > 0:
            raise ValueError("Did not read any bytes")
        next_pos = self.mat_stream.tell() + byte_count
        if mdtype == miCOMPRESSED:
            # Make new stream from compressed data
            stream = ZlibInputStream(self.mat_stream, byte_count)
            self._matrix_reader.set_stream(stream)
            check_stream_limit = self.verify_compressed_data_integrity
            mdtype, byte_count = self._matrix_reader.read_full_tag()
        else:
            check_stream_limit = False
            self._matrix_reader.set_stream(self.mat_stream)
        if mdtype != miMATRIX:
            raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
        header = self._matrix_reader.read_header(check_stream_limit)
        return header, next_pos

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False} bool, optional
           If True, apply recursive post-processing during loading of
           array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' Get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            Names of the variables to read. A single string is treated as
            one name. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            The file header entries (see `read_file_header`), a
            ``'__globals__'`` list with the names of global variables, and
            one entry per variable read. If ``self.simplify_cells`` is set,
            the dict is passed through ``_simplify_cells`` first.
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # Private copy: names are removed from it as they are read
            variable_names = list(variable_names)

        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        mdict = self.read_file_header()
        mdict['__globals__'] = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if name in mdict:
                warnings.warn('Duplicate variable name "%s" in stream'
                              ' - replacing previous with new\n'
                              'Consider mio5.varmats_from_mat to split '
                              'file into single variable files' % name,
                              MatReadWarning, stacklevel=2)
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'
                # We want to keep this raw because mat_dtype processing
                # will break the format (uint8 as mxDOUBLE_CLASS)
                process = False
            else:
                process = True
            if variable_names is not None and name not in variable_names:
                # Not requested - skip straight to the next variable
                self.mat_stream.seek(next_position)
                continue
            try:
                res = self.read_var_array(hdr, process)
            except MatReadError as err:
                # Keep going: store a message in place of the variable
                warnings.warn(
                    f'Unreadable variable "{name}", because "{err}"',
                    Warning, stacklevel=2)
                res = "Read error: %s" % err
            self.mat_stream.seek(next_position)
            mdict[name] = res
            if hdr.is_global:
                mdict['__globals__'].append(name)
            if variable_names is not None:
                variable_names.remove(name)
                # Stop early once every requested variable has been read
                if not variable_names:
                    break
        if self.simplify_cells:
            return _simplify_cells(mdict)
        else:
            return mdict

    def list_variables(self):
        ''' List variables from stream

        Returns
        -------
        var_infos : list of tuple
            One ``(name, shape, info)`` tuple per variable, where `info` is
            ``'logical'`` for logical arrays, and otherwise the
            ``mclass_info`` entry for the variable's matlab class
            (``'unknown'`` if there is no entry).
        '''
        self.mat_stream.seek(0)
        # Here we pass all the parameters in self to the reading objects
        self.initialize_read()
        self.read_file_header()
        var_infos = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if name == '':
                # can only be a matlab 7 function workspace
                name = '__function_workspace__'

            shape = self._matrix_reader.shape_from_header(hdr)
            if hdr.is_logical:
                info = 'logical'
            else:
                info = mclass_info.get(hdr.mclass, 'unknown')
            var_infos.append((name, shape, info))

            self.mat_stream.seek(next_position)
        return var_infos

372 

373 

def varmats_from_mat(file_obj):
    """ Split a mat 5 file into one in-memory mat file per variable

    Handy for a difficult mat file that holds unreadable variables: each
    variable is copied out in raw, unparsed form into its own file-like
    object, ready to be saved or read individually. It also helps in the
    pathological case of several variables sharing one name; every duplicate
    is returned here, whereas the standard reader keeps only the last one in
    the dictionary it returns.

    The file pointer in `file_obj` will be undefined. File pointers for the
    returned file-like objects are set at 0.

    Parameters
    ----------
    file_obj : file-like
        file object containing mat file

    Returns
    -------
    named_mats : list
        list contains tuples of (name, BytesIO) where BytesIO is a file-like
        object containing mat file contents as for a single variable. The
        BytesIO contains a string with the original header and a single var. If
        ``var_file_obj`` is an individual BytesIO instance, then save as a mat
        file with something like ``open('test.mat',
        'wb').write(var_file_obj.read())``

    Examples
    --------
    >>> import scipy.io
    >>> import numpy as np
    >>> from io import BytesIO
    >>> from scipy.io.matlab._mio5 import varmats_from_mat
    >>> mat_fileobj = BytesIO()
    >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
    >>> varmats = varmats_from_mat(mat_fileobj)
    >>> sorted([name for name, str_obj in varmats])
    ['a', 'b']
    """
    reader = MatFile5Reader(file_obj)
    # Keep the raw bytes of the top-level file header; they are prepended to
    # every per-variable file.
    header_size = MDTYPES[native_code]['dtypes']['file_header'].itemsize
    file_obj.seek(0)
    header_bytes = file_obj.read(header_size)
    # Rewind and let the reader consume the header, leaving the stream at
    # the first variable.
    file_obj.seek(0)
    reader.initialize_read()
    reader.read_file_header()
    var_end = file_obj.tell()
    results = []
    while not reader.end_of_stream():
        var_start = var_end
        var_hdr, var_end = reader.read_var_header()
        if var_hdr.name is None:
            var_name = 'None'
        else:
            var_name = var_hdr.name.decode('latin1')
        # Go back and copy the variable's raw bytes, tag included
        file_obj.seek(var_start)
        payload = file_obj.read(var_end - var_start)
        # A BytesIO built from initial bytes starts at position 0
        results.append((var_name, BytesIO(header_bytes + payload)))
    return results

440 

441 

class EmptyStructMarker:
    """ Sentinel class standing for an empty matlab struct on output """

444 

445 

def to_writeable(source):
    ''' Convert input object ``source`` to something we can write

    Parameters
    ----------
    source : object

    Returns
    -------
    arr : None or ndarray or EmptyStructMarker
        If `source` cannot be converted to something we can write to a matfile,
        return None. If `source` is equivalent to an empty dictionary, return
        ``EmptyStructMarker``. Otherwise return `source` converted to an
        ndarray with contents for writing to matfile.

    Notes
    -----
    For mappings (and objects converted through their ``__dict__``), only
    keys that are non-empty strings and do not start with an underscore or
    a digit become struct fields; all other keys are silently dropped.
    '''
    if isinstance(source, np.ndarray):
        return source
    if source is None:
        return None
    if hasattr(source, "__array__"):
        return np.asarray(source)
    # Objects that implement mappings
    is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
                  hasattr(source, 'items'))
    if isinstance(source, np.generic):
        # NumPy scalars are never mappings (PyPy issue workaround)
        pass
    elif not is_mapping and hasattr(source, '__dict__'):
        # Objects that don't implement mappings, but do have dicts
        source = {key: value for key, value in source.__dict__.items()
                  if not key.startswith('_')}
        is_mapping = True
    if is_mapping:
        dtype = []
        values = []
        for field, value in source.items():
            # The explicit emptiness check guards ``field[0]``, which would
            # otherwise raise IndexError for an empty-string key.
            if (isinstance(field, str) and field and
                    field[0] not in '_0123456789'):
                dtype.append((str(field), object))
                values.append(value)
        if dtype:
            return np.array([tuple(values)], dtype)
        else:
            return EmptyStructMarker
    # Next try and convert to an array
    try:
        narr = np.asanyarray(source)
    except ValueError:
        narr = np.asanyarray(source, dtype=object)
    if narr.dtype.type in (object, np.object_) and \
       narr.shape == () and narr == source:
        # No interesting conversion possible
        return None
    return narr

500 

501 

# Convenience aliases for writers: the native byte ordered dtypes of the
# file header, the full tag, the small-data tag and the array flags
NDT_FILE_HDR, NDT_TAG_FULL, NDT_TAG_SMALL, NDT_ARRAY_FLAGS = (
    MDTYPES[native_code]['dtypes'][key]
    for key in ('file_header', 'tag_full', 'tag_smalldata', 'array_flags'))

507 

508 

class VarWriter5:
    ''' Generic matlab matrix writing class '''
    # Template for the top-level miMATRIX tag. It is shared by all instances,
    # so methods write copies rather than modifying it.
    mat_tag = np.zeros((), NDT_TAG_FULL)
    mat_tag['mdtype'] = miMATRIX

    def __init__(self, file_writer):
        ''' Copy stream and write options from `file_writer`

        Parameters
        ----------
        file_writer : object
            object with attributes ``file_stream``, ``unicode_strings``,
            ``long_field_names`` and ``oned_as``
        '''
        self.file_stream = file_writer.file_stream
        self.unicode_strings = file_writer.unicode_strings
        self.long_field_names = file_writer.long_field_names
        self.oned_as = file_writer.oned_as
        # These are used for top level writes, and unset after
        self._var_name = None
        self._var_is_global = False

    def write_bytes(self, arr):
        ''' Write bytes of array `arr`, in Fortran order, to the stream '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_element(self, arr, mdtype=None):
        ''' write tag and data

        Parameters
        ----------
        arr : ndarray
            data to write
        mdtype : None or int, optional
            mat data type code. If None, look up the code for the dtype of
            `arr` in ``NP_TO_MTYPES``.
        '''
        if mdtype is None:
            mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
        # Array needs to be in native byte order
        if arr.dtype.byteorder == swapped_code:
            arr = arr.byteswap().view(arr.dtype.newbyteorder())
        byte_count = arr.size*arr.itemsize
        # Data of at most 4 bytes is packed into the tag itself
        if byte_count <= 4:
            self.write_smalldata_element(arr, mdtype, byte_count)
        else:
            self.write_regular_element(arr, mdtype, byte_count)

    def write_smalldata_element(self, arr, mdtype, byte_count):
        ''' Write tag with the data of `arr` embedded in it '''
        # write tag with embedded data
        tag = np.zeros((), NDT_TAG_SMALL)
        # byte count in the upper 16 bits, data type in the lower 16 bits
        tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
        # if arr.tobytes is < 4, the element will be zero-padded as needed.
        tag['data'] = arr.tobytes(order='F')
        self.write_bytes(tag)

    def write_regular_element(self, arr, mdtype, byte_count):
        ''' Write full tag, then data of `arr`, then padding '''
        # write tag, data
        tag = np.zeros((), NDT_TAG_FULL)
        tag['mdtype'] = mdtype
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.write_bytes(arr)
        # pad to next 64-bit boundary
        bc_mod_8 = byte_count % 8
        if bc_mod_8:
            self.file_stream.write(b'\x00' * (8-bc_mod_8))

    def write_header(self,
                     shape,
                     mclass,
                     is_complex=False,
                     is_logical=False,
                     nzmax=0):
        ''' Write header for given data options

        Parameters
        ----------
        shape : sequence
           array shape
        mclass : int
           mat5 matrix class
        is_complex : bool, optional
           True if matrix is complex
        is_logical : bool, optional
           True if matrix is logical
        nzmax : int, optional
           max non zero elements for sparse arrays

        Notes
        -----
        We get the name and the global flag from the object, and reset
        them to defaults after we've used them
        '''
        # get name and is_global from one-shot object store
        name = self._var_name
        is_global = self._var_is_global
        # initialize the top-level matrix tag, store position
        self._mat_tag_pos = self.file_stream.tell()
        # The byte count in this tag is a placeholder; `update_matrix_tag`
        # overwrites the tag once the matrix has been written.
        self.write_bytes(self.mat_tag)
        # write array flags (complex, global, logical, class, nzmax)
        af = np.zeros((), NDT_ARRAY_FLAGS)
        af['data_type'] = miUINT32
        af['byte_count'] = 8
        flags = is_complex << 3 | is_global << 2 | is_logical << 1
        af['flags_class'] = mclass | flags << 8
        af['nzmax'] = nzmax
        self.write_bytes(af)
        # shape
        self.write_element(np.array(shape, dtype='i4'))
        # write name
        name = np.asarray(name)
        if name == '':  # empty string zero-terminated
            self.write_smalldata_element(name, miINT8, 0)
        else:
            self.write_element(name, miINT8)
        # reset the one-shot store to defaults
        self._var_name = ''
        self._var_is_global = False

    def update_matrix_tag(self, start_pos):
        ''' Rewrite matrix tag at `start_pos` with the final byte count

        Parameters
        ----------
        start_pos : int
            stream position at which the matrix tag was written

        Raises
        ------
        MatWriteError
            If the number of bytes written after the tag is 2**32 or more.
        '''
        curr_pos = self.file_stream.tell()
        self.file_stream.seek(start_pos)
        # The 8 bytes of the tag itself do not count
        byte_count = curr_pos - start_pos - 8
        if byte_count >= 2**32:
            raise MatWriteError("Matrix too large to save with Matlab "
                                "5 format")
        # Fill in a copy, so the class-level template shared between all
        # writers is left untouched.
        tag = self.mat_tag.copy()
        tag['byte_count'] = byte_count
        self.write_bytes(tag)
        self.file_stream.seek(curr_pos)

    def write_top(self, arr, name, is_global):
        """ Write variable at top level of mat file

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for
        name : str, optional
            name as it will appear in matlab workspace
            default is empty string
        is_global : {False, True}, optional
            whether variable will be global on load into matlab
        """
        # these are set before the top-level header write, and unset at
        # the end of the same write, because they do not apply for lower levels
        self._var_is_global = is_global
        self._var_name = name
        # write the header and data
        self.write(arr)

    def write(self, arr):
        ''' Write `arr` to stream at top and sub levels

        Parameters
        ----------
        arr : array_like
            array-like object to create writer for

        Raises
        ------
        TypeError
            If `arr` cannot be converted to something writeable.
        MatWriteError
            If `arr` is a matlab function.
        '''
        # store position, so we can update the matrix tag
        mat_tag_pos = self.file_stream.tell()
        # First check if these are sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr)
            self.update_matrix_tag(mat_tag_pos)
            return
        # Try to convert things that aren't arrays
        narr = to_writeable(arr)
        if narr is None:
            raise TypeError(f'Could not convert {arr} (type {type(arr)}) to array')
        if isinstance(narr, MatlabObject):
            self.write_object(narr)
        elif isinstance(narr, MatlabFunction):
            raise MatWriteError('Cannot write matlab functions')
        elif narr is EmptyStructMarker:  # empty struct array
            self.write_empty_struct()
        elif narr.dtype.fields:  # struct array
            self.write_struct(narr)
        elif narr.dtype.hasobject:  # cell array
            self.write_cells(narr)
        elif narr.dtype.kind in ('U', 'S'):
            if self.unicode_strings:
                codec = 'UTF8'
            else:
                codec = 'ascii'
            self.write_char(narr, codec)
        else:
            self.write_numeric(narr)
        self.update_matrix_tag(mat_tag_pos)

    def write_numeric(self, arr):
        ''' Write numeric (including complex and boolean) array `arr`

        Arrays whose dtype has no entry in ``NP_TO_MXTYPES`` are cast to
        complex128 / int8 / float64 and written as double class.
        '''
        imagf = arr.dtype.kind == 'c'
        logif = arr.dtype.kind == 'b'
        try:
            mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
        except KeyError:
            # No matching matlab type, probably complex256 / float128 / float96
            # Cast data to complex128 / float64.
            if imagf:
                # 'c16' is complex128: the number in the type string is the
                # item size in bytes, not bits
                arr = arr.astype('c16')
            elif logif:
                arr = arr.astype('i1')  # Should only contain 0/1
            else:
                arr = arr.astype('f8')
            mclass = mxDOUBLE_CLASS
        self.write_header(matdims(arr, self.oned_as),
                          mclass,
                          is_complex=imagf,
                          is_logical=logif)
        if imagf:
            # real and imaginary parts go in separate elements
            self.write_element(arr.real)
            self.write_element(arr.imag)
        else:
            self.write_element(arr)

    def write_char(self, arr, codec='ascii'):
        ''' Write string array `arr` with given `codec`
        '''
        if arr.size == 0 or np.all(arr == ''):
            # This an empty string array or a string array containing
            # only empty strings. Matlab cannot distinguish between a
            # string array that is empty, and a string array containing
            # only empty strings, because it stores strings as arrays of
            # char. There is no way of having an array of char that is
            # not empty, but contains an empty string. We have to
            # special-case the array-with-empty-strings because even
            # empty strings have zero padding, which would otherwise
            # appear in matlab as a string with a space.
            shape = (0,) * max(arr.ndim, 2)
            self.write_header(shape, mxCHAR_CLASS)
            self.write_smalldata_element(arr, miUTF8, 0)
            return
        # non-empty string.
        #
        # Convert to char array
        arr = arr_to_chars(arr)
        # We have to write the shape directly, because we are going
        # recode the characters, and the resulting stream of chars
        # may have a different length
        shape = arr.shape
        self.write_header(shape, mxCHAR_CLASS)
        if arr.dtype.kind == 'U' and arr.size:
            # Make one long string from all the characters. We need to
            # transpose here, because we're flattening the array, before
            # we write the bytes. The bytes have to be written in
            # Fortran order.
            n_chars = np.prod(shape)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr.T.copy())  # Fortran order
            # Recode with codec to give byte string
            st = st_arr.item().encode(codec)
            # Reconstruct as 1-D byte array
            arr = np.ndarray(shape=(len(st),),
                             dtype='S1',
                             buffer=st)
        self.write_element(arr, mdtype=miUTF8)

    def write_sparse(self, arr):
        ''' Sparse matrices are 2D

        `arr` is converted to CSC format with sorted indices; the row
        indices, the index pointers, the real part of the data and (for
        complex input) the imaginary part are written as separate elements.
        '''
        A = arr.tocsc()  # convert to sparse CSC format
        A.sort_indices()  # MATLAB expects sorted row indices
        is_complex = (A.dtype.kind == 'c')
        is_logical = (A.dtype.kind == 'b')
        nz = A.nnz
        self.write_header(matdims(arr, self.oned_as),
                          mxSPARSE_CLASS,
                          is_complex=is_complex,
                          is_logical=is_logical,
                          # matlab won't load file with 0 nzmax
                          nzmax=1 if nz == 0 else nz)
        self.write_element(A.indices.astype('i4'))
        self.write_element(A.indptr.astype('i4'))
        self.write_element(A.data.real)
        if is_complex:
            self.write_element(A.data.imag)

    def write_cells(self, arr):
        ''' Write object array `arr` as a cell array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxCELL_CLASS)
        # loop over data, column major
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            self.write(el)

    def write_empty_struct(self):
        ''' Write a 1x1 struct with no fields '''
        self.write_header((1, 1), mxSTRUCT_CLASS)
        # max field name length set to 1 in an example matlab struct
        self.write_element(np.array(1, dtype=np.int32))
        # Field names element is empty
        self.write_element(np.array([], dtype=np.int8))

    def write_struct(self, arr):
        ''' Write structured array `arr` as a struct array '''
        self.write_header(matdims(arr, self.oned_as),
                          mxSTRUCT_CLASS)
        self._write_items(arr)

    def _write_items(self, arr):
        ''' Write field names of `arr`, then each field of each element

        Raises
        ------
        ValueError
            If the longest field name plus one terminating byte exceeds 64
            (``self.long_field_names`` set) or 32 (otherwise).
        '''
        # write fieldnames
        fieldnames = [f[0] for f in arr.dtype.descr]
        # one extra byte per name
        length = max(len(fieldname) for fieldname in fieldnames) + 1
        max_length = 64 if self.long_field_names else 32
        if length > max_length:
            raise ValueError("Field names are restricted to %d characters" %
                             (max_length-1))
        self.write_element(np.array([length], dtype='i4'))
        self.write_element(
            np.array(fieldnames, dtype='S%d' % (length)),
            mdtype=miINT8)
        # loop over elements column major, writing the fields of each in turn
        A = np.atleast_2d(arr).flatten('F')
        for el in A:
            for f in fieldnames:
                self.write(el[f])

    def write_object(self, arr):
        '''Same as writing structs, except different mx class, and extra
        classname element after header
        '''
        self.write_header(matdims(arr, self.oned_as),
                          mxOBJECT_CLASS)
        self.write_element(np.array(arr.classname, dtype='S'),
                           mdtype=miINT8)
        self._write_items(arr)

809 

810 

class MatFile5Writer:
    ''' Writer producing mat5 format files '''

    @docfiller
    def __init__(self, file_stream,
                 do_compression=False,
                 unicode_strings=False,
                 global_vars=None,
                 long_field_names=False,
                 oned_as='row'):
        ''' Initialize writer for matlab 5 format files

        Parameters
        ----------
        %(do_compression)s
        %(unicode_strings)s
        global_vars : None or sequence of strings, optional
            Names of variables to be marked as global for matlab
        %(long_fields)s
        %(oned_as)s
        '''
        self.file_stream = file_stream
        self.do_compression = do_compression
        self.unicode_strings = unicode_strings
        # Any falsy value (None, empty sequence) means "no globals"
        self.global_vars = global_vars if global_vars else []
        self.long_field_names = long_field_names
        self.oned_as = oned_as
        # Created by put_variables
        self._matrix_writer = None

    def write_file_header(self):
        ''' Write the mat file header to the stream

        The header holds a description naming the platform and creation
        time, the version ``0x0100`` and the endian test bytes.
        '''
        description = (f'MATLAB 5.0 MAT-file Platform: {os.name}, '
                       f'Created on: {time.asctime()}')
        # Endian indicator: the two bytes of the uint16 value 0x4d49
        endian_test = np.ndarray(shape=(),
                                 dtype='S2',
                                 buffer=np.uint16(0x4d49))
        header = np.zeros((), NDT_FILE_HDR)
        header['description'] = description
        header['version'] = 0x0100
        header['endian_test'] = endian_test
        self.file_stream.write(header.tobytes())

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping whose ``items`` method yields (name, contents) pairs;
           ``name`` is the variable name that will appear in the matlab
           workspace on load, and ``contents`` is something writeable to a
           matlab file, such as a NumPy array. Names starting with an
           underscore are skipped.
        write_header : {None, True, False}, optional
           If True, then write the matlab file header before writing the
           variables. If None (the default) then write the file header
           if we are at position 0 in the stream. By setting False
           here, and setting the stream position to the end of the file,
           you can append variables to a matlab file
        '''
        # None means: write the header only at the very start of the stream
        if write_header is None:
            write_header = self.file_stream.tell() == 0
        if write_header:
            self.write_file_header()
        self._matrix_writer = VarWriter5(self)
        for name, var in mdict.items():
            if name[0] == '_':
                continue
            is_global = name in self.global_vars
            if not self.do_compression:
                # Plain write straight into the output stream
                self._matrix_writer.write_top(
                    var, name.encode('latin1'), is_global)
                continue
            # Compressed write: render the variable into a scratch buffer,
            # then emit the deflated bytes behind an miCOMPRESSED tag
            scratch = BytesIO()
            self._matrix_writer.file_stream = scratch
            self._matrix_writer.write_top(
                var, name.encode('latin1'), is_global)
            compressed = zlib.compress(scratch.getvalue())
            tag = np.empty((), NDT_TAG_FULL)
            tag['mdtype'] = miCOMPRESSED
            tag['byte_count'] = len(compressed)
            self.file_stream.write(tag.tobytes())
            self.file_stream.write(compressed)