Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio4.py: 20%

280 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1''' Classes for read / write of matlab (TM) 4 files 

2''' 

3import sys 

4import warnings 

5 

6import numpy as np 

7 

8import scipy.sparse 

9 

10from ._miobase import (MatFileReader, docfiller, matdims, read_dtype, 

11 convert_dtypes, arr_to_chars, arr_dtype_number) 

12 

13from ._mio_utils import squeeze_element, chars_to_strings 

14from functools import reduce 

15 

16 

17__all__ = [ 

18 'MatFile4Reader', 'MatFile4Writer', 'SYS_LITTLE_ENDIAN', 

19 'VarHeader4', 'VarReader4', 'VarWriter4', 'arr_to_2d', 'mclass_info', 

20 'mdtypes_template', 'miDOUBLE', 'miINT16', 'miINT32', 'miSINGLE', 

21 'miUINT16', 'miUINT8', 'mxCHAR_CLASS', 'mxFULL_CLASS', 'mxSPARSE_CLASS', 

22 'np_to_mtypes', 'order_codes' 

23] 

24 

25 

# True when the interpreter runs on a little-endian machine.
SYS_LITTLE_ENDIAN = (sys.byteorder == 'little')

# Mat4 data type codes (the "P" digit of the MOPT header field).
miDOUBLE, miSINGLE, miINT32, miINT16, miUINT16, miUINT8 = range(6)

# Mat4 matrix class codes (the "T" digit of the MOPT header field).
mxFULL_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS = range(3)

# Byte-order-free dtype codes keyed by Mat4 data type code, plus the layout
# of the five-integer variable header and the character dtype.
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    'header': [
        ('mopt', 'i4'),
        ('mrows', 'i4'),
        ('ncols', 'i4'),
        ('imagf', 'i4'),
        ('namlen', 'i4'),
    ],
    'U1': 'U1',
}

# Lookup from a NumPy dtype string (byte-order character stripped) to the
# Mat4 data type code used when writing.
np_to_mtypes = {
    'f8': miDOUBLE,
    'c32': miDOUBLE,
    'c24': miDOUBLE,
    'c16': miDOUBLE,
    'f4': miSINGLE,
    'c8': miSINGLE,
    'i4': miINT32,
    'i2': miINT16,
    'u2': miUINT16,
    'u1': miUINT8,
    'S1': miUINT8,
}

# Labels for the "M" (byte order) digit of MOPT; the reader only accepts
# codes 0 and 1 without warning.
order_codes = dict(enumerate((
    '<',
    '>',
    'VAX D-float',
    'VAX G-float',
    'Cray',
)))

# Human-readable names for the matrix class codes.
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
}

82 

83 

class VarHeader4:
    ''' Plain record describing one variable stored in a Mat4 file

    Attributes set by the constructor: ``name``, ``dtype``, ``mclass``,
    ``dims`` and ``is_complex``.
    '''
    # The Mat4 format has no notion of logical or global variables, so both
    # flags are fixed at class level.
    is_global = False
    is_logical = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        # Store every argument unchanged under the attribute of the same name.
        self.name, self.dtype = name, dtype
        self.mclass, self.dims = mclass, dims
        self.is_complex = is_complex

100 

101 

class VarReader4:
    ''' Class to read matlab 4 variables

    Takes its stream, dtype table and post-processing flags from the
    `file_reader` it is constructed with.
    '''

    def __init__(self, file_reader):
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance
            ``dims`` is a tuple of two Python ints, ``mclass`` a Python int
            and ``is_complex`` a Python bool.

        Raises
        ------
        ValueError
            If the MOPT field is outside ``0..5000`` or its "O" digit is
            not zero.
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        # Header fields arrive as 0-d int32 arrays; convert to Python ints so
        # that later size arithmetic cannot wrap around at 32 bits.
        mopt = int(data['mopt'])
        if mopt < 0 or mopt > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        M, rest = divmod(mopt, 1000)  # order code
        if M not in (0, 1):
            # mopt == 5000 gives M == 5, which has no entry in order_codes;
            # fall back to a generic label instead of raising KeyError here.
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt"
                          % order_codes.get(M, 'unknown'),
                          UserWarning)
        O, rest = divmod(rest, 100)  # unused, should be 0
        if O != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        # P: data type code e.g miDOUBLE; T: matrix type code e.g mxFULL_CLASS
        P, T = divmod(rest, 10)
        dims = (int(data['mrows']), int(data['ncols']))
        is_complex = bool(data['imagf'] == 1)
        dtype = self.dtypes[P]
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read the array described by `hdr` from the current position

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True, apply ``chars_to_strings`` (char arrays, when
            ``chars_as_strings`` is set) and ``squeeze_element`` (when
            ``squeeze_me`` is set). Sparse matrices are never post-processed.

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class code.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            If the stream yields a different number of bytes than the
            header implies.
        '''
        dt = hdr.dtype
        # Force Python ints: dims taken from a structured header are int32
        # and their product would otherwise be computed in 32 bits.
        dims = tuple(int(d) for d in hdr.dims)
        num_bytes = dt.itemsize
        for d in dims:
            num_bytes *= d
        buffer = self.mat_stream.read(num_bytes)
        if len(buffer) != num_bytes:
            raise ValueError("Not enough bytes to read matrix '%s'; is this "
                             "a badly-formed file? Consider listing matrices "
                             "with `whosmat` and loading named matrices with "
                             "`variable_names` kwarg to `loadmat`" % hdr.name)
        # Mat4 stores matrices column-major, hence order='F'.
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # Real and imaginary parts are two consecutive sub-arrays; the
            # sum below builds a new array, so skip the intermediate copies.
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        # Character codes are cast to single bytes, decoded as latin-1, and
        # the resulting string is reinterpreted as an array of 'U1' items.
        arr = self.read_sub_array(hdr).astype(np.uint8)
        S = arr.tobytes().decode('latin-1')
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` (or ``complex`` for four-column storage)
            and shape read from the sparse matrix data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data are saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns.
        '''
        res = self.read_sub_array(hdr)
        tmp = res[:-1, :]
        # All numbers are float64 in Matlab, but SciPy sparse expects int shape
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_matrix((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.

        For sparse matrices the shape is stored in the data itself, so this
        seeks into the stream; an empty tuple is returned when the stored
        matrix does not have at least one row and one column.

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class code.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                # the last axis is folded into the strings
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            dims = hdr.dims

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
                return ()

            # Read only the row and column counts: they are the last entry of
            # the first and of the second stored column respectively.
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            rows = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
            cols = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError('No reader for class code %s' % mclass)

        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape

306 

307 

class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

    %(matstream_arg)s
    %(load_args)s
        '''
        super().__init__(mat_stream, *args, **kwargs)
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess the file byte order from the first MOPT integer

        The first four bytes are read as a native-endian int32 and the
        stream is rewound to position 0 afterwards.

        Returns
        -------
        order : {'<', '>'}
            ``'<'`` when the value is 0; the opposite of the native order
            when the value falls outside ``0..5000`` (taken to mean it was
            byteswapped); the native order otherwise.
        '''
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            return '<'
        native, swapped = ('<', '>') if SYS_LITTLE_ENDIAN else ('>', '<')
        if mopt < 0 or mopt > 5000:
            # Number must have been byteswapped
            return swapped
        # Not byteswapped
        return native

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        # Multiply as Python ints: header dims may be 32-bit NumPy values and
        # their product would wrap around for variables larger than 2 GiB.
        n = reduce(lambda acc, d: acc * int(d), hdr.dims, 1)
        remaining_bytes = hdr.dtype.itemsize * n
        # Full complex matrices store a second, imaginary, block of the same
        # size; sparse data are excluded from the doubling.
        if hdr.is_complex and not hdr.mclass == mxSPARSE_CLASS:
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            maps variable name to the array read for it
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # private copy: names are removed from it as they are found
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                # stop scanning once every requested variable was read
                if len(variable_names) == 0:
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        variables : list of tuple
            one ``(name, shape, class description)`` entry per variable;
            the description is ``'unknown'`` for unrecognised class codes
        '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        variables = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            variables.append((name, shape, info))

            # shape_from_header may have moved the stream; jump explicitly
            self.mat_stream.seek(next_position)
        return variables

427 

428 

def arr_to_2d(arr, oned_as='row'):
    ''' Return `arr` reshaped to the shape ``matdims`` reports for it

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Passed through to ``matdims``; selects whether 1-D vectors become
       row vectors or column vectors.

    Returns
    -------
    arr2d : array
       `arr` reshaped to the dimensions returned by ``matdims``

    Raises
    ------
    ValueError
       If ``matdims`` reports more than two dimensions.
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')

451 

452 

class VarWriter4:
    ''' Class to write matlab 4 variables

    Takes its output stream and ``oned_as`` setting from the `file_writer`
    it is constructed with.
    '''

    def __init__(self, file_writer):
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the raw bytes of `arr` in column-major (Fortran) order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
            Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex
        '''
        header = np.empty((), mdtypes_template['header'])
        # Data are written in native byte order; M records which one that is.
        M = 0 if SYS_LITTLE_ENDIAN else 1
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        # the stored name length includes the terminating NUL byte
        header['namlen'] = len(name) + 1
        self.write_bytes(header)
        data = name + '\0'
        self.write_string(data.encode('latin1'))

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
            array to write
        name : str
            name in matlab workspace

        Raises
        ------
        TypeError
            For object or void (structured) arrays.
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.str_, np.bytes_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write a real or complex numeric array as a full matrix

        Dtypes without an entry in ``np_to_mtypes`` are converted to
        float64 (or complex128). Complex arrays are written as the real
        block followed by the imaginary block.
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        P = np_to_mtypes.get(arr.dtype.str[1:])
        if imagf:
            if P != miSINGLE:
                # Anything but complex64 is stored as two float64 planes.
                # Extended-precision complex input is tagged miDOUBLE by the
                # lookup table, so it must really be converted, otherwise
                # long-double bytes would be written under a double header.
                arr = arr.astype(np.complex128, copy=False)
                P = miDOUBLE
        elif P is None:
            arr = arr.astype('f8')
            P = miDOUBLE
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write a string array as a char matrix of latin-1 bytes '''
        arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1: view the whole array as one string,
            # encode it, then reinterpret the bytes as single characters.
            n_chars = np.prod(dims)
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2-D

        Writes an ``(nnz + 1)``-row float64 table: 1-based row indices,
        1-based column indices, values (plus an imaginary column for
        complex data); the last row holds the matrix shape.

        See docstring for VarReader4.read_sparse_array
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        ijv = np.zeros((A.nnz + 1, 3+imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        # imagf is deliberately left at its default of 0 in the header
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)

590 

591 

class MatFile4Writer:
    ''' Class for writing matlab 4 format files '''

    def __init__(self, file_stream, oned_as=None):
        # `oned_as` falls back to 'row' when the caller passes None.
        self.file_stream = file_stream
        self.oned_as = 'row' if oned_as is None else oned_as
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping whose ``items`` method yields ``(name, contents)``
           pairs; ``name`` is the variable name that will appear in the
           matlab workspace on load and ``contents`` is something
           writeable to a matlab file, such as a NumPy array.
        write_header : {None, True, False}
           Ignored. A matlab 4 file has no file-level header; the argument
           exists only so that the signature matches the matlab 5 writer.
        '''
        # A fresh variable writer is created on every call.
        writer = self._matrix_writer = VarWriter4(self)
        for name, var in mdict.items():
            writer.write(var, name)

623 self._matrix_writer.write(var, name)