Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_mio4.py: 2%

282 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-03 06:39 +0000

1''' Classes for read / write of matlab (TM) 4 files 

2''' 

3import sys 

4import warnings 

5import math 

6 

7import numpy as np 

8 

9import scipy.sparse 

10 

11from ._miobase import (MatFileReader, docfiller, matdims, read_dtype, 

12 convert_dtypes, arr_to_chars, arr_dtype_number) 

13 

14from ._mio_utils import squeeze_element, chars_to_strings 

15from functools import reduce 

16 

17 

18__all__ = [ 

19 'MatFile4Reader', 'MatFile4Writer', 'SYS_LITTLE_ENDIAN', 

20 'VarHeader4', 'VarReader4', 'VarWriter4', 'arr_to_2d', 'mclass_info', 

21 'mdtypes_template', 'miDOUBLE', 'miINT16', 'miINT32', 'miSINGLE', 

22 'miUINT16', 'miUINT8', 'mxCHAR_CLASS', 'mxFULL_CLASS', 'mxSPARSE_CLASS', 

23 'np_to_mtypes', 'order_codes' 

24] 

25 

26 

# True when this interpreter runs on a little-endian machine
SYS_LITTLE_ENDIAN = sys.byteorder == 'little'

# Mat4 data type codes (the ``P`` digit of the MOPT header integer)
miDOUBLE, miSINGLE, miINT32, miINT16, miUINT16, miUINT8 = range(6)

# Byte-order-free dtype descriptions, keyed by data type code, plus the
# layout of the five-integer variable header and the one-character type
mdtypes_template = {
    miDOUBLE: 'f8',
    miSINGLE: 'f4',
    miINT32: 'i4',
    miINT16: 'i2',
    miUINT16: 'u2',
    miUINT8: 'u1',
    'header': [(field, 'i4')
               for field in ('mopt', 'mrows', 'ncols', 'imagf', 'namlen')],
    'U1': 'U1',
}

# NumPy type strings (without byte-order character) -> Mat4 data type code
np_to_mtypes = {
    type_str: code
    for code, type_strs in (
        (miDOUBLE, ('f8', 'c32', 'c24', 'c16')),
        (miSINGLE, ('f4', 'c8')),
        (miINT32, ('i4',)),
        (miINT16, ('i2',)),
        (miUINT16, ('u2',)),
        (miUINT8, ('u1', 'S1')),
    )
    for type_str in type_strs
}

# matrix classes (the ``T`` digit of the MOPT header integer)
mxFULL_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS = range(3)

# Byte-order codes (the ``M`` digit of the MOPT header integer)
order_codes = dict(enumerate((
    '<',
    '>',
    'VAX D-float',  # !
    'VAX G-float',
    'Cray',  # !!
)))

# Human-readable name for each matrix class
mclass_info = {
    mxFULL_CLASS: 'double',
    mxCHAR_CLASS: 'char',
    mxSPARSE_CLASS: 'sparse',
}

83 

84 

class VarHeader4:
    ''' Plain record holding the header fields of one Mat4 variable '''

    # The Mat4 format has no notion of logical or global variables
    is_logical = False
    is_global = False

    def __init__(self, name, dtype, mclass, dims, is_complex):
        # Store every constructor argument unchanged under its own name
        self.name, self.dtype = name, dtype
        self.mclass, self.dims = mclass, dims
        self.is_complex = is_complex

101 

102 

class VarReader4:
    ''' Class to read matlab 4 variables '''

    def __init__(self, file_reader):
        # Copy the stream and the read options from the owning file reader
        self.file_reader = file_reader
        self.mat_stream = file_reader.mat_stream
        self.dtypes = file_reader.dtypes
        self.chars_as_strings = file_reader.chars_as_strings
        self.squeeze_me = file_reader.squeeze_me

    def read_header(self):
        ''' Read and return header for variable

        Returns
        -------
        hdr : ``VarHeader4`` instance
            ``dims`` holds Python ints so that later size arithmetic cannot
            overflow a fixed-width NumPy integer.

        Raises
        ------
        ValueError
            If the MOPT integer is outside ``[0, 5000]`` or its unused digit
            is not 0.
        '''
        data = read_dtype(self.mat_stream, self.dtypes['header'])
        name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
        mopt = int(data['mopt'])
        if mopt < 0 or mopt > 5000:
            raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
        M, rest = divmod(mopt, 1000)  # order code
        if M not in (0, 1):
            # ``.get``: mopt == 5000 yields M == 5, which has no table entry
            warnings.warn("We do not support byte ordering '%s'; returned "
                          "data may be corrupt" % order_codes.get(M, 'unknown'),
                          UserWarning, stacklevel=3)
        unused_digit, rest = divmod(rest, 100)  # unused, should be 0
        if unused_digit != 0:
            raise ValueError('O in MOPT integer should be 0, wrong format?')
        # P: data type code e.g miDOUBLE; T: matrix type code e.g.,
        # mxFULL_CLASS (see above)
        P, T = divmod(rest, 10)
        dims = (int(data['mrows']), int(data['ncols']))
        is_complex = data['imagf'] == 1
        dtype = self.dtypes[P]
        return VarHeader4(
            name,
            dtype,
            T,
            dims,
            is_complex)

    def array_from_header(self, hdr, process=True):
        ''' Read the array described by `hdr` from the stream

        Parameters
        ----------
        hdr : ``VarHeader4`` instance
        process : bool, optional
            If True, convert char arrays to strings when
            ``chars_as_strings`` is set and squeeze the result when
            ``squeeze_me`` is set. Sparse matrices are returned as read.

        Returns
        -------
        arr : ndarray or ``scipy.sparse.coo_matrix``

        Raises
        ------
        TypeError
            If ``hdr.mclass`` is not a known matrix class code.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            arr = self.read_full_array(hdr)
        elif mclass == mxCHAR_CLASS:
            arr = self.read_char_array(hdr)
            if process and self.chars_as_strings:
                arr = chars_to_strings(arr)
        elif mclass == mxSPARSE_CLASS:
            # no current processing (below) makes sense for sparse
            return self.read_sparse_array(hdr)
        else:
            raise TypeError('No reader for class code %s' % mclass)
        if process and self.squeeze_me:
            return squeeze_element(arr)
        return arr

    def read_sub_array(self, hdr, copy=True):
        ''' Mat4 read using header `hdr` dtype and dims

        Parameters
        ----------
        hdr : object
           object with attributes ``dtype``, ``dims``. dtype is assumed to be
           the correct endianness
        copy : bool, optional
           copies array before return if True (default True)
           (buffer is usually read only)

        Returns
        -------
        arr : ndarray
            of dtype given by `hdr` ``dtype`` and shape given by `hdr` ``dims``

        Raises
        ------
        ValueError
            If a dimension is negative or the stream holds fewer bytes than
            the header announces.
        '''
        dt = hdr.dtype
        # Python ints: a product of fixed-width NumPy integers could wrap
        dims = tuple(int(d) for d in hdr.dims)
        if any(d < 0 for d in dims):
            # Reject before reading: read() with a negative count would
            # consume the remainder of the stream
            raise ValueError("Negative dimension in header of matrix '%s'; "
                             "is this a badly-formed file?" % hdr.name)
        num_bytes = dt.itemsize * math.prod(dims)
        buffer = self.mat_stream.read(num_bytes)
        if len(buffer) != num_bytes:
            raise ValueError("Not enough bytes to read matrix '%s'; is this "
                             "a badly-formed file? Consider listing matrices "
                             "with `whosmat` and loading named matrices with "
                             "`variable_names` kwarg to `loadmat`" % hdr.name)
        arr = np.ndarray(shape=dims,
                         dtype=dt,
                         buffer=buffer,
                         order='F')
        if copy:
            arr = arr.copy()
        return arr

    def read_full_array(self, hdr):
        ''' Full (rather than sparse) matrix getter

        Read matrix (array) can be real or complex

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            complex array if ``hdr.is_complex`` is True, otherwise a real
            numeric array
        '''
        if hdr.is_complex:
            # The real block is stored first, then the imaginary block.
            # avoid array copy to save memory
            res = self.read_sub_array(hdr, copy=False)
            res_j = self.read_sub_array(hdr, copy=False)
            return res + (res_j * 1j)
        return self.read_sub_array(hdr)

    def read_char_array(self, hdr):
        ''' latin-1 text matrix (char matrix) reader

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ndarray
            with dtype 'U1', shape given by `hdr` ``dims``
        '''
        arr = self.read_sub_array(hdr).astype(np.uint8)
        S = arr.tobytes().decode('latin-1')
        # Reinterpret the decoded string's buffer as single characters
        return np.ndarray(shape=hdr.dims,
                          dtype=np.dtype('U1'),
                          buffer=np.array(S)).copy()

    def read_sparse_array(self, hdr):
        ''' Read and return sparse matrix type

        Parameters
        ----------
        hdr : ``VarHeader4`` instance

        Returns
        -------
        arr : ``scipy.sparse.coo_matrix``
            with dtype ``float`` and shape read from the sparse matrix data

        Notes
        -----
        MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
        N is the number of non-zero values. Column 1 values [0:N] are the
        (1-based) row indices of the each non-zero value, column 2 [0:N] are the
        column indices, column 3 [0:N] are the (real) values. The last values
        [-1,0:2] of the rows, column indices are shape[0] and shape[1]
        respectively of the output matrix. The last value for the values column
        is a padding 0. mrows and ncols values from the header give the shape of
        the stored matrix, here [N+1, 3]. Complex data are saved as a 4 column
        matrix, where the fourth column contains the imaginary component; the
        last value is again 0. Complex sparse data do *not* have the header
        ``imagf`` field set to True; the fact that the data are complex is only
        detectable because there are 4 storage columns.
        '''
        res = self.read_sub_array(hdr)
        tmp = res[:-1, :]
        # All numbers are float64 in Matlab, but SciPy sparse expects int shape
        dims = (int(res[-1, 0]), int(res[-1, 1]))
        I = np.ascontiguousarray(tmp[:, 0], dtype='intc')  # fixes byte order also
        J = np.ascontiguousarray(tmp[:, 1], dtype='intc')
        I -= 1  # for 1-based indexing
        J -= 1
        if res.shape[1] == 3:
            V = np.ascontiguousarray(tmp[:, 2], dtype='float')
        else:
            V = np.ascontiguousarray(tmp[:, 2], dtype='complex')
            V.imag = tmp[:, 3]
        return scipy.sparse.coo_matrix((V, (I, J)), dims)

    def shape_from_header(self, hdr):
        '''Read the shape of the array described by the header.
        The file position after this call is unspecified.

        Returns ``()`` for a sparse header whose stored matrix is not 2-D
        with at least one row and two columns, because the row and column
        counts are read from the last entries of the first two columns.
        '''
        mclass = hdr.mclass
        if mclass == mxFULL_CLASS:
            shape = tuple(map(int, hdr.dims))
        elif mclass == mxCHAR_CLASS:
            shape = tuple(map(int, hdr.dims))
            if self.chars_as_strings:
                shape = shape[:-1]
        elif mclass == mxSPARSE_CLASS:
            dt = hdr.dtype
            # Python ints keep the seek offsets free of fixed-width overflow
            dims = tuple(map(int, hdr.dims))

            if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 2):
                return ()

            # Read only the row and column counts
            skip = dt.itemsize * (dims[0] - 1)
            self.mat_stream.seek(skip, 1)
            rows = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))
            self.mat_stream.seek(skip, 1)
            cols = np.ndarray(shape=(), dtype=dt,
                              buffer=self.mat_stream.read(dt.itemsize))

            shape = (int(rows), int(cols))
        else:
            raise TypeError('No reader for class code %s' % mclass)

        if self.squeeze_me:
            shape = tuple([x for x in shape if x != 1])
        return shape

307 

308 

class MatFile4Reader(MatFileReader):
    ''' Reader for Mat4 files '''
    @docfiller
    def __init__(self, mat_stream, *args, **kwargs):
        ''' Initialize matlab 4 file reader

    %(matstream_arg)s
    %(load_args)s
        '''
        super().__init__(mat_stream, *args, **kwargs)
        self._matrix_reader = None

    def guess_byte_order(self):
        ''' Guess the file byte order from the first MOPT integer

        The first four bytes are read as a native-order ``i4`` and the
        stream is rewound to position 0 afterwards.

        Returns
        -------
        order : {'<', '>'}
            '<' if the value is 0; the opposite of the native order if the
            value is outside ``[0, 5000]``; the native order otherwise.
        '''
        self.mat_stream.seek(0)
        mopt = read_dtype(self.mat_stream, np.dtype('i4'))
        self.mat_stream.seek(0)
        if mopt == 0:
            return '<'
        native, swapped = ('<', '>') if SYS_LITTLE_ENDIAN else ('>', '<')
        if mopt < 0 or mopt > 5000:
            # Number must have been byteswapped
            return swapped
        # Not byteswapped
        return native

    def initialize_read(self):
        ''' Run when beginning read of variables

        Sets up readers from parameters in `self`
        '''
        self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
        self._matrix_reader = VarReader4(self)

    def read_var_header(self):
        ''' Read and return header, next position

        Parameters
        ----------
        None

        Returns
        -------
        header : object
           object that can be passed to self.read_var_array, and that
           has attributes ``name`` and ``is_global``
        next_position : int
           position in stream of next variable
        '''
        hdr = self._matrix_reader.read_header()
        # Multiply as Python ints so a large matrix cannot overflow a
        # fixed-width NumPy integer
        n_elements = math.prod(int(d) for d in hdr.dims)
        remaining_bytes = hdr.dtype.itemsize * n_elements
        # Full complex data store a second, imaginary block of equal size;
        # sparse data never do
        if hdr.is_complex and hdr.mclass != mxSPARSE_CLASS:
            remaining_bytes *= 2
        next_position = self.mat_stream.tell() + remaining_bytes
        return hdr, next_position

    def read_var_array(self, header, process=True):
        ''' Read array, given `header`

        Parameters
        ----------
        header : header object
           object with fields defining variable header
        process : {True, False}, optional
           If True, apply recursive post-processing during loading of array.

        Returns
        -------
        arr : array
           array with post-processing applied or not according to
           `process`.
        '''
        return self._matrix_reader.array_from_header(header, process)

    def get_variables(self, variable_names=None):
        ''' get variables from stream as dictionary

        Parameters
        ----------
        variable_names : None or str or sequence of str, optional
            variable name, or sequence of variable names to get from Mat file /
            file stream. If None, then get all variables in file.

        Returns
        -------
        mdict : dict
            maps each variable name that was read to its array
        '''
        if isinstance(variable_names, str):
            variable_names = [variable_names]
        elif variable_names is not None:
            # Private copy: names are removed from it as they are found
            variable_names = list(variable_names)
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        mdict = {}
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            if variable_names is not None and name not in variable_names:
                self.mat_stream.seek(next_position)
                continue
            mdict[name] = self.read_var_array(hdr)
            self.mat_stream.seek(next_position)
            if variable_names is not None:
                variable_names.remove(name)
                if len(variable_names) == 0:
                    # Every requested variable has been read
                    break
        return mdict

    def list_variables(self):
        ''' list variables from stream

        Returns
        -------
        variables : list of tuple
            one ``(name, shape, info)`` tuple per variable, where ``info``
            is looked up in ``mclass_info`` ('unknown' if absent)
        '''
        self.mat_stream.seek(0)
        # set up variable reader
        self.initialize_read()
        variables = []
        while not self.end_of_stream():
            hdr, next_position = self.read_var_header()
            name = 'None' if hdr.name is None else hdr.name.decode('latin1')
            shape = self._matrix_reader.shape_from_header(hdr)
            info = mclass_info.get(hdr.mclass, 'unknown')
            variables.append((name, shape, info))

            # shape_from_header leaves the position unspecified
            self.mat_stream.seek(next_position)
        return variables

428 

429 

def arr_to_2d(arr, oned_as='row'):
    ''' Reshape ``arr`` so that it has exactly two dimensions

    A ValueError is raised when ``matdims`` reports more than 2 dimensions
    for `arr`.

    Parameters
    ----------
    arr : array
    oned_as : {'row', 'column'}, optional
       Passed on to ``matdims``; selects whether 1-D vectors become row
       vectors or column vectors. See documentation for ``matdims`` for
       more detail

    Returns
    -------
    arr2d : array
       `arr` reshaped to the dimensions returned by ``matdims``
    '''
    target_shape = matdims(arr, oned_as)
    if len(target_shape) <= 2:
        return arr.reshape(target_shape)
    raise ValueError('Matlab 4 files cannot save arrays with more than '
                     '2 dimensions')

452 

453 

class VarWriter4:
    ''' Class to write matlab 4 variables '''

    def __init__(self, file_writer):
        # Take the output stream and the 1-D handling option from the
        # owning file writer
        self.file_stream = file_writer.file_stream
        self.oned_as = file_writer.oned_as

    def write_bytes(self, arr):
        ''' Write the bytes of array `arr` to the stream in Fortran order '''
        self.file_stream.write(arr.tobytes(order='F'))

    def write_string(self, s):
        ''' Write `s` to the stream unchanged '''
        self.file_stream.write(s)

    def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
        ''' Write header for given data options

        Parameters
        ----------
        name : str
            name of variable
        shape : sequence
            Shape of array as it will be read in matlab
        P : int, optional
            code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
            miINT16, miUINT16, miUINT8``
        T : int, optional
            code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
            mxSPARSE_CLASS``
        imagf : int, optional
            flag indicating complex

        Raises
        ------
        UnicodeEncodeError
            If `name` cannot be encoded as latin1; nothing has been written
            to the stream in that case.
        '''
        # Encode first so a bad name fails before any header bytes go out
        name_bytes = (name + '\0').encode('latin1')
        header = np.empty((), mdtypes_template['header'])
        M = not SYS_LITTLE_ENDIAN  # order code: 0 little-endian, 1 big-endian
        O = 0
        header['mopt'] = (M * 1000 +
                          O * 100 +
                          P * 10 +
                          T)
        header['mrows'] = shape[0]
        header['ncols'] = shape[1]
        header['imagf'] = imagf
        header['namlen'] = len(name) + 1  # includes the terminating NUL
        self.write_bytes(header)
        self.write_string(name_bytes)

    def write(self, arr, name):
        ''' Write matrix `arr`, with name `name`

        Parameters
        ----------
        arr : array_like
            array to write
        name : str
            name in matlab workspace

        Raises
        ------
        TypeError
            If `arr` converts to an object or void dtype array.
        '''
        # we need to catch sparse first, because np.asarray returns an
        # an object array for scipy.sparse
        if scipy.sparse.issparse(arr):
            self.write_sparse(arr, name)
            return
        arr = np.asarray(arr)
        dt = arr.dtype
        if not dt.isnative:
            # The header is written in native order, so the data must be too
            arr = arr.astype(dt.newbyteorder('='))
        dtt = dt.type
        if dtt is np.object_:
            raise TypeError('Cannot save object arrays in Mat4')
        elif dtt is np.void:
            raise TypeError('Cannot save void type arrays')
        elif dtt in (np.str_, np.bytes_):
            self.write_char(arr, name)
            return
        self.write_numeric(arr, name)

    def write_numeric(self, arr, name):
        ''' Write numeric array `arr` as a full matrix named `name`

        Dtypes without an entry in ``np_to_mtypes`` are converted to
        double precision. Complex data are written as the real block
        followed by the imaginary block.
        '''
        arr = arr_to_2d(arr, self.oned_as)
        imagf = arr.dtype.kind == 'c'
        try:
            P = np_to_mtypes[arr.dtype.str[1:]]
        except KeyError:
            # 'c16' is complex128; 'c128' is not a valid NumPy type string
            arr = arr.astype('c16' if imagf else 'f8')
            P = miDOUBLE
        else:
            if imagf and arr.dtype.itemsize > 16:
                # Extended-precision complex maps to miDOUBLE, so the
                # components must really be 8-byte doubles when written
                arr = arr.astype('c16')
        self.write_header(name,
                          arr.shape,
                          P=P,
                          T=mxFULL_CLASS,
                          imagf=imagf)
        if imagf:
            self.write_bytes(arr.real)
            self.write_bytes(arr.imag)
        else:
            self.write_bytes(arr)

    def write_char(self, arr, name):
        ''' Write string array `arr` as a char matrix named `name`

        Unicode data are re-encoded to latin-1, one byte per character.
        '''
        if arr.dtype.type == np.str_ and arr.dtype.itemsize != np.dtype('U1').itemsize:
            # Split multi-character strings into single characters
            arr = arr_to_chars(arr)
        arr = arr_to_2d(arr, self.oned_as)
        dims = arr.shape
        self.write_header(
            name,
            dims,
            P=miUINT8,
            T=mxCHAR_CLASS)
        if arr.dtype.kind == 'U':
            # Recode unicode to latin1
            n_chars = math.prod(dims)
            # View the whole array as one string of n_chars characters
            st_arr = np.ndarray(shape=(),
                                dtype=arr_dtype_number(arr, n_chars),
                                buffer=arr)
            st = st_arr.item().encode('latin-1')
            arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
        self.write_bytes(arr)

    def write_sparse(self, arr, name):
        ''' Sparse matrices are 2-D

        See docstring for VarReader4.read_sparse_array
        '''
        A = arr.tocoo()  # convert to sparse COO format (ijv)
        imagf = A.dtype.kind == 'c'
        # One row per stored value plus a final row for the matrix shape;
        # complex data get a fourth column for the imaginary parts
        ijv = np.zeros((A.nnz + 1, 3 + imagf), dtype='f8')
        ijv[:-1, 0] = A.row
        ijv[:-1, 1] = A.col
        ijv[:-1, 0:2] += 1  # 1 based indexing
        if imagf:
            ijv[:-1, 2] = A.data.real
            ijv[:-1, 3] = A.data.imag
        else:
            ijv[:-1, 2] = A.data
        ijv[-1, 0:2] = A.shape
        # imagf stays 0 in the header: complex sparse data are recognised
        # by the fourth column only
        self.write_header(
            name,
            ijv.shape,
            P=miDOUBLE,
            T=mxSPARSE_CLASS)
        self.write_bytes(ijv)

592 

593 

class MatFile4Writer:
    ''' Class for writing matlab 4 format files '''
    def __init__(self, file_stream, oned_as=None):
        self.file_stream = file_stream
        # An unspecified ``oned_as`` falls back to 'row'
        self.oned_as = 'row' if oned_as is None else oned_as
        self._matrix_writer = None

    def put_variables(self, mdict, write_header=None):
        ''' Write variables in `mdict` to stream

        Parameters
        ----------
        mdict : mapping
           mapping whose ``items`` method returns (name, contents) pairs,
           where ``name`` is the name that will appear in the matlab
           workspace on file load, and ``contents`` is something writeable
           to a matlab file, such as a NumPy array.
        write_header : {None, True, False}
           Ignored. A matlab 4 mat file has no file header; the argument
           exists only for compatibility with the matlab 5 version of
           this method.
        '''
        var_writer = self._matrix_writer = VarWriter4(self)
        for var_name, contents in mdict.items():
            var_writer.write(contents, var_name)