Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/matlab/_miobase.py: 26%

108 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-03 06:39 +0000

1# Authors: Travis Oliphant, Matthew Brett 

2 

3""" 

4Base classes for MATLAB file stream reading. 

5 

6MATLAB is a registered trademark of the Mathworks inc. 

7""" 

8 

9import numpy as np 

10from scipy._lib import doccer 

11 

12from . import _byteordercodes as boc 

13 

14__all__ = [ 

15 'MatFileReader', 'MatReadError', 'MatReadWarning', 

16 'MatVarReader', 'MatWriteError', 'arr_dtype_number', 

17 'arr_to_chars', 'convert_dtypes', 'doc_dict', 

18 'docfiller', 'get_matfile_version', 

19 'matdims', 'read_dtype' 

20] 

21 

class MatReadError(Exception):
    """Error raised when reading a MATLAB file fails."""

24 

25 

class MatWriteError(Exception):
    """Error raised when writing a MATLAB file fails."""

28 

29 

class MatReadWarning(UserWarning):
    """Warning category used for problems noticed while reading."""

32 

33 

# Shared parameter descriptions.  Each key is the name of a ``%(key)s``
# placeholder that ``docfiller`` (built below) substitutes into the docstring
# of any function it decorates.  The text inside each value is part of the
# rendered documentation, so its internal indentation is significant.
doc_dict = {
    'file_arg':
        '''file_name : str
   Name of the mat file (do not need .mat extension if
   appendmat==True). Can also pass open file-like object.''',
    'append_arg':
        '''appendmat : bool, optional
   True to append the .mat extension to the end of the given
   filename, if not already present. Default is True.''',
    'load_args':
        '''byte_order : str or None, optional
   None by default, implying byte order guessed from mat
   file. Otherwise can be one of ('native', '=', 'little', '<',
   'BIG', '>').
mat_dtype : bool, optional
   If True, return arrays in same dtype as would be loaded into
   MATLAB (instead of the dtype with which they are saved).
squeeze_me : bool, optional
   Whether to squeeze unit matrix dimensions or not.
chars_as_strings : bool, optional
   Whether to convert char arrays to string arrays.
matlab_compatible : bool, optional
   Returns matrices as would be loaded by MATLAB (implies
   squeeze_me=False, chars_as_strings=False, mat_dtype=True,
   struct_as_record=True).''',
    'struct_arg':
        '''struct_as_record : bool, optional
   Whether to load MATLAB structs as NumPy record arrays, or as
   old-style NumPy arrays with dtype=object. Setting this flag to
   False replicates the behavior of SciPy version 0.7.x (returning
   numpy object arrays). The default setting is True, because it
   allows easier round-trip load and save of MATLAB files.''',
    'matstream_arg':
        '''mat_stream : file-like
   Object with file API, open for reading.''',
    'long_fields':
        '''long_field_names : bool, optional
   * False - maximum field name length in a structure is 31 characters
     which is the documented maximum length. This is the default.
   * True - maximum field name length in a structure is 63 characters
     which works for MATLAB 7.6''',
    'do_compression':
        '''do_compression : bool, optional
   Whether to compress matrices on write. Default is False.''',
    'oned_as':
        '''oned_as : {'row', 'column'}, optional
   If 'column', write 1-D NumPy arrays as column vectors.
   If 'row', write 1D NumPy arrays as row vectors.''',
    'unicode_strings':
        '''unicode_strings : bool, optional
   If True, write strings as Unicode, else MATLAB usual encoding.''',
}

# Decorator that fills ``%(name)s`` placeholders in a docstring from doc_dict.
docfiller = doccer.filldoc(doc_dict)

87 

88''' 

89 

90 Note on architecture 

91====================== 

92 

93There are three sets of parameters relevant for reading files. The 

94first are *file read parameters* - containing options that are common 

95for reading the whole file, and therefore every variable within that 

96file. At the moment these are: 

97 

98* mat_stream 

99* dtypes (derived from byte code) 

100* byte_order 

101* chars_as_strings 

102* squeeze_me 

103* struct_as_record (MATLAB 5 files) 

104* class_dtypes (derived from order code, MATLAB 5 files) 

105* codecs (MATLAB 5 files) 

106* uint16_codec (MATLAB 5 files) 

107 

108Another set of parameters are those that apply only to the current 

109variable being read - the *header*: 

110 

111* header related variables (different for v4 and v5 mat files) 

112* is_complex 

113* mclass 

114* var_stream 

115 

116With the header, we need ``next_position`` to tell us where the next 

117variable in the stream is. 

118 

119Then, for each element in a matrix, there can be *element read 

120parameters*. An element is, for example, one element in a MATLAB cell 

121array. At the moment, these are: 

122 

123* mat_dtype 

124 

125The file-reading object contains the *file read parameters*. The 

126*header* is passed around as a data object, or may be read and discarded 

127in a single function. The *element read parameters* - the mat_dtype in 

128this instance - are passed into a general post-processing function - see 

129``mio_utils`` for details. 

130''' 

131 

132 

def convert_dtypes(dtype_template, order_code):
    """Build a copy of a dtype mapping with every entry in one byte order.

    Parameters
    ----------
    dtype_template : mapping
        Mapping whose values are acceptable to ``np.dtype(val)``.  It is
        copied, not modified.
    order_code : str
        Byte-order code accepted by ``dtype.newbyteorder()``.

    Returns
    -------
    dtypes : mapping
        Copy of `dtype_template` in which each value has been replaced by
        ``np.dtype(val).newbyteorder(order_code)``.

    """
    converted = dtype_template.copy()
    for key, spec in dtype_template.items():
        converted[key] = np.dtype(spec).newbyteorder(order_code)
    return converted

154 

155 

def read_dtype(mat_stream, a_dtype):
    """
    Read one item of a known dtype from a byte stream.

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream; exactly ``a_dtype.itemsize`` bytes are
        requested from it.
    a_dtype : dtype
        dtype of the item to read.  `a_dtype` is assumed to already have the
        correct endianness.

    Returns
    -------
    arr : ndarray
        0-dimensional array of dtype `a_dtype` backed by the bytes read.

    """
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')

180 

181 

def matfile_version(file_name, *, appendmat=True):
    """
    Return major, minor tuple depending on apparent mat file type

    The mapping from major version to file format is:

    #. 0,x -> version 4 format mat files
    #. 1,x -> version 5 format mat files
    #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    file_name : str
       Name of the mat file (do not need .mat extension if
       appendmat==True). Can also pass open file-like object.
    appendmat : bool, optional
       True to append the .mat extension to the end of the given
       filename, if not already present. Default is True.

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty.
    ValueError
        The matfile version is unknown.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    # Imported at call time rather than at module import time.
    from ._mio import _open_file_context

    with _open_file_context(file_name, appendmat=appendmat) as stream:
        version = _get_matfile_version(stream)
    return version


# Alias for `matfile_version`; this is the name listed in ``__all__``.
get_matfile_version = matfile_version

225 

226 

def _get_matfile_version(fileobj):
    """Work out the mat file format version from an open stream.

    Parameters
    ----------
    fileobj : file-like
        Seekable binary stream positioned anywhere; it is rewound to offset
        0 before this function returns or raises.

    Returns
    -------
    version : tuple of int
        ``(0, 0)`` when any of the first four bytes is zero (mat 4 files),
        otherwise ``(major, minor)`` read from header bytes 124-125, with
        ``major`` being 1 or 2.

    Raises
    ------
    MatReadError
        If the stream is empty, or too short to hold the bytes that have to
        be inspected (fewer than 4 bytes, or a non-mat-4 file shorter than
        128 bytes).
    ValueError
        If the major version read from the header is neither 1 nor 2.
    """
    fileobj.seek(0)
    mopt_bytes = fileobj.read(4)
    if len(mopt_bytes) == 0:
        raise MatReadError("Mat file appears to be empty")
    if len(mopt_bytes) < 4:
        # Without this guard the fixed-size array below fails with an
        # unhelpful "buffer is too small" TypeError.
        fileobj.seek(0)
        raise MatReadError("Mat file appears to be truncated")
    # Mat4 files have a zero somewhere in first 4 bytes
    mopt_ints = np.ndarray(shape=(4,), dtype=np.uint8, buffer=mopt_bytes)
    if 0 in mopt_ints:
        fileobj.seek(0)
        return (0, 0)
    # For 5 format or 7.3 format we need to read an integer in the
    # header. Bytes 124 through 128 contain a version integer and an
    # endian test string
    fileobj.seek(124)
    tst_str = fileobj.read(4)
    fileobj.seek(0)
    if len(tst_str) < 4:
        # Header cut short: indexing below would raise IndexError, or
        # silently use an incomplete endian test string.
        raise MatReadError("Mat file appears to be truncated")
    # When the endian test string starts with 'I' the major version is the
    # second of the two version bytes, otherwise it is the first.
    maj_ind = int(tst_str[2] == b'I'[0])
    maj_val = int(tst_str[maj_ind])
    min_val = int(tst_str[1 - maj_ind])
    ret = (maj_val, min_val)
    if maj_val in (1, 2):
        return ret
    raise ValueError('Unknown mat file type, version {}, {}'.format(*ret))

250 

251 

def matdims(arr, oned_as='column'):
    """
    Determine equivalent MATLAB dimensions for given array

    Parameters
    ----------
    arr : ndarray
        Input array
    oned_as : {'column', 'row'}, optional
        Whether 1-D arrays are returned as MATLAB row or column matrices.
        Default is 'column'.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Raises
    ------
    ValueError
        If `arr` is a non-empty 1-D array and `oned_as` is neither
        'column' nor 'row'.

    Notes
    -----
    We had to decide what shape a 1 dimensional array would be by
    default. ``np.atleast_2d`` thinks it is a row vector. The
    default for a vector in MATLAB (e.g., ``>> 1:12``) is a row vector.

    Versions of scipy up to and including 0.11 resulted (accidentally)
    in 1-D arrays being read as column vectors. For the moment, we
    maintain the same tradition here.

    Only 0-D and 1-D inputs are reshaped; arrays with two or more
    dimensions, including empty ones, keep their shape unchanged.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.io.matlab._miobase import matdims
    >>> matdims(np.array(1)) # NumPy scalar
    (1, 1)
    >>> matdims(np.array([1])) # 1-D array, 1 element
    (1, 1)
    >>> matdims(np.array([1,2])) # 1-D array, 2 elements
    (2, 1)
    >>> matdims(np.array([[2],[3]])) # 2-D array, column vector
    (2, 1)
    >>> matdims(np.array([[2,3]])) # 2-D array, row vector
    (1, 2)
    >>> matdims(np.array([[[2,3]]])) # 3-D array, rowish vector
    (1, 1, 2)
    >>> matdims(np.array([])) # empty 1-D array
    (0, 0)
    >>> matdims(np.array([[]])) # empty 2-D array
    (1, 0)
    >>> matdims(np.array([[[]]])) # empty 3-D array
    (1, 1, 0)

    Optional argument flips 1-D shape behavior.

    >>> matdims(np.array([1,2]), 'row') # 1-D array, 2 elements
    (1, 2)

    The argument has to make sense though

    >>> matdims(np.array([1,2]), 'bizarre')
    Traceback (most recent call last):
       ...
    ValueError: 1-D option "bizarre" is strange

    """
    shape = arr.shape
    if shape == ():  # scalar
        return (1, 1)
    if len(shape) != 1:  # 2-D and above pass through untouched
        return shape
    if shape[0] == 0:
        # Empty 1-D arrays become 0x0 regardless of `oned_as`.
        return (0, 0)
    if oned_as == 'column':
        return shape + (1,)
    if oned_as == 'row':
        return (1,) + shape
    raise ValueError(f'1-D option "{oned_as}" is strange')

329 

330 

class MatVarReader:
    """Abstract class defining required interface for var readers.

    Every method here is a do-nothing placeholder that returns None.
    """

    def __init__(self, file_reader):
        """Accept `file_reader`; this base implementation ignores it."""

    def read_header(self):
        """Returns header (placeholder: returns None here)."""

    def array_from_header(self, header):
        """Reads array given header (placeholder: returns None here)."""

343 

344 

class MatFileReader:
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory   - gives object to fetch next matrix from stream
    guess_byte_order        - guesses file byte order from file
    """

    @docfiller
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 simplify_cells=False):
        '''
        Initializer for mat file reader

        mat_stream : file-like
            object with file API, open for reading
        %(load_args)s
        %(struct_arg)s
        verify_compressed_data_integrity : bool, optional
            Stored on the reader as given; not used by this base class.
        simplify_cells : bool, optional
            If True, ``squeeze_me`` is forced to True and
            ``struct_as_record`` to False, overriding the values passed
            for those arguments.
        '''
        # Initialize stream
        self.mat_stream = mat_stream
        self.dtypes = {}
        if not byte_order:
            # No explicit order given: let the (overridable) guesser decide.
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        if matlab_compatible:
            # Overrides squeeze_me, chars_as_strings and mat_dtype.
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity
        self.simplify_cells = simplify_cells
        if simplify_cells:
            # Applied last, so it wins over matlab_compatible as well.
            self.squeeze_me = True
            self.struct_as_record = False

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        ''' Return True if no more bytes can be read from the stream

        The stream position is the same after the call as before it.
        '''
        b = self.mat_stream.read(1)
        if len(b) == 0:
            # Nothing was consumed, so there is nothing to undo.  Seeking
            # back here would move an exhausted stream one byte backwards
            # (or to offset -1 on an empty stream).
            return True
        # Put back exactly what the probe consumed.
        curpos = self.mat_stream.tell()
        self.mat_stream.seek(curpos - len(b))
        return False

408 

409 

def arr_dtype_number(arr, num):
    """Return a dtype like that of `arr` but with `num` items per element.

    The first two characters of ``arr.dtype.str`` are kept and `num` is
    appended as the new size.
    """
    prefix = arr.dtype.str[:2]
    return np.dtype(f"{prefix}{num}")

413 

414 

def arr_to_chars(arr):
    """Convert string array to char array.

    The result has one extra trailing axis whose length is the item size
    taken from ``arr.dtype.str``.  Positions that compare equal to the empty
    string are replaced by a single space.  If there are none, the result is
    a view on the buffer of `arr`; otherwise a modified copy is returned.
    """
    # A 0-d input gets a leading axis of length 1.
    char_shape = list(arr.shape) or [1]
    # Digits after the two-character dtype prefix give the chars per item.
    char_shape.append(int(arr.dtype.str[2:]))
    chars = np.ndarray(shape=char_shape,
                       dtype=arr_dtype_number(arr, 1),
                       buffer=arr)
    blank_mask = chars == np.array('', dtype=chars.dtype)
    if not np.any(blank_mask):
        return chars
    padded = chars.copy()
    padded[blank_mask] = ' '
    return padded