Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_fast_matrix_market/__init__.py: 17%

187 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1# Copyright (C) 2022-2023 Adam Lugowski. All rights reserved. 

2# Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file. 

3# SPDX-License-Identifier: BSD-2-Clause 

4""" 

5Matrix Market I/O with a C++ backend. 

6See http://math.nist.gov/MatrixMarket/formats.html 

7for information about the Matrix Market format. 

8 

9.. versionadded:: 1.12.0 

10""" 

11import io 

12import os 

13 

14import numpy as np 

15import scipy.sparse 

16 

17__all__ = ['mminfo', 'mmread', 'mmwrite'] 

18 

# Module-wide thread count setting; read by the cursor helpers when no
# explicit parallelism is passed.
PARALLELISM = 0
"""
Number of threads that `mmread()` and `mmwrite()` use.
0 means number of CPUs in the system.
Use `threadpoolctl` to set this value.
"""

# Consulted by mmwrite() when it is called with symmetry='AUTO'.
ALWAYS_FIND_SYMMETRY = False
"""
Whether mmwrite() with symmetry='AUTO' will always search for symmetry inside the matrix.
This is scipy.io._mmio.mmwrite()'s default behavior, but has a significant performance cost on large matrices.
"""

# Matrix Market field name -> NumPy dtype name used to allocate/convert values.
_field_to_dtype = {
    "integer": "int64",
    "unsigned-integer": "uint64",
    "real": "float64",
    "complex": "complex",
    # 'pattern' is mapped to float64 here, same as 'real'.
    "pattern": "float64",
}

39 

40 

def _fmm_version():
    """
    Return the ``__version__`` attribute of the ``_fmm_core`` extension module.
    """
    # The extension is imported at call time rather than at module import.
    from . import _fmm_core as core
    return core.__version__

44 

45 

# Register with threadpoolctl, if available
try:
    import threadpoolctl

    class _FMMThreadPoolCtlController(threadpoolctl.LibController):
        """
        threadpoolctl controller that exposes this module's ``PARALLELISM``
        setting, so that ``threadpoolctl.threadpool_limits`` can change it.
        """
        user_api = "scipy"
        internal_api = "scipy_mmio"

        filename_prefixes = ("_fmm_core",)

        def get_num_threads(self):
            """Return the current value of the module-level ``PARALLELISM``."""
            return PARALLELISM

        def set_num_threads(self, num_threads):
            """Store `num_threads` in the module-level ``PARALLELISM``."""
            global PARALLELISM
            PARALLELISM = num_threads

        def get_version(self):
            """
            Return the version reported by ``_fmm_core``, or None if it
            cannot be determined.
            """
            # Call the helper: returning the function object itself would put
            # a callable where a version value is expected.
            try:
                return _fmm_version()
            except (ImportError, AttributeError):
                return None

        def set_additional_attributes(self):
            """No additional attributes are exposed."""
            pass

    threadpoolctl.register(_FMMThreadPoolCtlController)
except (ImportError, AttributeError):
    # threadpoolctl not installed or version too old
    pass

74 

75 

class _TextToBytesWrapper(io.BufferedReader):
    """
    Expose a TextIOBase string stream as a byte stream.

    Reads are forwarded to the wrapped text stream and the returned text is
    encoded with ``self.encoding`` / ``self.errors``.
    """

    def __init__(self, text_io_buffer, encoding=None, errors=None, **kwargs):
        super().__init__(text_io_buffer, **kwargs)
        # Explicit argument first, then the stream's own setting, then a default.
        self.encoding = encoding or text_io_buffer.encoding or 'utf-8'
        self.errors = errors or text_io_buffer.errors or 'strict'

    def __del__(self):
        # Detach rather than close, so the wrapped stream stays open.
        self.detach()

    def _encoding_call(self, method_name, *args, **kwargs):
        """Call `method_name` on the wrapped stream and encode its result."""
        text = getattr(self.raw, method_name)(*args, **kwargs)
        return text.encode(self.encoding, errors=self.errors)

    def read(self, size=-1):
        return self._encoding_call('read', size)

    def read1(self, size=-1):
        return self._encoding_call('read1', size)

    def peek(self, size=-1):
        return self._encoding_call('peek', size)

    def seek(self, offset, whence=0):
        # Only a seek to the very start or the very end is honoured.
        # Random seeks are not allowed because of non-trivial conversion
        # between byte and character offsets, with the possibility of a byte
        # offset landing within a character.
        if offset != 0 or whence not in (0, 2):
            # Any other seek is silently dropped.
            # In this application this may happen when pystreambuf seeks
            # during sync(), which can happen when closing a partially-read
            # stream. Ex. when mminfo() only reads the header then exits.
            return
        super().seek(offset, whence)

116 

117 

def _read_body_array(cursor):
    """
    Read the body of a MatrixMarket 'array' file into a dense ndarray.

    The output array is allocated from the cursor header's shape and field,
    then filled by ``_fmm_core.read_body_array``.
    """
    from . import _fmm_core

    header = cursor.header
    dense = np.zeros(header.shape, dtype=_field_to_dtype.get(header.field))
    _fmm_core.read_body_array(cursor, dense)
    return dense

127 

128 

def _read_body_coo(cursor, generalize_symmetry=True):
    """
    Read the body of a MatrixMarket 'coordinate' file.

    Returns ``((data, (rows, cols)), shape)``. When `generalize_symmetry` is
    true and the header symmetry is not 'general', the off-diagonal entries
    are mirrored across the diagonal (negated for 'skew-symmetric',
    conjugated for 'hermitian') and appended to the triplet.
    """
    from . import _fmm_core

    header = cursor.header
    nnz = header.nnz

    # Use int64 indices only when a dimension does not fit in int32.
    fits_int32 = header.nrows < 2**31 and header.ncols < 2**31
    index_dtype = "int32" if fits_int32 else "int64"

    rows = np.zeros(nnz, dtype=index_dtype)
    cols = np.zeros(nnz, dtype=index_dtype)
    vals = np.zeros(nnz, dtype=_field_to_dtype.get(header.field))

    _fmm_core.read_body_coo(cursor, rows, cols, vals)

    symmetry = header.symmetry
    if generalize_symmetry and symmetry != "general":
        off_diag = rows != cols
        mirrored_rows = cols[off_diag]
        mirrored_cols = rows[off_diag]
        # Boolean-mask indexing copies, so the in-place negate below does not
        # touch `vals`.
        mirrored_vals = vals[off_diag]

        if symmetry == "skew-symmetric":
            mirrored_vals *= -1
        elif symmetry == "hermitian":
            mirrored_vals = mirrored_vals.conjugate()

        rows = np.concatenate((rows, mirrored_rows))
        cols = np.concatenate((cols, mirrored_cols))
        vals = np.concatenate((vals, mirrored_vals))

    return (vals, (rows, cols)), header.shape

162 

163 

def _get_read_cursor(source, parallelism=None):
    """
    Open a Matrix Market source for reading.

    Parameters
    ----------
    source : path-like or file-like
        File path (``.gz`` and ``.bz2`` paths are opened through `gzip` /
        `bz2`) or an object with a ``read`` method. Text streams are wrapped
        so that they yield bytes.
    parallelism : int, optional
        Thread count passed to ``_fmm_core``. Defaults to ``PARALLELISM``.

    Returns
    -------
    cursor
        Read cursor created by ``_fmm_core``.
    stream_to_close
        The compressed stream opened here that the caller must close once
        reading is done, or None if nothing was opened.

    Raises
    ------
    TypeError
        If `source` is neither a path nor an object with a ``read`` method.
    """
    from . import _fmm_core

    ret_stream_to_close = None
    if parallelism is None:
        parallelism = PARALLELISM

    try:
        source = os.fspath(source)
        # It's a file path
        is_path = True
    except TypeError:
        is_path = False

    if is_path:
        # fsdecode handles bytes paths; str() would give the "b'...'" repr.
        path = os.fsdecode(source)
        if path.endswith('.gz'):
            import gzip
            source = gzip.GzipFile(path, 'r')
            ret_stream_to_close = source
        elif path.endswith('.bz2'):
            import bz2
            source = bz2.BZ2File(path, 'rb')
            ret_stream_to_close = source
        else:
            return _fmm_core.open_read_file(path, parallelism), ret_stream_to_close

    # Stream object.
    if not hasattr(source, "read"):
        raise TypeError("Unknown source type")

    if isinstance(source, io.TextIOBase):
        source = _TextToBytesWrapper(source)

    try:
        cursor = _fmm_core.open_read_stream(source, parallelism)
    except BaseException:
        # The caller never receives the stream on failure, so close it here
        # instead of leaking the file handle.
        if ret_stream_to_close is not None:
            ret_stream_to_close.close()
        raise
    return cursor, ret_stream_to_close

201 

202 

def _get_write_cursor(target, h=None, comment=None, parallelism=None, symmetry="general", precision=None):
    """
    Open a Matrix Market target for writing.

    Parameters
    ----------
    target : path-like or binary file-like
        File path, or an object with a ``write`` method opened in binary mode.
    h : header, optional
        ``_fmm_core`` header to write. Built from `comment` and `symmetry`
        when not given.
    comment : str, optional
        Header comment; None is treated as ''.
    parallelism : int, optional
        Thread count passed to ``_fmm_core``. Defaults to ``PARALLELISM``.
    symmetry : str, optional
        Header symmetry; None is treated as "general".
    precision : int, optional
        Passed to ``_fmm_core``; None is passed as -1.

    Returns
    -------
    cursor
        Write cursor created by ``_fmm_core``.

    Raises
    ------
    TypeError
        If `target` is a text stream, or is neither a path nor an object with
        a ``write`` method.
    """
    from . import _fmm_core

    if parallelism is None:
        parallelism = PARALLELISM
    if comment is None:
        comment = ''
    if symmetry is None:
        symmetry = "general"
    if precision is None:
        precision = -1

    if not h:
        h = _fmm_core.header(comment=comment, symmetry=symmetry)

    # Keep the try body to the path probe only, so that a TypeError raised
    # while opening the file is not mistaken for "target is not a path".
    try:
        path = os.fspath(target)
    except TypeError:
        path = None

    if path is not None:
        # It's a file path. fsdecode handles bytes paths; str() would give
        # the "b'...'" repr.
        return _fmm_core.open_write_file(os.fsdecode(path), h, parallelism, precision)

    if not hasattr(target, "write"):
        raise TypeError("Unknown source object")

    # Stream object.
    if isinstance(target, io.TextIOBase):
        raise TypeError("target stream must be open in binary mode.")
    return _fmm_core.open_write_stream(target, h, parallelism, precision)

235 

236 

def _apply_field(data, field, no_pattern=False):
    """
    Convert `data` to the dtype that matches a MatrixMarket field type.

    Parameters
    ----------
    data : ndarray
        Input array.

    field : str
        Matrix Market field, such as 'real', 'complex', 'integer', 'pattern'.
        None leaves `data` untouched.

    no_pattern : bool, optional
        Controls the 'pattern' field only. If True, `data` is returned
        unchanged; if False (default), an empty array is returned instead.

    Returns
    -------
    data : ndarray
        Input data if no conversion necessary, or a converted version

    Raises
    ------
    ValueError
        If `field` is not a known field name.
    """
    if field is None:
        return data

    if field == "pattern":
        return data if no_pattern else np.zeros(0)

    target_dtype = _field_to_dtype.get(field)
    if target_dtype is not None:
        return np.asarray(data, dtype=target_dtype)

    raise ValueError("Invalid field.")

271 

272 

def _validate_symmetry(symmetry):
    """
    Normalize `symmetry` to one of the values MatrixMarket allows.

    None maps to "general". Any other value is converted with ``str`` and
    lower-cased; a ValueError is raised if the result is not one of
    "general", "symmetric", "skew-symmetric" or "hermitian".
    """
    if symmetry is None:
        return "general"

    allowed = ("general", "symmetric", "skew-symmetric", "hermitian")
    normalized = str(symmetry).lower()
    if normalized in allowed:
        return normalized

    raise ValueError("Invalid symmetry. Must be one of: " + ", ".join(allowed))

286 

287 

def mmread(source):
    """
    Reads the contents of a Matrix Market file-like 'source' into a matrix.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extensions .mtx, .mtx.gz, .mtx.bz2)
        or open file-like object.

    Returns
    -------
    a : ndarray or coo_matrix
        Dense or sparse matrix depending on the matrix format in the
        Matrix Market file.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mmread

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ...  5 5 7
    ...  2 3 1.0
    ...  3 4 2.0
    ...  3 5 3.0
    ...  4 1 4.0
    ...  4 2 5.0
    ...  4 3 6.0
    ...  4 4 7.0
    ... '''

    ``mmread(source)`` returns the data as sparse matrix in COO format.

    >>> m = mmread(StringIO(text))
    >>> m
    <5x5 sparse matrix of type '<class 'numpy.float64'>'
        with 7 stored elements in COOrdinate format>
    >>> m.toarray()
    array([[0., 0., 0., 0., 0.],
           [0., 0., 1., 0., 0.],
           [0., 0., 0., 2., 3.],
           [4., 5., 6., 7., 0.],
           [0., 0., 0., 0., 0.]])

    This method is threaded. The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     m = mmread(StringIO(text))

    """
    cursor, stream_to_close = _get_read_cursor(source)

    # Close any compressed stream opened on our behalf even when parsing the
    # body raises; otherwise the file handle would leak.
    try:
        if cursor.header.format == "array":
            return _read_body_array(cursor)
        triplet, shape = _read_body_coo(cursor, generalize_symmetry=True)
    finally:
        if stream_to_close is not None:
            stream_to_close.close()

    from scipy.sparse import coo_matrix
    return coo_matrix(triplet, shape=shape)

360 

361 

def mmwrite(target, a, comment=None, field=None, precision=None, symmetry="AUTO"):
    r"""
    Writes the sparse or dense array `a` to Matrix Market file-like `target`.

    Parameters
    ----------
    target : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object.
    a : array like
        Sparse or dense 2-D array.
    comment : str, optional
        Comments to be prepended to the Matrix Market file.
    field : None or str, optional
        Either 'real', 'complex', 'pattern', or 'integer'.
    precision : None or int, optional
        Number of digits to display for real or complex values.
    symmetry : None or str, optional
        Either 'AUTO', 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
        If symmetry is None the symmetry type of 'a' is determined by its
        values. If symmetry is 'AUTO' the symmetry type of 'a' is either
        determined or set to 'general', at mmwrite's discretion.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `symmetry` is not a recognized value, or if `a` is neither a
        dense array (or something convertible to one) nor a sparse matrix.
        The type of `a` is checked before `target` is opened.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import BytesIO
    >>> import numpy as np
    >>> from scipy.sparse import coo_matrix
    >>> from scipy.io import mmwrite

    Write a small NumPy array to a matrix market file.  The file will be
    written in the ``'array'`` format.

    >>> a = np.array([[1.0, 0, 0, 0], [0, 2.5, 0, 6.25]])
    >>> target = BytesIO()
    >>> mmwrite(target, a)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    2 4
    1
    0
    0
    2.5
    0
    0
    0
    6.25

    Add a comment to the output file, and set the precision to 3.

    >>> target = BytesIO()
    >>> mmwrite(target, a, comment='\n Some test data.\n', precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    % Some test data.
    %
    2 4
    1.00e+00
    0.00e+00
    0.00e+00
    2.50e+00
    0.00e+00
    0.00e+00
    0.00e+00
    6.25e+00

    Convert to a sparse matrix before calling ``mmwrite``.  This will
    result in the output format being ``'coordinate'`` rather than
    ``'array'``.

    >>> target = BytesIO()
    >>> mmwrite(target, coo_matrix(a), precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix coordinate real general
    %
    2 4 3
    1 1 1.00e+00
    2 2 2.50e+00
    2 4 6.25e+00

    Write a complex Hermitian array to a matrix market file.  Note that
    only six values are actually written to the file; the other values
    are implied by the symmetry.

    >>> z = np.array([[3, 1+2j, 4-3j], [1-2j, 1, -5j], [4+3j, 5j, 2.5]])
    >>> z
    array([[ 3. +0.j,  1. +2.j,  4. -3.j],
           [ 1. -2.j,  1. +0.j, -0. -5.j],
           [ 4. +3.j,  0. +5.j,  2.5+0.j]])

    >>> target = BytesIO()
    >>> mmwrite(target, z, precision=2)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array complex hermitian
    %
    3 3
    3.0e+00 0.0e+00
    1.0e+00 -2.0e+00
    4.0e+00 3.0e+00
    1.0e+00 0.0e+00
    0.0e+00 5.0e+00
    2.5e+00 0.0e+00

    This method is threaded. The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> target = BytesIO()
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     mmwrite(target, a)

    """
    from . import _fmm_core

    if isinstance(a, (list, tuple)) or hasattr(a, "__array__"):
        a = np.asarray(a)

    if symmetry == "AUTO":
        # Only search for symmetry when forced to, or when the matrix is
        # small enough (largest dimension below 100).
        if ALWAYS_FIND_SYMMETRY or (hasattr(a, "shape") and max(a.shape) < 100):
            symmetry = None
        else:
            symmetry = "general"

    if symmetry is None:
        # Import explicitly instead of relying on `scipy.io._mmio` already
        # being reachable as an attribute of the `scipy` package.
        from scipy.io import _mmio
        symmetry = _mmio.MMFile()._get_symmetry(a)

    symmetry = _validate_symmetry(symmetry)

    # Reject unsupported inputs before opening the target, so that a bad
    # argument does not create or truncate the output file.
    is_dense = isinstance(a, np.ndarray)
    if not is_dense and not scipy.sparse.issparse(a):
        raise ValueError("unknown matrix type: %s" % type(a))

    cursor = _get_write_cursor(target, comment=comment, precision=precision, symmetry=symmetry)

    if is_dense:
        # Write dense numpy arrays
        a = _apply_field(a, field, no_pattern=True)
        _fmm_core.write_body_array(cursor, a)
        return

    # Write sparse scipy matrices
    a = a.tocoo()

    # _validate_symmetry never returns None, so only "general" needs excluding.
    if symmetry != "general":
        # A symmetric matrix only specifies the elements below the diagonal.
        # Ensure that the matrix satisfies this requirement.
        from scipy.sparse import coo_array
        lower_triangle_mask = a.row >= a.col
        a = coo_array((a.data[lower_triangle_mask],
                       (a.row[lower_triangle_mask],
                        a.col[lower_triangle_mask])), shape=a.shape)

    data = _apply_field(a.data, field)
    _fmm_core.write_body_coo(cursor, a.shape, a.row, a.col, data)

525 

526 

def mminfo(source):
    """
    Return size and storage parameters from Matrix Market file-like 'source'.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object

    Returns
    -------
    rows : int
        Number of matrix rows.
    cols : int
        Number of matrix columns.
    entries : int
        Number of non-zero entries of a sparse matrix
        or rows*cols for a dense matrix.
    format : str
        Either 'coordinate' or 'array'.
    field : str
        Either 'real', 'complex', 'pattern', or 'integer'.
    symmetry : str
        Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mminfo

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ...  5 5 7
    ...  2 3 1.0
    ...  3 4 2.0
    ...  3 5 3.0
    ...  4 1 4.0
    ...  4 2 5.0
    ...  4 3 6.0
    ...  4 4 7.0
    ... '''


    ``mminfo(source)`` returns the number of rows, number of columns,
    number of entries, format, field type and symmetry attribute of the
    source file.

    >>> mminfo(StringIO(text))
    (5, 5, 7, 'coordinate', 'real', 'general')
    """
    # Only the header is needed, so a single thread is requested.
    cursor, stream_to_close = _get_read_cursor(source, 1)
    try:
        h = cursor.header
        cursor.close()
    finally:
        # Release any compressed stream opened on our behalf even if reading
        # the header or closing the cursor raises.
        if stream_to_close is not None:
            stream_to_close.close()
    return h.nrows, h.ncols, h.nnz, h.format, h.field, h.symmetry