Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/io/_fast_matrix_market/__init__.py: 17%

188 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-22 06:44 +0000

1# Copyright (C) 2022-2023 Adam Lugowski. All rights reserved. 

2# Use of this source code is governed by the BSD 2-clause license found in 

3# the LICENSE.txt file. 

4# SPDX-License-Identifier: BSD-2-Clause 

5""" 

6Matrix Market I/O with a C++ backend. 

7See http://math.nist.gov/MatrixMarket/formats.html 

8for information about the Matrix Market format. 

9 

10.. versionadded:: 1.12.0 

11""" 

12import io 

13import os 

14 

15import numpy as np 

16import scipy.sparse 

17from scipy.io import _mmio 

18 

19__all__ = ['mminfo', 'mmread', 'mmwrite'] 

20 

PARALLELISM = 0
"""
Thread count used by `mmread()` and `mmwrite()`.
A value of 0 selects the number of CPUs in the system.
Change it through `threadpoolctl`.
"""

ALWAYS_FIND_SYMMETRY = False
"""
When true, mmwrite() with symmetry='AUTO' always searches the matrix for
symmetry. That matches the default behavior of scipy.io._mmio.mmwrite(),
at a significant performance cost on large matrices.
"""

# Maps a Matrix Market field name to the NumPy dtype name used to hold it.
_field_to_dtype = {
    "integer": "int64",
    "unsigned-integer": "uint64",
    "real": "float64",
    "complex": "complex",
    "pattern": "float64",
}

42 

43 

def _fmm_version():
    """Return the ``__version__`` attribute of the ``_fmm_core`` module."""
    # Imported lazily so the compiled extension is only loaded when needed.
    from . import _fmm_core as core

    version = core.__version__
    return version

47 

48 

# Register with threadpoolctl, if available
try:
    import threadpoolctl

    class _FMMThreadPoolCtlController(threadpoolctl.LibController):
        """
        threadpoolctl controller backed by the module-level ``PARALLELISM``
        setting.
        """
        user_api = "scipy"
        internal_api = "scipy_mmio"

        filename_prefixes = ("_fmm_core",)

        def get_num_threads(self):
            """Return the current module-level ``PARALLELISM`` value."""
            return PARALLELISM

        def set_num_threads(self, num_threads):
            """Store `num_threads` in the module-level ``PARALLELISM``."""
            global PARALLELISM
            PARALLELISM = num_threads

        def get_version(self):
            """Return the value produced by ``_fmm_version()``."""
            # Call the helper: returning the bare function would hand
            # threadpoolctl a callable instead of the version value.
            return _fmm_version()

        def set_additional_attributes(self):
            """No additional attributes are recorded for this controller."""

    threadpoolctl.register(_FMMThreadPoolCtlController)
except (ImportError, AttributeError):
    # threadpoolctl not installed or version too old
    pass

77 

78 

class _TextToBytesWrapper(io.BufferedReader):
    """
    Present a text stream (``TextIOBase``) as a stream of bytes.

    Text returned by the wrapped stream is encoded with ``self.encoding``
    and ``self.errors`` before being handed to the caller.
    """

    def __init__(self, text_io_buffer, encoding=None, errors=None, **kwargs):
        super().__init__(text_io_buffer, **kwargs)
        # Precedence: explicit argument, then the wrapped stream's own
        # setting, then a fixed default.
        stream_encoding = text_io_buffer.encoding
        self.encoding = encoding or stream_encoding or 'utf-8'
        stream_errors = text_io_buffer.errors
        self.errors = errors or stream_errors or 'strict'

    def __del__(self):
        # Detach instead of closing, so the wrapped stream stays open.
        self.detach()

    def _encoding_call(self, method_name, *args, **kwargs):
        # Forward the call to the wrapped text stream and encode its result.
        text = getattr(self.raw, method_name)(*args, **kwargs)
        return text.encode(self.encoding, errors=self.errors)

    def read(self, size=-1):
        return self._encoding_call('read', size)

    def read1(self, size=-1):
        return self._encoding_call('read1', size)

    def peek(self, size=-1):
        return self._encoding_call('peek', size)

    def seek(self, offset, whence=0):
        # Only "rewind to start" and "jump to end" are honoured. Arbitrary
        # seeks would need a byte-offset to character-offset conversion, and
        # a byte offset may land in the middle of a character.
        if offset != 0 or whence not in (0, 2):
            # Silently ignore every other seek.
            # In this application this may happen when pystreambuf seeks
            # during sync(), e.g. when a partially-read stream is closed
            # (mminfo() reads only the header and then exits).
            return
        super().seek(offset, whence)

121 

122 

def _read_body_array(cursor):
    """
    Read the body of a MatrixMarket ``array`` file into a NumPy array.

    The result is allocated from the shape and field found in
    ``cursor.header`` and then filled by ``_fmm_core.read_body_array``.
    """
    from . import _fmm_core

    header = cursor.header
    dense = np.zeros(header.shape, dtype=_field_to_dtype.get(header.field))
    _fmm_core.read_body_array(cursor, dense)
    return dense

132 

133 

def _read_body_coo(cursor, generalize_symmetry=True):
    """
    Read the body of a MatrixMarket ``coordinate`` file.

    Returns ``((data, (rows, cols)), shape)``. When `generalize_symmetry`
    is true and the header declares a non-general symmetry, the mirrored
    off-diagonal entries are appended to the stored ones.
    """
    from . import _fmm_core

    header = cursor.header

    # Use int32 indices unless a dimension is too large to fit in int32.
    int32_limit = 2**31
    if header.nrows >= int32_limit or header.ncols >= int32_limit:
        index_dtype = "int64"
    else:
        index_dtype = "int32"

    rows = np.zeros(header.nnz, dtype=index_dtype)
    cols = np.zeros(header.nnz, dtype=index_dtype)
    values = np.zeros(header.nnz, dtype=_field_to_dtype.get(header.field))

    _fmm_core.read_body_coo(cursor, rows, cols, values)

    if not generalize_symmetry or header.symmetry == "general":
        return (values, (rows, cols)), header.shape

    # Mirror every off-diagonal entry across the diagonal.
    off_diagonal = rows != cols
    mirrored_rows = cols[off_diagonal]
    mirrored_cols = rows[off_diagonal]
    mirrored_values = values[off_diagonal]

    if header.symmetry == "skew-symmetric":
        mirrored_values *= -1
    elif header.symmetry == "hermitian":
        mirrored_values = mirrored_values.conjugate()

    rows = np.concatenate((rows, mirrored_rows))
    cols = np.concatenate((cols, mirrored_cols))
    values = np.concatenate((values, mirrored_values))

    return (values, (rows, cols)), header.shape

167 

168 

def _get_read_cursor(source, parallelism=None):
    """
    Open `source` for reading.

    Parameters
    ----------
    source : path-like or file-like
        A file path (paths ending in ``.gz`` or ``.bz2`` are opened through
        ``gzip`` / ``bz2``) or an object with a ``read`` method.
    parallelism : int, optional
        Thread count passed to the reader. Defaults to the module-level
        ``PARALLELISM``.

    Returns
    -------
    cursor
        The read cursor created by ``_fmm_core``.
    stream_to_close
        The compressed stream opened here, which the caller must close once
        reading is finished, or None if nothing was opened.

    Raises
    ------
    TypeError
        If `source` is neither a path nor an object with a ``read`` method.
    """
    from . import _fmm_core

    if parallelism is None:
        parallelism = PARALLELISM

    ret_stream_to_close = None

    try:
        source = os.fspath(source)
    except TypeError:
        is_path = False
    else:
        # It's a file path
        is_path = True

    if is_path:
        # os.fspath() may return bytes; str() on bytes would yield "b'...'",
        # so decode with the filesystem encoding instead.
        path = os.fsdecode(source)
        if path.endswith('.gz'):
            import gzip
            source = gzip.GzipFile(path, 'r')
            ret_stream_to_close = source
        elif path.endswith('.bz2'):
            import bz2
            source = bz2.BZ2File(path, 'rb')
            ret_stream_to_close = source
        else:
            return _fmm_core.open_read_file(path, parallelism), ret_stream_to_close

    # Stream object.
    if not hasattr(source, "read"):
        raise TypeError("Unknown source type")

    if isinstance(source, io.TextIOBase):
        source = _TextToBytesWrapper(source)

    try:
        cursor = _fmm_core.open_read_stream(source, parallelism)
    except BaseException:
        # The caller never receives the stream opened above, so close it
        # here rather than leak the file handle.
        if ret_stream_to_close is not None:
            ret_stream_to_close.close()
        raise
    return cursor, ret_stream_to_close

206 

207 

def _get_write_cursor(target, h=None, comment=None, parallelism=None,
                      symmetry="general", precision=None):
    """
    Open `target` for writing.

    Parameters
    ----------
    target : path-like or file-like
        A file path, or a binary stream with a ``write`` method.
    h : header, optional
        Header to write. When not given, one is built from `comment` and
        `symmetry`.
    comment : str, optional
        Header comment; None is treated as the empty string.
    parallelism : int, optional
        Thread count passed to the writer. Defaults to the module-level
        ``PARALLELISM``.
    symmetry : str, optional
        Header symmetry; None is treated as "general".
    precision : int, optional
        Precision passed to the writer; None is passed on as -1.

    Returns
    -------
    cursor
        The write cursor created by ``_fmm_core``.

    Raises
    ------
    TypeError
        If `target` is a text-mode stream, or is neither a path nor an
        object with a ``write`` method.
    """
    from . import _fmm_core

    if parallelism is None:
        parallelism = PARALLELISM
    if comment is None:
        comment = ''
    if symmetry is None:
        symmetry = "general"
    if precision is None:
        precision = -1

    if not h:
        h = _fmm_core.header(comment=comment, symmetry=symmetry)

    # Only the path probe is guarded: a TypeError raised while opening the
    # file itself must propagate instead of being mistaken for "not a path".
    try:
        target = os.fspath(target)
    except TypeError:
        pass
    else:
        # It's a file path. os.fspath() may return bytes, so decode with the
        # filesystem encoding rather than str().
        return _fmm_core.open_write_file(os.fsdecode(target), h,
                                         parallelism, precision)

    if not hasattr(target, "write"):
        raise TypeError("Unknown source object")

    # Stream object.
    if isinstance(target, io.TextIOBase):
        raise TypeError("target stream must be open in binary mode.")
    return _fmm_core.open_write_stream(target, h, parallelism, precision)

241 

242 

def _apply_field(data, field, no_pattern=False):
    """
    Make ``data.dtype`` agree with the requested MatrixMarket field type.

    Parameters
    ----------
    data : ndarray
        Input array.

    field : str
        Matrix Market field, such as 'real', 'complex', 'integer', 'pattern'.
        None leaves `data` untouched.

    no_pattern : bool, optional
        For a 'pattern' field: if true, `data` is returned unchanged;
        otherwise an empty array is returned in its place.

    Returns
    -------
    data : ndarray
        The input itself when no conversion is needed, else a converted
        version.

    Raises
    ------
    ValueError
        If `field` is not a known field name.
    """
    if field is None:
        return data

    if field == "pattern":
        # A pattern matrix carries no values.
        return data if no_pattern else np.zeros(0)

    if field not in _field_to_dtype:
        raise ValueError("Invalid field.")

    return np.asarray(data, dtype=_field_to_dtype[field])

277 

278 

def _validate_symmetry(symmetry):
    """
    Normalize `symmetry` and check that MatrixMarket allows it.

    None maps to "general". Any other value is converted with ``str`` and
    lower-cased. A ValueError is raised if the result is not a supported
    symmetry name.
    """
    if symmetry is None:
        return "general"

    allowed = ("general", "symmetric", "skew-symmetric", "hermitian")
    normalized = str(symmetry).lower()
    if normalized in allowed:
        return normalized

    raise ValueError("Invalid symmetry. Must be one of: " + ", ".join(allowed))

292 

293 

def mmread(source):
    """
    Reads the contents of a Matrix Market file-like 'source' into a matrix.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extensions .mtx, .mtx.gz, .mtx.bz2)
        or open file-like object.

    Returns
    -------
    a : ndarray or coo_matrix
        Dense or sparse matrix depending on the matrix format in the
        Matrix Market file.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mmread

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ... 5 5 7
    ... 2 3 1.0
    ... 3 4 2.0
    ... 3 5 3.0
    ... 4 1 4.0
    ... 4 2 5.0
    ... 4 3 6.0
    ... 4 4 7.0
    ... '''

    ``mmread(source)`` returns the data as sparse matrix in COO format.

    >>> m = mmread(StringIO(text))
    >>> m
    <5x5 sparse matrix of type '<class 'numpy.float64'>'
        with 7 stored elements in COOrdinate format>
    >>> m.toarray()
    array([[0., 0., 0., 0., 0.],
           [0., 0., 1., 0., 0.],
           [0., 0., 0., 2., 3.],
           [4., 5., 6., 7., 0.],
           [0., 0., 0., 0., 0.]])

    This method is threaded.
    The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     m = mmread(StringIO(text))

    """
    cursor, stream_to_close = _get_read_cursor(source)

    # The finally clause closes any compressed stream opened on our behalf,
    # even when reading the body raises.
    try:
        if cursor.header.format == "array":
            return _read_body_array(cursor)

        from scipy.sparse import coo_matrix
        triplet, shape = _read_body_coo(cursor, generalize_symmetry=True)
    finally:
        if stream_to_close:
            stream_to_close.close()

    return coo_matrix(triplet, shape=shape)

367 

368 

def mmwrite(target, a, comment=None, field=None, precision=None, symmetry="AUTO"):
    r"""
    Writes the sparse or dense array `a` to Matrix Market file-like `target`.

    Parameters
    ----------
    target : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object.
    a : array like
        Sparse or dense 2-D array.
    comment : str, optional
        Comments to be prepended to the Matrix Market file.
    field : None or str, optional
        Either 'real', 'complex', 'pattern', or 'integer'.
    precision : None or int, optional
        Number of digits to display for real or complex values.
    symmetry : None or str, optional
        Either 'AUTO', 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.
        If symmetry is None the symmetry type of 'a' is determined by its
        values. If symmetry is 'AUTO' the symmetry type of 'a' is either
        determined or set to 'general', at mmwrite's discretion.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `symmetry` is not one of the accepted values, or if `a` is
        neither a dense array (after conversion) nor a SciPy sparse matrix.
        The type of `a` is checked before `target` is opened.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import BytesIO
    >>> import numpy as np
    >>> from scipy.sparse import coo_matrix
    >>> from scipy.io import mmwrite

    Write a small NumPy array to a matrix market file.  The file will be
    written in the ``'array'`` format.

    >>> a = np.array([[1.0, 0, 0, 0], [0, 2.5, 0, 6.25]])
    >>> target = BytesIO()
    >>> mmwrite(target, a)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    2 4
    1
    0
    0
    2.5
    0
    0
    0
    6.25

    Add a comment to the output file, and set the precision to 3.

    >>> target = BytesIO()
    >>> mmwrite(target, a, comment='\n Some test data.\n', precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array real general
    %
    % Some test data.
    %
    2 4
    1.00e+00
    0.00e+00
    0.00e+00
    2.50e+00
    0.00e+00
    0.00e+00
    0.00e+00
    6.25e+00

    Convert to a sparse matrix before calling ``mmwrite``.  This will
    result in the output format being ``'coordinate'`` rather than
    ``'array'``.

    >>> target = BytesIO()
    >>> mmwrite(target, coo_matrix(a), precision=3)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix coordinate real general
    %
    2 4 3
    1 1 1.00e+00
    2 2 2.50e+00
    2 4 6.25e+00

    Write a complex Hermitian array to a matrix market file.  Note that
    only six values are actually written to the file; the other values
    are implied by the symmetry.

    >>> z = np.array([[3, 1+2j, 4-3j], [1-2j, 1, -5j], [4+3j, 5j, 2.5]])
    >>> z
    array([[ 3. +0.j,  1. +2.j,  4. -3.j],
           [ 1. -2.j,  1. +0.j, -0. -5.j],
           [ 4. +3.j,  0. +5.j,  2.5+0.j]])

    >>> target = BytesIO()
    >>> mmwrite(target, z, precision=2)
    >>> print(target.getvalue().decode('latin1'))
    %%MatrixMarket matrix array complex hermitian
    %
    3 3
    3.0e+00 0.0e+00
    1.0e+00 -2.0e+00
    4.0e+00 3.0e+00
    1.0e+00 0.0e+00
    0.0e+00 5.0e+00
    2.5e+00 0.0e+00

    This method is threaded.
    The default number of threads is equal to the number of CPUs in the system.
    Use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_ to override:

    >>> import threadpoolctl
    >>>
    >>> target = BytesIO()
    >>> with threadpoolctl.threadpool_limits(limits=2):
    ...     mmwrite(target, a)

    """
    from . import _fmm_core

    if isinstance(a, (list, tuple)) or hasattr(a, "__array__"):
        a = np.asarray(a)

    if symmetry == "AUTO":
        # Search for symmetry only when forced to, or when the matrix is
        # small (largest dimension below 100); otherwise use "general".
        if ALWAYS_FIND_SYMMETRY or (hasattr(a, "shape") and max(a.shape) < 100):
            symmetry = None
        else:
            symmetry = "general"

    if symmetry is None:
        symmetry = _mmio.MMFile()._get_symmetry(a)

    symmetry = _validate_symmetry(symmetry)

    # Reject unsupported inputs before the target is opened, so a bad `a`
    # does not leave behind a created or truncated output file.
    is_dense = isinstance(a, np.ndarray)
    if not is_dense and not scipy.sparse.issparse(a):
        raise ValueError("unknown matrix type: %s" % type(a))

    cursor = _get_write_cursor(target, comment=comment,
                               precision=precision, symmetry=symmetry)

    if is_dense:
        # Write dense numpy arrays
        a = _apply_field(a, field, no_pattern=True)
        _fmm_core.write_body_array(cursor, a)
        return

    # Write sparse scipy matrices
    a = a.tocoo()

    if symmetry != "general":
        # A symmetric matrix only specifies the elements below the diagonal.
        # Ensure that the matrix satisfies this requirement.
        from scipy.sparse import coo_array
        lower_triangle_mask = a.row >= a.col
        a = coo_array((a.data[lower_triangle_mask],
                       (a.row[lower_triangle_mask],
                        a.col[lower_triangle_mask])), shape=a.shape)

    data = _apply_field(a.data, field)
    _fmm_core.write_body_coo(cursor, a.shape, a.row, a.col, data)

534 

535 

def mminfo(source):
    """
    Return size and storage parameters from Matrix Market file-like 'source'.

    Parameters
    ----------
    source : str or file-like
        Matrix Market filename (extension .mtx) or open file-like object

    Returns
    -------
    rows : int
        Number of matrix rows.
    cols : int
        Number of matrix columns.
    entries : int
        Number of non-zero entries of a sparse matrix
        or rows*cols for a dense matrix.
    format : str
        Either 'coordinate' or 'array'.
    field : str
        Either 'real', 'complex', 'pattern', or 'integer'.
    symmetry : str
        Either 'general', 'symmetric', 'skew-symmetric', or 'hermitian'.

    Notes
    -----
    .. versionchanged:: 1.12.0
        C++ implementation.

    Examples
    --------
    >>> from io import StringIO
    >>> from scipy.io import mminfo

    >>> text = '''%%MatrixMarket matrix coordinate real general
    ... 5 5 7
    ... 2 3 1.0
    ... 3 4 2.0
    ... 3 5 3.0
    ... 4 1 4.0
    ... 4 2 5.0
    ... 4 3 6.0
    ... 4 4 7.0
    ... '''


    ``mminfo(source)`` returns the number of rows, number of columns,
    number of entries, format, field type and symmetry attribute of the
    source file.

    >>> mminfo(StringIO(text))
    (5, 5, 7, 'coordinate', 'real', 'general')
    """
    # Parallelism of 1 is requested: only the header is needed here.
    cursor, stream_to_close = _get_read_cursor(source, 1)

    # The finally clause closes any compressed stream opened on our behalf,
    # even when closing the cursor raises.
    try:
        h = cursor.header
        cursor.close()
    finally:
        if stream_to_close:
            stream_to_close.close()

    return h.nrows, h.ncols, h.nnz, h.format, h.field, h.symmetry