Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_coo.py: 14%

290 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1""" A sparse matrix in COOrdinate or 'triplet' format""" 

2 

3__docformat__ = "restructuredtext en" 

4 

5__all__ = ['coo_matrix', 'isspmatrix_coo'] 

6 

7from warnings import warn 

8 

9import numpy as np 

10 

11 

12from ._sparsetools import coo_tocsr, coo_todense, coo_matvec 

13from ._base import isspmatrix, SparseEfficiencyWarning, spmatrix 

14from ._data import _data_matrix, _minmax_mixin 

15from ._sputils import (upcast, upcast_char, to_native, isshape, getdtype, 

16 getdata, get_index_dtype, downcast_intp_index, 

17 check_shape, check_reshape_kwargs) 

18 

19import operator 

20 

21 

22class coo_matrix(_data_matrix, _minmax_mixin): 

23 """ 

24 A sparse matrix in COOrdinate format. 

25 

26 Also known as the 'ijv' or 'triplet' format. 

27 

28 This can be instantiated in several ways: 

29 coo_matrix(D) 

30 with a dense matrix D 

31 

32 coo_matrix(S) 

33 with another sparse matrix S (equivalent to S.tocoo()) 

34 

35 coo_matrix((M, N), [dtype]) 

36 to construct an empty matrix with shape (M, N) 

37 dtype is optional, defaulting to dtype='d'. 

38 

39 coo_matrix((data, (i, j)), [shape=(M, N)]) 

40 to construct from three arrays: 

41 1. data[:] the entries of the matrix, in any order 

42 2. i[:] the row indices of the matrix entries 

43 3. j[:] the column indices of the matrix entries 

44 

45 Where ``A[i[k], j[k]] = data[k]``. When shape is not 

46 specified, it is inferred from the index arrays 

47 

48 Attributes 

49 ---------- 

50 dtype : dtype 

51 Data type of the matrix 

52 shape : 2-tuple 

53 Shape of the matrix 

54 ndim : int 

55 Number of dimensions (this is always 2) 

56 nnz 

57 Number of stored values, including explicit zeros 

58 data 

59 COO format data array of the matrix 

60 row 

61 COO format row index array of the matrix 

62 col 

63 COO format column index array of the matrix 

64 

65 Notes 

66 ----- 

67 

68 Sparse matrices can be used in arithmetic operations: they support 

69 addition, subtraction, multiplication, division, and matrix power. 

70 

71 Advantages of the COO format 

72 - facilitates fast conversion among sparse formats 

73 - permits duplicate entries (see example) 

74 - very fast conversion to and from CSR/CSC formats 

75 

76 Disadvantages of the COO format 

77 - does not directly support: 

78 + arithmetic operations 

79 + slicing 

80 

81 Intended Usage 

82 - COO is a fast format for constructing sparse matrices 

83 - Once a matrix has been constructed, convert to CSR or 

84 CSC format for fast arithmetic and matrix vector operations 

85 - By default when converting to CSR or CSC format, duplicate (i,j) 

86 entries will be summed together. This facilitates efficient 

87 construction of finite element matrices and the like. (see example) 

88 

89 Examples 

90 -------- 

91 

92 >>> # Constructing an empty matrix 

93 >>> import numpy as np 

94 >>> from scipy.sparse import coo_matrix 

95 >>> coo_matrix((3, 4), dtype=np.int8).toarray() 

96 array([[0, 0, 0, 0], 

97 [0, 0, 0, 0], 

98 [0, 0, 0, 0]], dtype=int8) 

99 

100 >>> # Constructing a matrix using ijv format 

101 >>> row = np.array([0, 3, 1, 0]) 

102 >>> col = np.array([0, 3, 1, 2]) 

103 >>> data = np.array([4, 5, 7, 9]) 

104 >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray() 

105 array([[4, 0, 9, 0], 

106 [0, 7, 0, 0], 

107 [0, 0, 0, 0], 

108 [0, 0, 0, 5]]) 

109 

110 >>> # Constructing a matrix with duplicate indices 

111 >>> row = np.array([0, 0, 1, 3, 1, 0, 0]) 

112 >>> col = np.array([0, 2, 1, 3, 1, 0, 0]) 

113 >>> data = np.array([1, 1, 1, 1, 1, 1, 1]) 

114 >>> coo = coo_matrix((data, (row, col)), shape=(4, 4)) 

115 >>> # Duplicate indices are maintained until implicitly or explicitly summed 

116 >>> np.max(coo.data) 

117 1 

118 >>> coo.toarray() 

119 array([[3, 0, 1, 0], 

120 [0, 2, 0, 0], 

121 [0, 0, 0, 0], 

122 [0, 0, 0, 1]]) 

123 

124 """ 

125 format = 'coo' 

126 

127 def __init__(self, arg1, shape=None, dtype=None, copy=False): 

128 _data_matrix.__init__(self) 

129 

130 if isinstance(arg1, tuple): 

131 if isshape(arg1): 

132 M, N = arg1 

133 self._shape = check_shape((M, N)) 

134 idx_dtype = get_index_dtype(maxval=max(M, N)) 

135 data_dtype = getdtype(dtype, default=float) 

136 self.row = np.array([], dtype=idx_dtype) 

137 self.col = np.array([], dtype=idx_dtype) 

138 self.data = np.array([], dtype=data_dtype) 

139 self.has_canonical_format = True 

140 else: 

141 try: 

142 obj, (row, col) = arg1 

143 except (TypeError, ValueError) as e: 

144 raise TypeError('invalid input format') from e 

145 

146 if shape is None: 

147 if len(row) == 0 or len(col) == 0: 

148 raise ValueError('cannot infer dimensions from zero ' 

149 'sized index arrays') 

150 M = operator.index(np.max(row)) + 1 

151 N = operator.index(np.max(col)) + 1 

152 self._shape = check_shape((M, N)) 

153 else: 

154 # Use 2 steps to ensure shape has length 2. 

155 M, N = shape 

156 self._shape = check_shape((M, N)) 

157 

158 idx_dtype = get_index_dtype(maxval=max(self.shape)) 

159 self.row = np.array(row, copy=copy, dtype=idx_dtype) 

160 self.col = np.array(col, copy=copy, dtype=idx_dtype) 

161 self.data = getdata(obj, copy=copy, dtype=dtype) 

162 self.has_canonical_format = False 

163 else: 

164 if isspmatrix(arg1): 

165 if isspmatrix_coo(arg1) and copy: 

166 self.row = arg1.row.copy() 

167 self.col = arg1.col.copy() 

168 self.data = arg1.data.copy() 

169 self._shape = check_shape(arg1.shape) 

170 else: 

171 coo = arg1.tocoo() 

172 self.row = coo.row 

173 self.col = coo.col 

174 self.data = coo.data 

175 self._shape = check_shape(coo.shape) 

176 self.has_canonical_format = False 

177 else: 

178 #dense argument 

179 M = np.atleast_2d(np.asarray(arg1)) 

180 

181 if M.ndim != 2: 

182 raise TypeError('expected dimension <= 2 array or matrix') 

183 

184 self._shape = check_shape(M.shape) 

185 if shape is not None: 

186 if check_shape(shape) != self._shape: 

187 raise ValueError('inconsistent shapes: %s != %s' % 

188 (shape, self._shape)) 

189 

190 self.row, self.col = M.nonzero() 

191 self.data = M[self.row, self.col] 

192 self.has_canonical_format = True 

193 

194 if dtype is not None: 

195 self.data = self.data.astype(dtype, copy=False) 

196 

197 self._check() 

198 

199 def reshape(self, *args, **kwargs): 

200 shape = check_shape(args, self.shape) 

201 order, copy = check_reshape_kwargs(kwargs) 

202 

203 # Return early if reshape is not required 

204 if shape == self.shape: 

205 if copy: 

206 return self.copy() 

207 else: 

208 return self 

209 

210 nrows, ncols = self.shape 

211 

212 if order == 'C': 

213 # Upcast to avoid overflows: the coo_matrix constructor 

214 # below will downcast the results to a smaller dtype, if 

215 # possible. 

216 dtype = get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1))) 

217 

218 flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col 

219 new_row, new_col = divmod(flat_indices, shape[1]) 

220 elif order == 'F': 

221 dtype = get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1))) 

222 

223 flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row 

224 new_col, new_row = divmod(flat_indices, shape[0]) 

225 else: 

226 raise ValueError("'order' must be 'C' or 'F'") 

227 

228 # Handle copy here rather than passing on to the constructor so that no 

229 # copy will be made of new_row and new_col regardless 

230 if copy: 

231 new_data = self.data.copy() 

232 else: 

233 new_data = self.data 

234 

235 return self.__class__((new_data, (new_row, new_col)), 

236 shape=shape, copy=False) 

237 

238 reshape.__doc__ = spmatrix.reshape.__doc__ 

239 

240 def getnnz(self, axis=None): 

241 if axis is None: 

242 nnz = len(self.data) 

243 if nnz != len(self.row) or nnz != len(self.col): 

244 raise ValueError('row, column, and data array must all be the ' 

245 'same length') 

246 

247 if self.data.ndim != 1 or self.row.ndim != 1 or \ 

248 self.col.ndim != 1: 

249 raise ValueError('row, column, and data arrays must be 1-D') 

250 

251 return int(nnz) 

252 

253 if axis < 0: 

254 axis += 2 

255 if axis == 0: 

256 return np.bincount(downcast_intp_index(self.col), 

257 minlength=self.shape[1]) 

258 elif axis == 1: 

259 return np.bincount(downcast_intp_index(self.row), 

260 minlength=self.shape[0]) 

261 else: 

262 raise ValueError('axis out of bounds') 

263 

264 getnnz.__doc__ = spmatrix.getnnz.__doc__ 

265 

266 def _check(self): 

267 """ Checks data structure for consistency """ 

268 

269 # index arrays should have integer data types 

270 if self.row.dtype.kind != 'i': 

271 warn("row index array has non-integer dtype (%s) " 

272 % self.row.dtype.name) 

273 if self.col.dtype.kind != 'i': 

274 warn("col index array has non-integer dtype (%s) " 

275 % self.col.dtype.name) 

276 

277 idx_dtype = get_index_dtype(maxval=max(self.shape)) 

278 self.row = np.asarray(self.row, dtype=idx_dtype) 

279 self.col = np.asarray(self.col, dtype=idx_dtype) 

280 self.data = to_native(self.data) 

281 

282 if self.nnz > 0: 

283 if self.row.max() >= self.shape[0]: 

284 raise ValueError('row index exceeds matrix dimensions') 

285 if self.col.max() >= self.shape[1]: 

286 raise ValueError('column index exceeds matrix dimensions') 

287 if self.row.min() < 0: 

288 raise ValueError('negative row index found') 

289 if self.col.min() < 0: 

290 raise ValueError('negative column index found') 

291 

292 def transpose(self, axes=None, copy=False): 

293 if axes is not None: 

294 raise ValueError(("Sparse matrices do not support " 

295 "an 'axes' parameter because swapping " 

296 "dimensions is the only logical permutation.")) 

297 

298 M, N = self.shape 

299 return self.__class__((self.data, (self.col, self.row)), 

300 shape=(N, M), copy=copy) 

301 

302 transpose.__doc__ = spmatrix.transpose.__doc__ 

303 

304 def resize(self, *shape): 

305 shape = check_shape(shape) 

306 new_M, new_N = shape 

307 M, N = self.shape 

308 

309 if new_M < M or new_N < N: 

310 mask = np.logical_and(self.row < new_M, self.col < new_N) 

311 if not mask.all(): 

312 self.row = self.row[mask] 

313 self.col = self.col[mask] 

314 self.data = self.data[mask] 

315 

316 self._shape = shape 

317 

318 resize.__doc__ = spmatrix.resize.__doc__ 

319 

320 def toarray(self, order=None, out=None): 

321 """See the docstring for `spmatrix.toarray`.""" 

322 B = self._process_toarray_args(order, out) 

323 fortran = int(B.flags.f_contiguous) 

324 if not fortran and not B.flags.c_contiguous: 

325 raise ValueError("Output array must be C or F contiguous") 

326 M,N = self.shape 

327 coo_todense(M, N, self.nnz, self.row, self.col, self.data, 

328 B.ravel('A'), fortran) 

329 return B 

330 

331 def tocsc(self, copy=False): 

332 """Convert this matrix to Compressed Sparse Column format 

333 

334 Duplicate entries will be summed together. 

335 

336 Examples 

337 -------- 

338 >>> from numpy import array 

339 >>> from scipy.sparse import coo_matrix 

340 >>> row = array([0, 0, 1, 3, 1, 0, 0]) 

341 >>> col = array([0, 2, 1, 3, 1, 0, 0]) 

342 >>> data = array([1, 1, 1, 1, 1, 1, 1]) 

343 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsc() 

344 >>> A.toarray() 

345 array([[3, 0, 1, 0], 

346 [0, 2, 0, 0], 

347 [0, 0, 0, 0], 

348 [0, 0, 0, 1]]) 

349 

350 """ 

351 if self.nnz == 0: 

352 return self._csc_container(self.shape, dtype=self.dtype) 

353 else: 

354 M,N = self.shape 

355 idx_dtype = get_index_dtype((self.col, self.row), 

356 maxval=max(self.nnz, M)) 

357 row = self.row.astype(idx_dtype, copy=False) 

358 col = self.col.astype(idx_dtype, copy=False) 

359 

360 indptr = np.empty(N + 1, dtype=idx_dtype) 

361 indices = np.empty_like(row, dtype=idx_dtype) 

362 data = np.empty_like(self.data, dtype=upcast(self.dtype)) 

363 

364 coo_tocsr(N, M, self.nnz, col, row, self.data, 

365 indptr, indices, data) 

366 

367 x = self._csc_container((data, indices, indptr), shape=self.shape) 

368 if not self.has_canonical_format: 

369 x.sum_duplicates() 

370 return x 

371 

372 def tocsr(self, copy=False): 

373 """Convert this matrix to Compressed Sparse Row format 

374 

375 Duplicate entries will be summed together. 

376 

377 Examples 

378 -------- 

379 >>> from numpy import array 

380 >>> from scipy.sparse import coo_matrix 

381 >>> row = array([0, 0, 1, 3, 1, 0, 0]) 

382 >>> col = array([0, 2, 1, 3, 1, 0, 0]) 

383 >>> data = array([1, 1, 1, 1, 1, 1, 1]) 

384 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr() 

385 >>> A.toarray() 

386 array([[3, 0, 1, 0], 

387 [0, 2, 0, 0], 

388 [0, 0, 0, 0], 

389 [0, 0, 0, 1]]) 

390 

391 """ 

392 if self.nnz == 0: 

393 return self._csr_container(self.shape, dtype=self.dtype) 

394 else: 

395 M,N = self.shape 

396 idx_dtype = get_index_dtype((self.row, self.col), 

397 maxval=max(self.nnz, N)) 

398 row = self.row.astype(idx_dtype, copy=False) 

399 col = self.col.astype(idx_dtype, copy=False) 

400 

401 indptr = np.empty(M + 1, dtype=idx_dtype) 

402 indices = np.empty_like(col, dtype=idx_dtype) 

403 data = np.empty_like(self.data, dtype=upcast(self.dtype)) 

404 

405 coo_tocsr(M, N, self.nnz, row, col, self.data, 

406 indptr, indices, data) 

407 

408 x = self._csr_container((data, indices, indptr), shape=self.shape) 

409 if not self.has_canonical_format: 

410 x.sum_duplicates() 

411 return x 

412 

413 def tocoo(self, copy=False): 

414 if copy: 

415 return self.copy() 

416 else: 

417 return self 

418 

419 tocoo.__doc__ = spmatrix.tocoo.__doc__ 

420 

421 def todia(self, copy=False): 

422 self.sum_duplicates() 

423 ks = self.col - self.row # the diagonal for each nonzero 

424 diags, diag_idx = np.unique(ks, return_inverse=True) 

425 

426 if len(diags) > 100: 

427 # probably undesired, should todia() have a maxdiags parameter? 

428 warn("Constructing a DIA matrix with %d diagonals " 

429 "is inefficient" % len(diags), SparseEfficiencyWarning) 

430 

431 #initialize and fill in data array 

432 if self.data.size == 0: 

433 data = np.zeros((0, 0), dtype=self.dtype) 

434 else: 

435 data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype) 

436 data[diag_idx, self.col] = self.data 

437 

438 return self._dia_container((data, diags), shape=self.shape) 

439 

440 todia.__doc__ = spmatrix.todia.__doc__ 

441 

442 def todok(self, copy=False): 

443 self.sum_duplicates() 

444 dok = self._dok_container((self.shape), dtype=self.dtype) 

445 dok._update(zip(zip(self.row,self.col),self.data)) 

446 

447 return dok 

448 

449 todok.__doc__ = spmatrix.todok.__doc__ 

450 

451 def diagonal(self, k=0): 

452 rows, cols = self.shape 

453 if k <= -rows or k >= cols: 

454 return np.empty(0, dtype=self.data.dtype) 

455 diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)), 

456 dtype=self.dtype) 

457 diag_mask = (self.row + k) == self.col 

458 

459 if self.has_canonical_format: 

460 row = self.row[diag_mask] 

461 data = self.data[diag_mask] 

462 else: 

463 row, _, data = self._sum_duplicates(self.row[diag_mask], 

464 self.col[diag_mask], 

465 self.data[diag_mask]) 

466 diag[row + min(k, 0)] = data 

467 

468 return diag 

469 

470 diagonal.__doc__ = _data_matrix.diagonal.__doc__ 

471 

472 def _setdiag(self, values, k): 

473 M, N = self.shape 

474 if values.ndim and not len(values): 

475 return 

476 idx_dtype = self.row.dtype 

477 

478 # Determine which triples to keep and where to put the new ones. 

479 full_keep = self.col - self.row != k 

480 if k < 0: 

481 max_index = min(M+k, N) 

482 if values.ndim: 

483 max_index = min(max_index, len(values)) 

484 keep = np.logical_or(full_keep, self.col >= max_index) 

485 new_row = np.arange(-k, -k + max_index, dtype=idx_dtype) 

486 new_col = np.arange(max_index, dtype=idx_dtype) 

487 else: 

488 max_index = min(M, N-k) 

489 if values.ndim: 

490 max_index = min(max_index, len(values)) 

491 keep = np.logical_or(full_keep, self.row >= max_index) 

492 new_row = np.arange(max_index, dtype=idx_dtype) 

493 new_col = np.arange(k, k + max_index, dtype=idx_dtype) 

494 

495 # Define the array of data consisting of the entries to be added. 

496 if values.ndim: 

497 new_data = values[:max_index] 

498 else: 

499 new_data = np.empty(max_index, dtype=self.dtype) 

500 new_data[:] = values 

501 

502 # Update the internal structure. 

503 self.row = np.concatenate((self.row[keep], new_row)) 

504 self.col = np.concatenate((self.col[keep], new_col)) 

505 self.data = np.concatenate((self.data[keep], new_data)) 

506 self.has_canonical_format = False 

507 

508 # needed by _data_matrix 

509 def _with_data(self,data,copy=True): 

510 """Returns a matrix with the same sparsity structure as self, 

511 but with different data. By default the index arrays 

512 (i.e. .row and .col) are copied. 

513 """ 

514 if copy: 

515 return self.__class__((data, (self.row.copy(), self.col.copy())), 

516 shape=self.shape, dtype=data.dtype) 

517 else: 

518 return self.__class__((data, (self.row, self.col)), 

519 shape=self.shape, dtype=data.dtype) 

520 

521 def sum_duplicates(self): 

522 """Eliminate duplicate matrix entries by adding them together 

523 

524 This is an *in place* operation 

525 """ 

526 if self.has_canonical_format: 

527 return 

528 summed = self._sum_duplicates(self.row, self.col, self.data) 

529 self.row, self.col, self.data = summed 

530 self.has_canonical_format = True 

531 

532 def _sum_duplicates(self, row, col, data): 

533 # Assumes (data, row, col) not in canonical format. 

534 if len(data) == 0: 

535 return row, col, data 

536 order = np.lexsort((row, col)) 

537 row = row[order] 

538 col = col[order] 

539 data = data[order] 

540 unique_mask = ((row[1:] != row[:-1]) | 

541 (col[1:] != col[:-1])) 

542 unique_mask = np.append(True, unique_mask) 

543 row = row[unique_mask] 

544 col = col[unique_mask] 

545 unique_inds, = np.nonzero(unique_mask) 

546 data = np.add.reduceat(data, unique_inds, dtype=self.dtype) 

547 return row, col, data 

548 

549 def eliminate_zeros(self): 

550 """Remove zero entries from the matrix 

551 

552 This is an *in place* operation 

553 """ 

554 mask = self.data != 0 

555 self.data = self.data[mask] 

556 self.row = self.row[mask] 

557 self.col = self.col[mask] 

558 

559 ####################### 

560 # Arithmetic handlers # 

561 ####################### 

562 

563 def _add_dense(self, other): 

564 if other.shape != self.shape: 

565 raise ValueError('Incompatible shapes ({} and {})' 

566 .format(self.shape, other.shape)) 

567 dtype = upcast_char(self.dtype.char, other.dtype.char) 

568 result = np.array(other, dtype=dtype, copy=True) 

569 fortran = int(result.flags.f_contiguous) 

570 M, N = self.shape 

571 coo_todense(M, N, self.nnz, self.row, self.col, self.data, 

572 result.ravel('A'), fortran) 

573 return self._container(result, copy=False) 

574 

575 def _mul_vector(self, other): 

576 #output array 

577 result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, 

578 other.dtype.char)) 

579 coo_matvec(self.nnz, self.row, self.col, self.data, other, result) 

580 return result 

581 

582 def _mul_multivector(self, other): 

583 result = np.zeros((other.shape[1], self.shape[0]), 

584 dtype=upcast_char(self.dtype.char, other.dtype.char)) 

585 for i, col in enumerate(other.T): 

586 coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i]) 

587 return result.T.view(type=type(other)) 

588 

589 

590def isspmatrix_coo(x): 

591 """Is x of coo_matrix type? 

592 

593 Parameters 

594 ---------- 

595 x 

596 object to check for being a coo matrix 

597 

598 Returns 

599 ------- 

600 bool 

601 True if x is a coo matrix, False otherwise 

602 

603 Examples 

604 -------- 

605 >>> from scipy.sparse import coo_matrix, isspmatrix_coo 

606 >>> isspmatrix_coo(coo_matrix([[5]])) 

607 True 

608 

609 >>> from scipy.sparse import coo_matrix, csr_matrix, isspmatrix_coo 

610 >>> isspmatrix_coo(csr_matrix([[5]])) 

611 False 

612 """ 

613 from ._arrays import coo_array 

614 return isinstance(x, coo_matrix) or isinstance(x, coo_array)