Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_csr.py: 26%

133 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-03-22 06:44 +0000

1"""Compressed Sparse Row matrix format""" 

2 

# Declares the markup used by the docstrings in this module:
# reStructuredText, written in English.
__docformat__ = "restructuredtext en"

# Names that make up this module's public API (what ``import *`` exports).
__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']

6 

7import numpy as np 

8 

9from ._matrix import spmatrix 

10from ._base import _spbase, sparray 

11from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks, 

12 get_csr_submatrix) 

13from ._sputils import upcast 

14 

15from ._compressed import _cs_matrix 

16 

17 

18class _csr_base(_cs_matrix): 

19 _format = 'csr' 

20 

21 def transpose(self, axes=None, copy=False): 

22 if axes is not None and axes != (1, 0): 

23 raise ValueError("Sparse arrays/matrices do not support " 

24 "an 'axes' parameter because swapping " 

25 "dimensions is the only logical permutation.") 

26 

27 M, N = self.shape 

28 return self._csc_container((self.data, self.indices, 

29 self.indptr), shape=(N, M), copy=copy) 

30 

31 transpose.__doc__ = _spbase.transpose.__doc__ 

32 

33 def tolil(self, copy=False): 

34 lil = self._lil_container(self.shape, dtype=self.dtype) 

35 

36 self.sum_duplicates() 

37 ptr,ind,dat = self.indptr,self.indices,self.data 

38 rows, data = lil.rows, lil.data 

39 

40 for n in range(self.shape[0]): 

41 start = ptr[n] 

42 end = ptr[n+1] 

43 rows[n] = ind[start:end].tolist() 

44 data[n] = dat[start:end].tolist() 

45 

46 return lil 

47 

48 tolil.__doc__ = _spbase.tolil.__doc__ 

49 

50 def tocsr(self, copy=False): 

51 if copy: 

52 return self.copy() 

53 else: 

54 return self 

55 

56 tocsr.__doc__ = _spbase.tocsr.__doc__ 

57 

58 def tocsc(self, copy=False): 

59 idx_dtype = self._get_index_dtype((self.indptr, self.indices), 

60 maxval=max(self.nnz, self.shape[0])) 

61 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype) 

62 indices = np.empty(self.nnz, dtype=idx_dtype) 

63 data = np.empty(self.nnz, dtype=upcast(self.dtype)) 

64 

65 csr_tocsc(self.shape[0], self.shape[1], 

66 self.indptr.astype(idx_dtype), 

67 self.indices.astype(idx_dtype), 

68 self.data, 

69 indptr, 

70 indices, 

71 data) 

72 

73 A = self._csc_container((data, indices, indptr), shape=self.shape) 

74 A.has_sorted_indices = True 

75 return A 

76 

77 tocsc.__doc__ = _spbase.tocsc.__doc__ 

78 

79 def tobsr(self, blocksize=None, copy=True): 

80 if blocksize is None: 

81 from ._spfuncs import estimate_blocksize 

82 return self.tobsr(blocksize=estimate_blocksize(self)) 

83 

84 elif blocksize == (1,1): 

85 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr) 

86 return self._bsr_container(arg1, shape=self.shape, copy=copy) 

87 

88 else: 

89 R,C = blocksize 

90 M,N = self.shape 

91 

92 if R < 1 or C < 1 or M % R != 0 or N % C != 0: 

93 raise ValueError('invalid blocksize %s' % blocksize) 

94 

95 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices) 

96 

97 idx_dtype = self._get_index_dtype((self.indptr, self.indices), 

98 maxval=max(N//C, blks)) 

99 indptr = np.empty(M//R+1, dtype=idx_dtype) 

100 indices = np.empty(blks, dtype=idx_dtype) 

101 data = np.zeros((blks,R,C), dtype=self.dtype) 

102 

103 csr_tobsr(M, N, R, C, 

104 self.indptr.astype(idx_dtype), 

105 self.indices.astype(idx_dtype), 

106 self.data, 

107 indptr, indices, data.ravel()) 

108 

109 return self._bsr_container( 

110 (data, indices, indptr), shape=self.shape 

111 ) 

112 

113 tobsr.__doc__ = _spbase.tobsr.__doc__ 

114 

115 # these functions are used by the parent class (_cs_matrix) 

116 # to remove redundancy between csc_matrix and csr_array 

117 @staticmethod 

118 def _swap(x): 

119 """swap the members of x if this is a column-oriented matrix 

120 """ 

121 return x 

122 

123 def __iter__(self): 

124 indptr = np.zeros(2, dtype=self.indptr.dtype) 

125 shape = (1, self.shape[1]) 

126 i0 = 0 

127 for i1 in self.indptr[1:]: 

128 indptr[1] = i1 - i0 

129 indices = self.indices[i0:i1] 

130 data = self.data[i0:i1] 

131 yield self.__class__( 

132 (data, indices, indptr), shape=shape, copy=True 

133 ) 

134 i0 = i1 

135 

136 def _getrow(self, i): 

137 """Returns a copy of row i of the matrix, as a (1 x n) 

138 CSR matrix (row vector). 

139 """ 

140 M, N = self.shape 

141 i = int(i) 

142 if i < 0: 

143 i += M 

144 if i < 0 or i >= M: 

145 raise IndexError('index (%d) out of range' % i) 

146 indptr, indices, data = get_csr_submatrix( 

147 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N) 

148 return self.__class__((data, indices, indptr), shape=(1, N), 

149 dtype=self.dtype, copy=False) 

150 

151 def _getcol(self, i): 

152 """Returns a copy of column i of the matrix, as a (m x 1) 

153 CSR matrix (column vector). 

154 """ 

155 M, N = self.shape 

156 i = int(i) 

157 if i < 0: 

158 i += N 

159 if i < 0 or i >= N: 

160 raise IndexError('index (%d) out of range' % i) 

161 indptr, indices, data = get_csr_submatrix( 

162 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1) 

163 return self.__class__((data, indices, indptr), shape=(M, 1), 

164 dtype=self.dtype, copy=False) 

165 

166 def _get_intXarray(self, row, col): 

167 return self._getrow(row)._minor_index_fancy(col) 

168 

169 def _get_intXslice(self, row, col): 

170 if col.step in (1, None): 

171 return self._get_submatrix(row, col, copy=True) 

172 # TODO: uncomment this once it's faster: 

173 # return self._getrow(row)._minor_slice(col) 

174 

175 M, N = self.shape 

176 start, stop, stride = col.indices(N) 

177 

178 ii, jj = self.indptr[row:row+2] 

179 row_indices = self.indices[ii:jj] 

180 row_data = self.data[ii:jj] 

181 

182 if stride > 0: 

183 ind = (row_indices >= start) & (row_indices < stop) 

184 else: 

185 ind = (row_indices <= start) & (row_indices > stop) 

186 

187 if abs(stride) > 1: 

188 ind &= (row_indices - start) % stride == 0 

189 

190 row_indices = (row_indices[ind] - start) // stride 

191 row_data = row_data[ind] 

192 row_indptr = np.array([0, len(row_indices)]) 

193 

194 if stride < 0: 

195 row_data = row_data[::-1] 

196 row_indices = abs(row_indices[::-1]) 

197 

198 shape = (1, max(0, int(np.ceil(float(stop - start) / stride)))) 

199 return self.__class__((row_data, row_indices, row_indptr), shape=shape, 

200 dtype=self.dtype, copy=False) 

201 

202 def _get_sliceXint(self, row, col): 

203 if row.step in (1, None): 

204 return self._get_submatrix(row, col, copy=True) 

205 return self._major_slice(row)._get_submatrix(minor=col) 

206 

207 def _get_sliceXarray(self, row, col): 

208 return self._major_slice(row)._minor_index_fancy(col) 

209 

210 def _get_arrayXint(self, row, col): 

211 return self._major_index_fancy(row)._get_submatrix(minor=col) 

212 

213 def _get_arrayXslice(self, row, col): 

214 if col.step not in (1, None): 

215 col = np.arange(*col.indices(self.shape[1])) 

216 return self._get_arrayXarray(row, col) 

217 return self._major_index_fancy(row)._get_submatrix(minor=col) 

218 

219 

220def isspmatrix_csr(x): 

221 """Is `x` of csr_matrix type? 

222 

223 Parameters 

224 ---------- 

225 x 

226 object to check for being a csr matrix 

227 

228 Returns 

229 ------- 

230 bool 

231 True if `x` is a csr matrix, False otherwise 

232 

233 Examples 

234 -------- 

235 >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr 

236 >>> isspmatrix_csr(csr_matrix([[5]])) 

237 True 

238 >>> isspmatrix_csr(csr_array([[5]])) 

239 False 

240 >>> isspmatrix_csr(coo_matrix([[5]])) 

241 False 

242 """ 

243 return isinstance(x, csr_matrix) 

244 

245 

246# This namespace class separates array from matrix with isinstance 

class csr_array(_csr_base, sparray):
    """
    Compressed Sparse Row array.

    This can be instantiated in several ways:
        csr_array(D)
            where D is a 2-D ndarray

        csr_array(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_array((M, N), [dtype])
            to construct an empty array with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_array((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the array dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the array
    shape : 2-tuple
        Shape of the array
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the array
    indices
        CSR format index array of the array
    indptr
        CSR format index pointer array of the array
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse arrays can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
      - Within each row, indices are sorted by column.
      - There are no duplicate entries.

    Examples
    --------

    An empty array of a given shape and dtype:

    >>> import numpy as np
    >>> from scipy.sparse import csr_array
    >>> csr_array((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    Construction from ``(data, (row_ind, col_ind))``:

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Construction from the ``(data, indices, indptr)`` CSR representation:

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR array incrementally,
    the following snippet builds a term-document array from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...

    No ``shape`` is passed below, so the dimensions are inferred from the
    index arrays:

    >>> csr_array((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """
    # The class body defines no members of its own; everything comes from
    # the base classes listed in the class statement.

368 

369 

class csr_matrix(spmatrix, _csr_base):
    """
    Compressed Sparse Row matrix.

    This can be instantiated in several ways:
        csr_matrix(D)
            where D is a 2-D ndarray

        csr_matrix(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_matrix((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the matrix dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the matrix
    indices
        CSR format index array of the matrix
    indptr
        CSR format index pointer array of the matrix
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
      - Within each row, indices are sorted by column.
      - There are no duplicate entries.

    Examples
    --------

    An empty matrix of a given shape and dtype:

    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    Construction from ``(data, (row_ind, col_ind))``:

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Construction from the ``(data, indices, indptr)`` CSR representation:

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR matrix incrementally,
    the following snippet builds a term-document matrix from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...

    No ``shape`` is passed below, so the dimensions are inferred from the
    index arrays:

    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """
    # The class body defines no members of its own; everything comes from
    # the base classes listed in the class statement.

491