Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_csr.py: 25%

132 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-23 06:43 +0000

1"""Compressed Sparse Row matrix format""" 

2 

3__docformat__ = "restructuredtext en" 

4 

5__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr'] 

6 

7import numpy as np 

8 

9from ._matrix import spmatrix 

10from ._base import _spbase, sparray 

11from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks, 

12 get_csr_submatrix) 

13from ._sputils import upcast 

14 

15from ._compressed import _cs_matrix 

16 

17 

18class _csr_base(_cs_matrix): 

19 _format = 'csr' 

20 

21 def transpose(self, axes=None, copy=False): 

22 if axes is not None and axes != (1, 0): 

23 raise ValueError("Sparse arrays/matrices do not support " 

24 "an 'axes' parameter because swapping " 

25 "dimensions is the only logical permutation.") 

26 

27 M, N = self.shape 

28 return self._csc_container((self.data, self.indices, 

29 self.indptr), shape=(N, M), copy=copy) 

30 

31 transpose.__doc__ = _spbase.transpose.__doc__ 

32 

33 def tolil(self, copy=False): 

34 lil = self._lil_container(self.shape, dtype=self.dtype) 

35 

36 self.sum_duplicates() 

37 ptr,ind,dat = self.indptr,self.indices,self.data 

38 rows, data = lil.rows, lil.data 

39 

40 for n in range(self.shape[0]): 

41 start = ptr[n] 

42 end = ptr[n+1] 

43 rows[n] = ind[start:end].tolist() 

44 data[n] = dat[start:end].tolist() 

45 

46 return lil 

47 

48 tolil.__doc__ = _spbase.tolil.__doc__ 

49 

50 def tocsr(self, copy=False): 

51 if copy: 

52 return self.copy() 

53 else: 

54 return self 

55 

56 tocsr.__doc__ = _spbase.tocsr.__doc__ 

57 

58 def tocsc(self, copy=False): 

59 idx_dtype = self._get_index_dtype((self.indptr, self.indices), 

60 maxval=max(self.nnz, self.shape[0])) 

61 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype) 

62 indices = np.empty(self.nnz, dtype=idx_dtype) 

63 data = np.empty(self.nnz, dtype=upcast(self.dtype)) 

64 

65 csr_tocsc(self.shape[0], self.shape[1], 

66 self.indptr.astype(idx_dtype), 

67 self.indices.astype(idx_dtype), 

68 self.data, 

69 indptr, 

70 indices, 

71 data) 

72 

73 A = self._csc_container((data, indices, indptr), shape=self.shape) 

74 A.has_sorted_indices = True 

75 return A 

76 

77 tocsc.__doc__ = _spbase.tocsc.__doc__ 

78 

79 def tobsr(self, blocksize=None, copy=True): 

80 if blocksize is None: 

81 from ._spfuncs import estimate_blocksize 

82 return self.tobsr(blocksize=estimate_blocksize(self)) 

83 

84 elif blocksize == (1,1): 

85 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr) 

86 return self._bsr_container(arg1, shape=self.shape, copy=copy) 

87 

88 else: 

89 R,C = blocksize 

90 M,N = self.shape 

91 

92 if R < 1 or C < 1 or M % R != 0 or N % C != 0: 

93 raise ValueError('invalid blocksize %s' % blocksize) 

94 

95 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices) 

96 

97 idx_dtype = self._get_index_dtype((self.indptr, self.indices), 

98 maxval=max(N//C, blks)) 

99 indptr = np.empty(M//R+1, dtype=idx_dtype) 

100 indices = np.empty(blks, dtype=idx_dtype) 

101 data = np.zeros((blks,R,C), dtype=self.dtype) 

102 

103 csr_tobsr(M, N, R, C, 

104 self.indptr.astype(idx_dtype), 

105 self.indices.astype(idx_dtype), 

106 self.data, 

107 indptr, indices, data.ravel()) 

108 

109 return self._bsr_container( 

110 (data, indices, indptr), shape=self.shape 

111 ) 

112 

113 tobsr.__doc__ = _spbase.tobsr.__doc__ 

114 

115 # these functions are used by the parent class (_cs_matrix) 

116 # to remove redundancy between csc_matrix and csr_array 

117 def _swap(self, x): 

118 """swap the members of x if this is a column-oriented matrix 

119 """ 

120 return x 

121 

122 def __iter__(self): 

123 indptr = np.zeros(2, dtype=self.indptr.dtype) 

124 shape = (1, self.shape[1]) 

125 i0 = 0 

126 for i1 in self.indptr[1:]: 

127 indptr[1] = i1 - i0 

128 indices = self.indices[i0:i1] 

129 data = self.data[i0:i1] 

130 yield self.__class__( 

131 (data, indices, indptr), shape=shape, copy=True 

132 ) 

133 i0 = i1 

134 

135 def _getrow(self, i): 

136 """Returns a copy of row i of the matrix, as a (1 x n) 

137 CSR matrix (row vector). 

138 """ 

139 M, N = self.shape 

140 i = int(i) 

141 if i < 0: 

142 i += M 

143 if i < 0 or i >= M: 

144 raise IndexError('index (%d) out of range' % i) 

145 indptr, indices, data = get_csr_submatrix( 

146 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N) 

147 return self.__class__((data, indices, indptr), shape=(1, N), 

148 dtype=self.dtype, copy=False) 

149 

150 def _getcol(self, i): 

151 """Returns a copy of column i of the matrix, as a (m x 1) 

152 CSR matrix (column vector). 

153 """ 

154 M, N = self.shape 

155 i = int(i) 

156 if i < 0: 

157 i += N 

158 if i < 0 or i >= N: 

159 raise IndexError('index (%d) out of range' % i) 

160 indptr, indices, data = get_csr_submatrix( 

161 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1) 

162 return self.__class__((data, indices, indptr), shape=(M, 1), 

163 dtype=self.dtype, copy=False) 

164 

165 def _get_intXarray(self, row, col): 

166 return self._getrow(row)._minor_index_fancy(col) 

167 

168 def _get_intXslice(self, row, col): 

169 if col.step in (1, None): 

170 return self._get_submatrix(row, col, copy=True) 

171 # TODO: uncomment this once it's faster: 

172 # return self._getrow(row)._minor_slice(col) 

173 

174 M, N = self.shape 

175 start, stop, stride = col.indices(N) 

176 

177 ii, jj = self.indptr[row:row+2] 

178 row_indices = self.indices[ii:jj] 

179 row_data = self.data[ii:jj] 

180 

181 if stride > 0: 

182 ind = (row_indices >= start) & (row_indices < stop) 

183 else: 

184 ind = (row_indices <= start) & (row_indices > stop) 

185 

186 if abs(stride) > 1: 

187 ind &= (row_indices - start) % stride == 0 

188 

189 row_indices = (row_indices[ind] - start) // stride 

190 row_data = row_data[ind] 

191 row_indptr = np.array([0, len(row_indices)]) 

192 

193 if stride < 0: 

194 row_data = row_data[::-1] 

195 row_indices = abs(row_indices[::-1]) 

196 

197 shape = (1, max(0, int(np.ceil(float(stop - start) / stride)))) 

198 return self.__class__((row_data, row_indices, row_indptr), shape=shape, 

199 dtype=self.dtype, copy=False) 

200 

201 def _get_sliceXint(self, row, col): 

202 if row.step in (1, None): 

203 return self._get_submatrix(row, col, copy=True) 

204 return self._major_slice(row)._get_submatrix(minor=col) 

205 

206 def _get_sliceXarray(self, row, col): 

207 return self._major_slice(row)._minor_index_fancy(col) 

208 

209 def _get_arrayXint(self, row, col): 

210 return self._major_index_fancy(row)._get_submatrix(minor=col) 

211 

212 def _get_arrayXslice(self, row, col): 

213 if col.step not in (1, None): 

214 col = np.arange(*col.indices(self.shape[1])) 

215 return self._get_arrayXarray(row, col) 

216 return self._major_index_fancy(row)._get_submatrix(minor=col) 

217 

218 

219def isspmatrix_csr(x): 

220 """Is `x` of csr_matrix type? 

221 

222 Parameters 

223 ---------- 

224 x 

225 object to check for being a csr matrix 

226 

227 Returns 

228 ------- 

229 bool 

230 True if `x` is a csr matrix, False otherwise 

231 

232 Examples 

233 -------- 

234 >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr 

235 >>> isspmatrix_csr(csr_matrix([[5]])) 

236 True 

237 >>> isspmatrix_csr(csr_array([[5]])) 

238 False 

239 >>> isspmatrix_csr(coo_matrix([[5]])) 

240 False 

241 """ 

242 return isinstance(x, csr_matrix) 

243 

244 

# This namespace class separates array from matrix with isinstance
# The class body is only a docstring: everything else comes from the
# ``_csr_base`` and ``sparray`` bases.
class csr_array(_csr_base, sparray):
    """
    Compressed Sparse Row array.

    This can be instantiated in several ways:
        csr_array(D)
            where D is a 2-D ndarray

        csr_array(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_array((M, N), [dtype])
            to construct an empty array with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_array((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the array dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the array
    shape : 2-tuple
        Shape of the array
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the array
    indices
        CSR format index array of the array
    indptr
        CSR format index pointer array of the array
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse arrays can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
        - Within each row, indices are sorted by column.
        - There are no duplicate entries.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_array
    >>> csr_array((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR array incrementally,
    the following snippet builds a term-document array from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_array((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """

367 

368 

# The class body is only a docstring: everything else comes from the
# ``spmatrix`` and ``_csr_base`` bases.  ``spmatrix`` is listed first, so it
# comes before ``_csr_base`` in the method resolution order.
class csr_matrix(spmatrix, _csr_base):
    """
    Compressed Sparse Row matrix.

    This can be instantiated in several ways:
        csr_matrix(D)
            where D is a 2-D ndarray

        csr_matrix(S)
            with another sparse array or matrix S (equivalent to S.tocsr())

        csr_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_matrix((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the matrix dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
    size
    data
        CSR format data array of the matrix
    indices
        CSR format index array of the matrix
    indptr
        CSR format index pointer array of the matrix
    has_sorted_indices
    has_canonical_format
    T

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Canonical Format
        - Within each row, indices are sorted by column.
        - There are no duplicate entries.

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR matrix incrementally,
    the following snippet builds a term-document matrix from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """

490