Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_csr.py: 24%

130 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-12 06:31 +0000

1"""Compressed Sparse Row matrix format""" 

2 

3__docformat__ = "restructuredtext en" 

4 

5__all__ = ['csr_matrix', 'isspmatrix_csr'] 

6 

7import numpy as np 

8 

9from ._base import spmatrix 

10from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks, 

11 get_csr_submatrix) 

12from ._sputils import upcast, get_index_dtype 

13 

14from ._compressed import _cs_matrix 

15 

16 

class csr_matrix(_cs_matrix):
    """
    Compressed Sparse Row matrix

    This can be instantiated in several ways:
        csr_matrix(D)
            with a dense matrix or rank-2 ndarray D

        csr_matrix(S)
            with another sparse matrix S (equivalent to S.tocsr())

        csr_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_matrix((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the matrix dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
        Number of stored values, including explicit zeros
    data
        CSR format data array of the matrix
    indices
        CSR format index array of the matrix
    indptr
        CSR format index pointer array of the matrix
    has_sorted_indices
        Whether indices are sorted

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR matrix incrementally,
    the following snippet builds a term-document matrix from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """
    format = 'csr'

    def transpose(self, axes=None, copy=False):
        # The public docstring is attached from spmatrix.transpose below.
        if axes is not None:
            raise ValueError(("Sparse matrices do not support "
                              "an 'axes' parameter because swapping "
                              "dimensions is the only logical permutation."))

        M, N = self.shape
        # The same (data, indices, indptr) triple is handed to the CSC
        # container with the shape swapped to (N, M); no index arithmetic
        # is done here.
        return self._csc_container((self.data, self.indices,
                                    self.indptr), shape=(N, M), copy=copy)

    transpose.__doc__ = spmatrix.transpose.__doc__

    def tolil(self, copy=False):
        # The public docstring is attached from spmatrix.tolil below.
        # `copy` is accepted for interface compatibility but is not read in
        # this method.
        lil = self._lil_container(self.shape, dtype=self.dtype)

        # NOTE: this is called on `self`, not on a copy.
        self.sum_duplicates()
        ptr, ind, dat = self.indptr, self.indices, self.data
        rows, data = lil.rows, lil.data

        # Row n of the CSR arrays occupies the half-open range
        # [ptr[n], ptr[n+1]); copy it into the LIL per-row Python lists.
        for n in range(self.shape[0]):
            start = ptr[n]
            end = ptr[n+1]
            rows[n] = ind[start:end].tolist()
            data[n] = dat[start:end].tolist()

        return lil

    tolil.__doc__ = spmatrix.tolil.__doc__

    def tocsr(self, copy=False):
        # Already CSR: return self, or a copy when one is requested.
        # The public docstring is attached from spmatrix.tocsr below.
        if copy:
            return self.copy()
        else:
            return self

    tocsr.__doc__ = spmatrix.tocsr.__doc__

    def tocsc(self, copy=False):
        # The public docstring is attached from spmatrix.tocsc below.
        # `copy` is not read: the output arrays are always freshly allocated.
        #
        # The index dtype must be able to hold both nnz and the number of
        # rows, hence the maxval passed to get_index_dtype.
        idx_dtype = get_index_dtype((self.indptr, self.indices),
                                    maxval=max(self.nnz, self.shape[0]))
        indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
        indices = np.empty(self.nnz, dtype=idx_dtype)
        data = np.empty(self.nnz, dtype=upcast(self.dtype))

        # csr_tocsc fills the three output arrays in place.
        csr_tocsc(self.shape[0], self.shape[1],
                  self.indptr.astype(idx_dtype),
                  self.indices.astype(idx_dtype),
                  self.data,
                  indptr,
                  indices,
                  data)

        A = self._csc_container((data, indices, indptr), shape=self.shape)
        # The result is flagged as sorted directly rather than re-checked
        # (presumably csr_tocsc guarantees sorted output -- confirm against
        # the _sparsetools implementation).
        A.has_sorted_indices = True
        return A

    tocsc.__doc__ = spmatrix.tocsc.__doc__

    def tobsr(self, blocksize=None, copy=True):
        # The public docstring is attached from spmatrix.tobsr below.
        if blocksize is None:
            from ._spfuncs import estimate_blocksize
            # NOTE(review): `copy` is not forwarded here, so the recursive
            # call runs with the default copy=True.
            return self.tobsr(blocksize=estimate_blocksize(self))

        elif blocksize == (1, 1):
            # 1x1 blocks: the CSR index arrays are passed through unchanged
            # and the data array is only reshaped to (nnz, 1, 1).
            arg1 = (self.data.reshape(-1, 1, 1), self.indices, self.indptr)
            return self._bsr_container(arg1, shape=self.shape, copy=copy)

        else:
            R, C = blocksize
            M, N = self.shape

            if R < 1 or C < 1 or M % R != 0 or N % C != 0:
                # blocksize is a 2-sequence; wrap it in a 1-tuple so that
                # '%' formats it as a single value. Formatting the bare
                # tuple raised TypeError ("not all arguments converted")
                # instead of the intended ValueError.
                raise ValueError('invalid blocksize %s' % (blocksize,))

            blks = csr_count_blocks(M, N, R, C, self.indptr, self.indices)

            idx_dtype = get_index_dtype((self.indptr, self.indices),
                                        maxval=max(N//C, blks))
            indptr = np.empty(M//R+1, dtype=idx_dtype)
            indices = np.empty(blks, dtype=idx_dtype)
            # Zero-initialised (unlike the index arrays); presumably so that
            # block positions with no stored CSR entry read as 0 -- confirm
            # against csr_tobsr.
            data = np.zeros((blks, R, C), dtype=self.dtype)

            # csr_tobsr fills indptr/indices and the flattened data in place.
            csr_tobsr(M, N, R, C,
                      self.indptr.astype(idx_dtype),
                      self.indices.astype(idx_dtype),
                      self.data,
                      indptr, indices, data.ravel())

            return self._bsr_container(
                (data, indices, indptr), shape=self.shape
            )

    tobsr.__doc__ = spmatrix.tobsr.__doc__

    # these functions are used by the parent class (_cs_matrix)
    # to remove redundancy between csc_matrix and csr_matrix
    def _swap(self, x):
        """swap the members of x if this is a column-oriented matrix
        """
        # CSR is row-oriented, so x is returned unchanged.
        return x

    def __iter__(self):
        """Yield each row of the matrix as a separate (1 x n) matrix."""
        # One 2-element indptr buffer is reused for every row; only its
        # second entry (the row's number of stored values) changes.
        # Each row is constructed with copy=True.
        indptr = np.zeros(2, dtype=self.indptr.dtype)
        shape = (1, self.shape[1])
        i0 = 0
        for i1 in self.indptr[1:]:
            indptr[1] = i1 - i0
            indices = self.indices[i0:i1]
            data = self.data[i0:i1]
            yield self.__class__(
                (data, indices, indptr), shape=shape, copy=True
            )
            i0 = i1

    def getrow(self, i):
        """Returns a copy of row i of the matrix, as a (1 x n)
        CSR matrix (row vector).
        """
        M, N = self.shape
        i = int(i)
        # Negative indices count from the last row.
        if i < 0:
            i += M
        if i < 0 or i >= M:
            raise IndexError('index (%d) out of range' % i)
        # Extract rows [i, i+1) across all columns [0, N).
        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
        return self.__class__((data, indices, indptr), shape=(1, N),
                              dtype=self.dtype, copy=False)

    def getcol(self, i):
        """Returns a copy of column i of the matrix, as a (m x 1)
        CSR matrix (column vector).
        """
        M, N = self.shape
        i = int(i)
        # Negative indices count from the last column.
        if i < 0:
            i += N
        if i < 0 or i >= N:
            raise IndexError('index (%d) out of range' % i)
        # Extract all rows [0, M) for columns [i, i+1).
        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
        return self.__class__((data, indices, indptr), shape=(M, 1),
                              dtype=self.dtype, copy=False)

    def _get_intXarray(self, row, col):
        """Index with an integer row and an array of column indices."""
        return self.getrow(row)._minor_index_fancy(col)

    def _get_intXslice(self, row, col):
        """Index with an integer row and a column slice.

        Unit-step slices are delegated to ``_get_submatrix``; any other
        step is handled here by filtering the stored column indices of
        the row. The result is a 1-row matrix of this class.
        """
        if col.step in (1, None):
            return self._get_submatrix(row, col, copy=True)
        # TODO: uncomment this once it's faster:
        # return self.getrow(row)._minor_slice(col)

        M, N = self.shape
        start, stop, stride = col.indices(N)

        # Stored entries of the requested row.
        ii, jj = self.indptr[row:row+2]
        row_indices = self.indices[ii:jj]
        row_data = self.data[ii:jj]

        # Keep only the entries whose column lies inside the slice bounds;
        # the comparison direction depends on the sign of the stride.
        if stride > 0:
            ind = (row_indices >= start) & (row_indices < stop)
        else:
            ind = (row_indices <= start) & (row_indices > stop)

        # For |stride| > 1, additionally keep only columns that the slice
        # actually lands on.
        if abs(stride) > 1:
            ind &= (row_indices - start) % stride == 0

        # Map the surviving column indices to positions within the slice.
        row_indices = (row_indices[ind] - start) // stride
        row_data = row_data[ind]
        row_indptr = np.array([0, len(row_indices)])

        # A negative stride reverses the order of the selected columns, so
        # the entries are reversed to match.
        if stride < 0:
            row_data = row_data[::-1]
            row_indices = abs(row_indices[::-1])

        # Number of columns selected by the slice (never negative).
        shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))
        return self.__class__((row_data, row_indices, row_indptr), shape=shape,
                              dtype=self.dtype, copy=False)

    def _get_sliceXint(self, row, col):
        """Index with a row slice and an integer column."""
        if row.step in (1, None):
            return self._get_submatrix(row, col, copy=True)
        return self._major_slice(row)._get_submatrix(minor=col)

    def _get_sliceXarray(self, row, col):
        """Index with a row slice and an array of column indices."""
        return self._major_slice(row)._minor_index_fancy(col)

    def _get_arrayXint(self, row, col):
        """Index with an array of row indices and an integer column."""
        return self._major_index_fancy(row)._get_submatrix(minor=col)

    def _get_arrayXslice(self, row, col):
        """Index with an array of row indices and a column slice."""
        if col.step not in (1, None):
            # Non-unit steps are expanded into an explicit index array and
            # handled by the array/array path.
            col = np.arange(*col.indices(self.shape[1]))
            return self._get_arrayXarray(row, col)
        return self._major_index_fancy(row)._get_submatrix(minor=col)

331 

332 

def isspmatrix_csr(x):
    """Is x of csr_matrix type?

    Parameters
    ----------
    x
        object to check for being a csr matrix

    Returns
    -------
    bool
        True if x is a csr matrix (an instance of ``csr_matrix`` or
        ``csr_array``), False otherwise

    Examples
    --------
    >>> from scipy.sparse import csr_matrix, isspmatrix_csr
    >>> isspmatrix_csr(csr_matrix([[5]]))
    True

    >>> from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csr
    >>> isspmatrix_csr(csc_matrix([[5]]))
    False
    """
    # Imported at call time rather than at module level (presumably to
    # avoid a circular import with ._arrays -- confirm).
    from ._arrays import csr_array
    return isinstance(x, (csr_matrix, csr_array))