Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_csr.py: 24%
130 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1"""Compressed Sparse Row matrix format"""
3__docformat__ = "restructuredtext en"
5__all__ = ['csr_matrix', 'isspmatrix_csr']
7import numpy as np
9from ._base import spmatrix
10from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
11 get_csr_submatrix)
12from ._sputils import upcast, get_index_dtype
14from ._compressed import _cs_matrix
class csr_matrix(_cs_matrix):
    """
    Compressed Sparse Row matrix

    This can be instantiated in several ways:
        csr_matrix(D)
            with a dense matrix or rank-2 ndarray D

        csr_matrix(S)
            with another sparse matrix S (equivalent to S.tocsr())

        csr_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

        csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
            where ``data``, ``row_ind`` and ``col_ind`` satisfy the
            relationship ``a[row_ind[k], col_ind[k]] = data[k]``.

        csr_matrix((data, indices, indptr), [shape=(M, N)])
            is the standard CSR representation where the column indices for
            row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
            corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
            If the shape parameter is not supplied, the matrix dimensions
            are inferred from the index arrays.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
        Number of stored values, including explicit zeros
    data
        CSR format data array of the matrix
    indices
        CSR format index array of the matrix
    indptr
        CSR format index pointer array of the matrix
    has_sorted_indices
        Whether indices are sorted

    Notes
    -----

    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the CSR format
      - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
      - efficient row slicing
      - fast matrix vector products

    Disadvantages of the CSR format
      - slow column slicing operations (consider CSC)
      - changes to the sparsity structure are expensive (consider LIL or DOK)

    Examples
    --------

    >>> import numpy as np
    >>> from scipy.sparse import csr_matrix
    >>> csr_matrix((3, 4), dtype=np.int8).toarray()
    array([[0, 0, 0, 0],
           [0, 0, 0, 0],
           [0, 0, 0, 0]], dtype=int8)

    >>> row = np.array([0, 0, 1, 2, 2, 2])
    >>> col = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    >>> indptr = np.array([0, 2, 3, 6])
    >>> indices = np.array([0, 2, 2, 0, 1, 2])
    >>> data = np.array([1, 2, 3, 4, 5, 6])
    >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
    array([[1, 0, 2],
           [0, 0, 3],
           [4, 5, 6]])

    Duplicate entries are summed together:

    >>> row = np.array([0, 1, 2, 0])
    >>> col = np.array([0, 1, 1, 0])
    >>> data = np.array([1, 2, 4, 8])
    >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
    array([[9, 0, 0],
           [0, 2, 0],
           [0, 4, 0]])

    As an example of how to construct a CSR matrix incrementally,
    the following snippet builds a term-document matrix from texts:

    >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
    >>> indptr = [0]
    >>> indices = []
    >>> data = []
    >>> vocabulary = {}
    >>> for d in docs:
    ...     for term in d:
    ...         index = vocabulary.setdefault(term, len(vocabulary))
    ...         indices.append(index)
    ...         data.append(1)
    ...     indptr.append(len(indices))
    ...
    >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
    array([[2, 1, 0, 0],
           [0, 1, 1, 1]])

    """
    format = 'csr'

    def transpose(self, axes=None, copy=False):
        # The docstring is inherited from spmatrix (assigned below).
        # Any non-None `axes` is rejected with ValueError.
        if axes is not None:
            raise ValueError(("Sparse matrices do not support "
                              "an 'axes' parameter because swapping "
                              "dimensions is the only logical permutation."))

        M, N = self.shape
        # The same (data, indices, indptr) triple, read as CSC with the
        # swapped shape (N, M), describes the transpose; `copy` is forwarded
        # to the CSC container.
        return self._csc_container((self.data, self.indices,
                                    self.indptr), shape=(N, M), copy=copy)

    transpose.__doc__ = spmatrix.transpose.__doc__

    def tolil(self, copy=False):
        # The docstring is inherited from spmatrix (assigned below).
        # NOTE: `copy` is accepted for interface compatibility but is not
        # used by this implementation; a new LIL container is always built.
        lil = self._lil_container(self.shape, dtype=self.dtype)

        # NOTE(review): sum_duplicates() is invoked on *self* before the
        # arrays are read -- presumably it canonicalizes in place; confirm
        # against the parent class.
        self.sum_duplicates()
        ptr, ind, dat = self.indptr, self.indices, self.data
        rows, data = lil.rows, lil.data

        # Row n of the CSR matrix lives in the half-open span
        # [ptr[n], ptr[n+1]) of the index/data arrays.
        for n in range(self.shape[0]):
            start = ptr[n]
            end = ptr[n+1]
            rows[n] = ind[start:end].tolist()
            data[n] = dat[start:end].tolist()

        return lil

    tolil.__doc__ = spmatrix.tolil.__doc__

    def tocsr(self, copy=False):
        # Already CSR: return self, or self.copy() when a copy is requested.
        if copy:
            return self.copy()
        else:
            return self

    tocsr.__doc__ = spmatrix.tocsr.__doc__

    def tocsc(self, copy=False):
        # The docstring is inherited from spmatrix (assigned below).
        # NOTE: `copy` is not used here; fresh output arrays are always
        # allocated and filled by csr_tocsc.
        idx_dtype = get_index_dtype((self.indptr, self.indices),
                                    maxval=max(self.nnz, self.shape[0]))
        # Output buffers: one pointer per column (+1), one index/value per
        # stored entry.
        indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
        indices = np.empty(self.nnz, dtype=idx_dtype)
        data = np.empty(self.nnz, dtype=upcast(self.dtype))

        csr_tocsc(self.shape[0], self.shape[1],
                  self.indptr.astype(idx_dtype),
                  self.indices.astype(idx_dtype),
                  self.data,
                  indptr,
                  indices,
                  data)

        A = self._csc_container((data, indices, indptr), shape=self.shape)
        # The result is flagged as having sorted indices.
        A.has_sorted_indices = True
        return A

    tocsc.__doc__ = spmatrix.tocsc.__doc__

    def tobsr(self, blocksize=None, copy=True):
        # The docstring is inherited from spmatrix (assigned below).
        #
        # Three cases:
        #   * blocksize is None -> estimate a blocksize and recurse
        #     (the recursive call uses the default `copy`);
        #   * blocksize == (1, 1) -> reuse the CSR arrays directly, with data
        #     reshaped to (nnz, 1, 1); `copy` is forwarded to the container;
        #   * otherwise -> validate (R, C) and build new block arrays via
        #     csr_tobsr (`copy` is not consulted on this path).
        if blocksize is None:
            from ._spfuncs import estimate_blocksize
            return self.tobsr(blocksize=estimate_blocksize(self))

        elif blocksize == (1, 1):
            arg1 = (self.data.reshape(-1, 1, 1), self.indices, self.indptr)
            return self._bsr_container(arg1, shape=self.shape, copy=copy)

        else:
            R, C = blocksize
            M, N = self.shape

            # Blocks must be at least 1x1 and tile the matrix exactly.
            if R < 1 or C < 1 or M % R != 0 or N % C != 0:
                # Wrap in a 1-tuple: `blocksize` is itself a 2-sequence, and
                # a bare tuple on the right of % would be unpacked as two
                # format arguments, raising TypeError instead of ValueError.
                raise ValueError('invalid blocksize %s' % (blocksize,))

            blks = csr_count_blocks(M, N, R, C, self.indptr, self.indices)

            idx_dtype = get_index_dtype((self.indptr, self.indices),
                                        maxval=max(N//C, blks))
            indptr = np.empty(M//R+1, dtype=idx_dtype)
            indices = np.empty(blks, dtype=idx_dtype)
            # Zero-initialised so positions inside a block that have no
            # stored CSR entry remain zero.
            data = np.zeros((blks, R, C), dtype=self.dtype)

            csr_tobsr(M, N, R, C,
                      self.indptr.astype(idx_dtype),
                      self.indices.astype(idx_dtype),
                      self.data,
                      indptr, indices, data.ravel())

            return self._bsr_container(
                (data, indices, indptr), shape=self.shape
            )

    tobsr.__doc__ = spmatrix.tobsr.__doc__

    # these functions are used by the parent class (_cs_matrix)
    # to remove redundancy between csc_matrix and csr_matrix
    def _swap(self, x):
        """swap the members of x if this is a column-oriented matrix
        """
        # CSR is row-oriented, so x is returned unchanged.
        return x

    def __iter__(self):
        """Yield each row of the matrix in order as a (1 x n) matrix of the
        same class.
        """
        # A single two-element indptr buffer is reused across iterations;
        # each yielded matrix is built with copy=True (presumably so it does
        # not alias this buffer or the parent's arrays).
        indptr = np.zeros(2, dtype=self.indptr.dtype)
        shape = (1, self.shape[1])
        i0 = 0
        for i1 in self.indptr[1:]:
            # indptr stays [0, number of stored entries in this row].
            indptr[1] = i1 - i0
            indices = self.indices[i0:i1]
            data = self.data[i0:i1]
            yield self.__class__(
                (data, indices, indptr), shape=shape, copy=True
            )
            i0 = i1

    def getrow(self, i):
        """Returns a copy of row i of the matrix, as a (1 x n)
        CSR matrix (row vector).
        """
        M, N = self.shape
        i = int(i)
        # Negative indices count from the end.
        if i < 0:
            i += M
        if i < 0 or i >= M:
            raise IndexError('index (%d) out of range' % i)
        # Extract rows [i, i+1) across all columns [0, N).
        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
        return self.__class__((data, indices, indptr), shape=(1, N),
                              dtype=self.dtype, copy=False)

    def getcol(self, i):
        """Returns a copy of column i of the matrix, as a (m x 1)
        CSR matrix (column vector).
        """
        M, N = self.shape
        i = int(i)
        # Negative indices count from the end.
        if i < 0:
            i += N
        if i < 0 or i >= N:
            raise IndexError('index (%d) out of range' % i)
        # Extract all rows [0, M) restricted to columns [i, i+1).
        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
        return self.__class__((data, indices, indptr), shape=(M, 1),
                              dtype=self.dtype, copy=False)

    def _get_intXarray(self, row, col):
        """Index a single row with an array of columns."""
        return self.getrow(row)._minor_index_fancy(col)

    def _get_intXslice(self, row, col):
        """Index a single row with a column slice; returns a one-row matrix
        of the same class.
        """
        # Unit-step slices are delegated to _get_submatrix.
        if col.step in (1, None):
            return self._get_submatrix(row, col, copy=True)
        # TODO: uncomment this once it's faster:
        # return self.getrow(row)._minor_slice(col)

        M, N = self.shape
        start, stop, stride = col.indices(N)

        # Stored column indices / values of the requested row.
        ii, jj = self.indptr[row:row+2]
        row_indices = self.indices[ii:jj]
        row_data = self.data[ii:jj]

        # Keep entries whose column lies inside the slice range; the bounds
        # flip for a negative stride.
        if stride > 0:
            ind = (row_indices >= start) & (row_indices < stop)
        else:
            ind = (row_indices <= start) & (row_indices > stop)

        # ...and, for non-unit strides, only those landing on the stride grid.
        if abs(stride) > 1:
            ind &= (row_indices - start) % stride == 0

        # Map surviving source columns to their positions in the result.
        row_indices = (row_indices[ind] - start) // stride
        row_data = row_data[ind]
        row_indptr = np.array([0, len(row_indices)])

        if stride < 0:
            # A negative stride reverses the relative order of the entries,
            # so flip both arrays. abs() is kept from the original code; the
            # mapped positions should already be non-negative here.
            row_data = row_data[::-1]
            row_indices = abs(row_indices[::-1])

        # Number of columns selected by the slice, clamped at zero.
        shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))
        return self.__class__((row_data, row_indices, row_indptr), shape=shape,
                              dtype=self.dtype, copy=False)

    def _get_sliceXint(self, row, col):
        """Index a row slice with a single column."""
        # Unit-step row slices go straight to _get_submatrix; otherwise
        # slice the rows first, then take the column.
        if row.step in (1, None):
            return self._get_submatrix(row, col, copy=True)
        return self._major_slice(row)._get_submatrix(minor=col)

    def _get_sliceXarray(self, row, col):
        """Index a row slice with an array of columns."""
        return self._major_slice(row)._minor_index_fancy(col)

    def _get_arrayXint(self, row, col):
        """Index an array of rows with a single column."""
        return self._major_index_fancy(row)._get_submatrix(minor=col)

    def _get_arrayXslice(self, row, col):
        """Index an array of rows with a column slice."""
        # A non-unit step is expanded into an explicit column-index array
        # and handled by _get_arrayXarray.
        if col.step not in (1, None):
            col = np.arange(*col.indices(self.shape[1]))
            return self._get_arrayXarray(row, col)
        return self._major_index_fancy(row)._get_submatrix(minor=col)
def isspmatrix_csr(x):
    """Is x of csr_matrix type?

    Parameters
    ----------
    x
        object to check for being a csr matrix

    Returns
    -------
    bool
        True if x is a ``csr_matrix`` or ``csr_array`` instance,
        False otherwise

    Examples
    --------
    >>> from scipy.sparse import csr_matrix, isspmatrix_csr
    >>> isspmatrix_csr(csr_matrix([[5]]))
    True

    >>> from scipy.sparse import csc_matrix, isspmatrix_csr
    >>> isspmatrix_csr(csc_matrix([[5]]))
    False
    """
    # Imported inside the function, as in the original code (presumably to
    # avoid a circular import at module load time).
    from ._arrays import csr_array
    return isinstance(x, (csr_matrix, csr_array))