Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_csr.py: 25%
132 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1"""Compressed Sparse Row matrix format"""
3__docformat__ = "restructuredtext en"
5__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']
7import numpy as np
9from ._matrix import spmatrix
10from ._base import _spbase, sparray
11from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
12 get_csr_submatrix)
13from ._sputils import upcast
15from ._compressed import _cs_matrix
18class _csr_base(_cs_matrix):
19 _format = 'csr'
21 def transpose(self, axes=None, copy=False):
22 if axes is not None and axes != (1, 0):
23 raise ValueError("Sparse arrays/matrices do not support "
24 "an 'axes' parameter because swapping "
25 "dimensions is the only logical permutation.")
27 M, N = self.shape
28 return self._csc_container((self.data, self.indices,
29 self.indptr), shape=(N, M), copy=copy)
31 transpose.__doc__ = _spbase.transpose.__doc__
33 def tolil(self, copy=False):
34 lil = self._lil_container(self.shape, dtype=self.dtype)
36 self.sum_duplicates()
37 ptr,ind,dat = self.indptr,self.indices,self.data
38 rows, data = lil.rows, lil.data
40 for n in range(self.shape[0]):
41 start = ptr[n]
42 end = ptr[n+1]
43 rows[n] = ind[start:end].tolist()
44 data[n] = dat[start:end].tolist()
46 return lil
48 tolil.__doc__ = _spbase.tolil.__doc__
50 def tocsr(self, copy=False):
51 if copy:
52 return self.copy()
53 else:
54 return self
56 tocsr.__doc__ = _spbase.tocsr.__doc__
58 def tocsc(self, copy=False):
59 idx_dtype = self._get_index_dtype((self.indptr, self.indices),
60 maxval=max(self.nnz, self.shape[0]))
61 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
62 indices = np.empty(self.nnz, dtype=idx_dtype)
63 data = np.empty(self.nnz, dtype=upcast(self.dtype))
65 csr_tocsc(self.shape[0], self.shape[1],
66 self.indptr.astype(idx_dtype),
67 self.indices.astype(idx_dtype),
68 self.data,
69 indptr,
70 indices,
71 data)
73 A = self._csc_container((data, indices, indptr), shape=self.shape)
74 A.has_sorted_indices = True
75 return A
77 tocsc.__doc__ = _spbase.tocsc.__doc__
79 def tobsr(self, blocksize=None, copy=True):
80 if blocksize is None:
81 from ._spfuncs import estimate_blocksize
82 return self.tobsr(blocksize=estimate_blocksize(self))
84 elif blocksize == (1,1):
85 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)
86 return self._bsr_container(arg1, shape=self.shape, copy=copy)
88 else:
89 R,C = blocksize
90 M,N = self.shape
92 if R < 1 or C < 1 or M % R != 0 or N % C != 0:
93 raise ValueError('invalid blocksize %s' % blocksize)
95 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)
97 idx_dtype = self._get_index_dtype((self.indptr, self.indices),
98 maxval=max(N//C, blks))
99 indptr = np.empty(M//R+1, dtype=idx_dtype)
100 indices = np.empty(blks, dtype=idx_dtype)
101 data = np.zeros((blks,R,C), dtype=self.dtype)
103 csr_tobsr(M, N, R, C,
104 self.indptr.astype(idx_dtype),
105 self.indices.astype(idx_dtype),
106 self.data,
107 indptr, indices, data.ravel())
109 return self._bsr_container(
110 (data, indices, indptr), shape=self.shape
111 )
113 tobsr.__doc__ = _spbase.tobsr.__doc__
115 # these functions are used by the parent class (_cs_matrix)
116 # to remove redundancy between csc_matrix and csr_array
117 def _swap(self, x):
118 """swap the members of x if this is a column-oriented matrix
119 """
120 return x
122 def __iter__(self):
123 indptr = np.zeros(2, dtype=self.indptr.dtype)
124 shape = (1, self.shape[1])
125 i0 = 0
126 for i1 in self.indptr[1:]:
127 indptr[1] = i1 - i0
128 indices = self.indices[i0:i1]
129 data = self.data[i0:i1]
130 yield self.__class__(
131 (data, indices, indptr), shape=shape, copy=True
132 )
133 i0 = i1
135 def _getrow(self, i):
136 """Returns a copy of row i of the matrix, as a (1 x n)
137 CSR matrix (row vector).
138 """
139 M, N = self.shape
140 i = int(i)
141 if i < 0:
142 i += M
143 if i < 0 or i >= M:
144 raise IndexError('index (%d) out of range' % i)
145 indptr, indices, data = get_csr_submatrix(
146 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
147 return self.__class__((data, indices, indptr), shape=(1, N),
148 dtype=self.dtype, copy=False)
150 def _getcol(self, i):
151 """Returns a copy of column i of the matrix, as a (m x 1)
152 CSR matrix (column vector).
153 """
154 M, N = self.shape
155 i = int(i)
156 if i < 0:
157 i += N
158 if i < 0 or i >= N:
159 raise IndexError('index (%d) out of range' % i)
160 indptr, indices, data = get_csr_submatrix(
161 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
162 return self.__class__((data, indices, indptr), shape=(M, 1),
163 dtype=self.dtype, copy=False)
165 def _get_intXarray(self, row, col):
166 return self._getrow(row)._minor_index_fancy(col)
168 def _get_intXslice(self, row, col):
169 if col.step in (1, None):
170 return self._get_submatrix(row, col, copy=True)
171 # TODO: uncomment this once it's faster:
172 # return self._getrow(row)._minor_slice(col)
174 M, N = self.shape
175 start, stop, stride = col.indices(N)
177 ii, jj = self.indptr[row:row+2]
178 row_indices = self.indices[ii:jj]
179 row_data = self.data[ii:jj]
181 if stride > 0:
182 ind = (row_indices >= start) & (row_indices < stop)
183 else:
184 ind = (row_indices <= start) & (row_indices > stop)
186 if abs(stride) > 1:
187 ind &= (row_indices - start) % stride == 0
189 row_indices = (row_indices[ind] - start) // stride
190 row_data = row_data[ind]
191 row_indptr = np.array([0, len(row_indices)])
193 if stride < 0:
194 row_data = row_data[::-1]
195 row_indices = abs(row_indices[::-1])
197 shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))
198 return self.__class__((row_data, row_indices, row_indptr), shape=shape,
199 dtype=self.dtype, copy=False)
201 def _get_sliceXint(self, row, col):
202 if row.step in (1, None):
203 return self._get_submatrix(row, col, copy=True)
204 return self._major_slice(row)._get_submatrix(minor=col)
206 def _get_sliceXarray(self, row, col):
207 return self._major_slice(row)._minor_index_fancy(col)
209 def _get_arrayXint(self, row, col):
210 return self._major_index_fancy(row)._get_submatrix(minor=col)
212 def _get_arrayXslice(self, row, col):
213 if col.step not in (1, None):
214 col = np.arange(*col.indices(self.shape[1]))
215 return self._get_arrayXarray(row, col)
216 return self._major_index_fancy(row)._get_submatrix(minor=col)
219def isspmatrix_csr(x):
220 """Is `x` of csr_matrix type?
222 Parameters
223 ----------
224 x
225 object to check for being a csr matrix
227 Returns
228 -------
229 bool
230 True if `x` is a csr matrix, False otherwise
232 Examples
233 --------
234 >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr
235 >>> isspmatrix_csr(csr_matrix([[5]]))
236 True
237 >>> isspmatrix_csr(csr_array([[5]]))
238 False
239 >>> isspmatrix_csr(coo_matrix([[5]]))
240 False
241 """
242 return isinstance(x, csr_matrix)
245# This namespace class separates array from matrix with isinstance
246class csr_array(_csr_base, sparray):
247 """
248 Compressed Sparse Row array.
250 This can be instantiated in several ways:
251 csr_array(D)
252 where D is a 2-D ndarray
254 csr_array(S)
255 with another sparse array or matrix S (equivalent to S.tocsr())
257 csr_array((M, N), [dtype])
258 to construct an empty array with shape (M, N)
259 dtype is optional, defaulting to dtype='d'.
261 csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
262 where ``data``, ``row_ind`` and ``col_ind`` satisfy the
263 relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
265 csr_array((data, indices, indptr), [shape=(M, N)])
266 is the standard CSR representation where the column indices for
267 row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
268 corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
269 If the shape parameter is not supplied, the array dimensions
270 are inferred from the index arrays.
272 Attributes
273 ----------
274 dtype : dtype
275 Data type of the array
276 shape : 2-tuple
277 Shape of the array
278 ndim : int
279 Number of dimensions (this is always 2)
280 nnz
281 size
282 data
283 CSR format data array of the array
284 indices
285 CSR format index array of the array
286 indptr
287 CSR format index pointer array of the array
288 has_sorted_indices
289 has_canonical_format
290 T
292 Notes
293 -----
295 Sparse arrays can be used in arithmetic operations: they support
296 addition, subtraction, multiplication, division, and matrix power.
298 Advantages of the CSR format
299 - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
300 - efficient row slicing
301 - fast matrix vector products
303 Disadvantages of the CSR format
304 - slow column slicing operations (consider CSC)
305 - changes to the sparsity structure are expensive (consider LIL or DOK)
307 Canonical Format
308 - Within each row, indices are sorted by column.
309 - There are no duplicate entries.
311 Examples
312 --------
314 >>> import numpy as np
315 >>> from scipy.sparse import csr_array
316 >>> csr_array((3, 4), dtype=np.int8).toarray()
317 array([[0, 0, 0, 0],
318 [0, 0, 0, 0],
319 [0, 0, 0, 0]], dtype=int8)
321 >>> row = np.array([0, 0, 1, 2, 2, 2])
322 >>> col = np.array([0, 2, 2, 0, 1, 2])
323 >>> data = np.array([1, 2, 3, 4, 5, 6])
324 >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
325 array([[1, 0, 2],
326 [0, 0, 3],
327 [4, 5, 6]])
329 >>> indptr = np.array([0, 2, 3, 6])
330 >>> indices = np.array([0, 2, 2, 0, 1, 2])
331 >>> data = np.array([1, 2, 3, 4, 5, 6])
332 >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
333 array([[1, 0, 2],
334 [0, 0, 3],
335 [4, 5, 6]])
337 Duplicate entries are summed together:
339 >>> row = np.array([0, 1, 2, 0])
340 >>> col = np.array([0, 1, 1, 0])
341 >>> data = np.array([1, 2, 4, 8])
342 >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
343 array([[9, 0, 0],
344 [0, 2, 0],
345 [0, 4, 0]])
347 As an example of how to construct a CSR array incrementally,
348 the following snippet builds a term-document array from texts:
350 >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
351 >>> indptr = [0]
352 >>> indices = []
353 >>> data = []
354 >>> vocabulary = {}
355 >>> for d in docs:
356 ... for term in d:
357 ... index = vocabulary.setdefault(term, len(vocabulary))
358 ... indices.append(index)
359 ... data.append(1)
360 ... indptr.append(len(indices))
361 ...
362 >>> csr_array((data, indices, indptr), dtype=int).toarray()
363 array([[2, 1, 0, 0],
364 [0, 1, 1, 1]])
366 """
369class csr_matrix(spmatrix, _csr_base):
370 """
371 Compressed Sparse Row matrix.
373 This can be instantiated in several ways:
374 csr_matrix(D)
375 where D is a 2-D ndarray
377 csr_matrix(S)
378 with another sparse array or matrix S (equivalent to S.tocsr())
380 csr_matrix((M, N), [dtype])
381 to construct an empty matrix with shape (M, N)
382 dtype is optional, defaulting to dtype='d'.
384 csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
385 where ``data``, ``row_ind`` and ``col_ind`` satisfy the
386 relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
388 csr_matrix((data, indices, indptr), [shape=(M, N)])
389 is the standard CSR representation where the column indices for
390 row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
391 corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
392 If the shape parameter is not supplied, the matrix dimensions
393 are inferred from the index arrays.
395 Attributes
396 ----------
397 dtype : dtype
398 Data type of the matrix
399 shape : 2-tuple
400 Shape of the matrix
401 ndim : int
402 Number of dimensions (this is always 2)
403 nnz
404 size
405 data
406 CSR format data array of the matrix
407 indices
408 CSR format index array of the matrix
409 indptr
410 CSR format index pointer array of the matrix
411 has_sorted_indices
412 has_canonical_format
413 T
415 Notes
416 -----
418 Sparse matrices can be used in arithmetic operations: they support
419 addition, subtraction, multiplication, division, and matrix power.
421 Advantages of the CSR format
422 - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
423 - efficient row slicing
424 - fast matrix vector products
426 Disadvantages of the CSR format
427 - slow column slicing operations (consider CSC)
428 - changes to the sparsity structure are expensive (consider LIL or DOK)
430 Canonical Format
431 - Within each row, indices are sorted by column.
432 - There are no duplicate entries.
434 Examples
435 --------
437 >>> import numpy as np
438 >>> from scipy.sparse import csr_matrix
439 >>> csr_matrix((3, 4), dtype=np.int8).toarray()
440 array([[0, 0, 0, 0],
441 [0, 0, 0, 0],
442 [0, 0, 0, 0]], dtype=int8)
444 >>> row = np.array([0, 0, 1, 2, 2, 2])
445 >>> col = np.array([0, 2, 2, 0, 1, 2])
446 >>> data = np.array([1, 2, 3, 4, 5, 6])
447 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
448 array([[1, 0, 2],
449 [0, 0, 3],
450 [4, 5, 6]])
452 >>> indptr = np.array([0, 2, 3, 6])
453 >>> indices = np.array([0, 2, 2, 0, 1, 2])
454 >>> data = np.array([1, 2, 3, 4, 5, 6])
455 >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
456 array([[1, 0, 2],
457 [0, 0, 3],
458 [4, 5, 6]])
460 Duplicate entries are summed together:
462 >>> row = np.array([0, 1, 2, 0])
463 >>> col = np.array([0, 1, 1, 0])
464 >>> data = np.array([1, 2, 4, 8])
465 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
466 array([[9, 0, 0],
467 [0, 2, 0],
468 [0, 4, 0]])
470 As an example of how to construct a CSR matrix incrementally,
471 the following snippet builds a term-document matrix from texts:
473 >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
474 >>> indptr = [0]
475 >>> indices = []
476 >>> data = []
477 >>> vocabulary = {}
478 >>> for d in docs:
479 ... for term in d:
480 ... index = vocabulary.setdefault(term, len(vocabulary))
481 ... indices.append(index)
482 ... data.append(1)
483 ... indptr.append(len(indices))
484 ...
485 >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
486 array([[2, 1, 0, 0],
487 [0, 1, 1, 1]])
489 """