Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_csr.py: 26%
133 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-03-22 06:44 +0000
1"""Compressed Sparse Row matrix format"""
3__docformat__ = "restructuredtext en"
5__all__ = ['csr_array', 'csr_matrix', 'isspmatrix_csr']
7import numpy as np
9from ._matrix import spmatrix
10from ._base import _spbase, sparray
11from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks,
12 get_csr_submatrix)
13from ._sputils import upcast
15from ._compressed import _cs_matrix
18class _csr_base(_cs_matrix):
19 _format = 'csr'
21 def transpose(self, axes=None, copy=False):
22 if axes is not None and axes != (1, 0):
23 raise ValueError("Sparse arrays/matrices do not support "
24 "an 'axes' parameter because swapping "
25 "dimensions is the only logical permutation.")
27 M, N = self.shape
28 return self._csc_container((self.data, self.indices,
29 self.indptr), shape=(N, M), copy=copy)
31 transpose.__doc__ = _spbase.transpose.__doc__
33 def tolil(self, copy=False):
34 lil = self._lil_container(self.shape, dtype=self.dtype)
36 self.sum_duplicates()
37 ptr,ind,dat = self.indptr,self.indices,self.data
38 rows, data = lil.rows, lil.data
40 for n in range(self.shape[0]):
41 start = ptr[n]
42 end = ptr[n+1]
43 rows[n] = ind[start:end].tolist()
44 data[n] = dat[start:end].tolist()
46 return lil
48 tolil.__doc__ = _spbase.tolil.__doc__
50 def tocsr(self, copy=False):
51 if copy:
52 return self.copy()
53 else:
54 return self
56 tocsr.__doc__ = _spbase.tocsr.__doc__
58 def tocsc(self, copy=False):
59 idx_dtype = self._get_index_dtype((self.indptr, self.indices),
60 maxval=max(self.nnz, self.shape[0]))
61 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
62 indices = np.empty(self.nnz, dtype=idx_dtype)
63 data = np.empty(self.nnz, dtype=upcast(self.dtype))
65 csr_tocsc(self.shape[0], self.shape[1],
66 self.indptr.astype(idx_dtype),
67 self.indices.astype(idx_dtype),
68 self.data,
69 indptr,
70 indices,
71 data)
73 A = self._csc_container((data, indices, indptr), shape=self.shape)
74 A.has_sorted_indices = True
75 return A
77 tocsc.__doc__ = _spbase.tocsc.__doc__
79 def tobsr(self, blocksize=None, copy=True):
80 if blocksize is None:
81 from ._spfuncs import estimate_blocksize
82 return self.tobsr(blocksize=estimate_blocksize(self))
84 elif blocksize == (1,1):
85 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)
86 return self._bsr_container(arg1, shape=self.shape, copy=copy)
88 else:
89 R,C = blocksize
90 M,N = self.shape
92 if R < 1 or C < 1 or M % R != 0 or N % C != 0:
93 raise ValueError('invalid blocksize %s' % blocksize)
95 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices)
97 idx_dtype = self._get_index_dtype((self.indptr, self.indices),
98 maxval=max(N//C, blks))
99 indptr = np.empty(M//R+1, dtype=idx_dtype)
100 indices = np.empty(blks, dtype=idx_dtype)
101 data = np.zeros((blks,R,C), dtype=self.dtype)
103 csr_tobsr(M, N, R, C,
104 self.indptr.astype(idx_dtype),
105 self.indices.astype(idx_dtype),
106 self.data,
107 indptr, indices, data.ravel())
109 return self._bsr_container(
110 (data, indices, indptr), shape=self.shape
111 )
113 tobsr.__doc__ = _spbase.tobsr.__doc__
115 # these functions are used by the parent class (_cs_matrix)
116 # to remove redundancy between csc_matrix and csr_array
117 @staticmethod
118 def _swap(x):
119 """swap the members of x if this is a column-oriented matrix
120 """
121 return x
123 def __iter__(self):
124 indptr = np.zeros(2, dtype=self.indptr.dtype)
125 shape = (1, self.shape[1])
126 i0 = 0
127 for i1 in self.indptr[1:]:
128 indptr[1] = i1 - i0
129 indices = self.indices[i0:i1]
130 data = self.data[i0:i1]
131 yield self.__class__(
132 (data, indices, indptr), shape=shape, copy=True
133 )
134 i0 = i1
136 def _getrow(self, i):
137 """Returns a copy of row i of the matrix, as a (1 x n)
138 CSR matrix (row vector).
139 """
140 M, N = self.shape
141 i = int(i)
142 if i < 0:
143 i += M
144 if i < 0 or i >= M:
145 raise IndexError('index (%d) out of range' % i)
146 indptr, indices, data = get_csr_submatrix(
147 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N)
148 return self.__class__((data, indices, indptr), shape=(1, N),
149 dtype=self.dtype, copy=False)
151 def _getcol(self, i):
152 """Returns a copy of column i of the matrix, as a (m x 1)
153 CSR matrix (column vector).
154 """
155 M, N = self.shape
156 i = int(i)
157 if i < 0:
158 i += N
159 if i < 0 or i >= N:
160 raise IndexError('index (%d) out of range' % i)
161 indptr, indices, data = get_csr_submatrix(
162 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1)
163 return self.__class__((data, indices, indptr), shape=(M, 1),
164 dtype=self.dtype, copy=False)
166 def _get_intXarray(self, row, col):
167 return self._getrow(row)._minor_index_fancy(col)
169 def _get_intXslice(self, row, col):
170 if col.step in (1, None):
171 return self._get_submatrix(row, col, copy=True)
172 # TODO: uncomment this once it's faster:
173 # return self._getrow(row)._minor_slice(col)
175 M, N = self.shape
176 start, stop, stride = col.indices(N)
178 ii, jj = self.indptr[row:row+2]
179 row_indices = self.indices[ii:jj]
180 row_data = self.data[ii:jj]
182 if stride > 0:
183 ind = (row_indices >= start) & (row_indices < stop)
184 else:
185 ind = (row_indices <= start) & (row_indices > stop)
187 if abs(stride) > 1:
188 ind &= (row_indices - start) % stride == 0
190 row_indices = (row_indices[ind] - start) // stride
191 row_data = row_data[ind]
192 row_indptr = np.array([0, len(row_indices)])
194 if stride < 0:
195 row_data = row_data[::-1]
196 row_indices = abs(row_indices[::-1])
198 shape = (1, max(0, int(np.ceil(float(stop - start) / stride))))
199 return self.__class__((row_data, row_indices, row_indptr), shape=shape,
200 dtype=self.dtype, copy=False)
202 def _get_sliceXint(self, row, col):
203 if row.step in (1, None):
204 return self._get_submatrix(row, col, copy=True)
205 return self._major_slice(row)._get_submatrix(minor=col)
207 def _get_sliceXarray(self, row, col):
208 return self._major_slice(row)._minor_index_fancy(col)
210 def _get_arrayXint(self, row, col):
211 return self._major_index_fancy(row)._get_submatrix(minor=col)
213 def _get_arrayXslice(self, row, col):
214 if col.step not in (1, None):
215 col = np.arange(*col.indices(self.shape[1]))
216 return self._get_arrayXarray(row, col)
217 return self._major_index_fancy(row)._get_submatrix(minor=col)
def isspmatrix_csr(x):
    """Report whether `x` is an instance of ``csr_matrix``.

    Parameters
    ----------
    x
        object to test

    Returns
    -------
    bool
        True when `x` is a ``csr_matrix``, False for anything else

    Examples
    --------
    >>> from scipy.sparse import csr_array, csr_matrix, coo_matrix, isspmatrix_csr
    >>> isspmatrix_csr(csr_matrix([[5]]))
    True
    >>> isspmatrix_csr(csr_array([[5]]))
    False
    >>> isspmatrix_csr(coo_matrix([[5]]))
    False
    """
    is_csr_matrix = isinstance(x, csr_matrix)
    return is_csr_matrix
246# This namespace class separates array from matrix with isinstance
247class csr_array(_csr_base, sparray):
248 """
249 Compressed Sparse Row array.
251 This can be instantiated in several ways:
252 csr_array(D)
253 where D is a 2-D ndarray
255 csr_array(S)
256 with another sparse array or matrix S (equivalent to S.tocsr())
258 csr_array((M, N), [dtype])
259 to construct an empty array with shape (M, N)
260 dtype is optional, defaulting to dtype='d'.
262 csr_array((data, (row_ind, col_ind)), [shape=(M, N)])
263 where ``data``, ``row_ind`` and ``col_ind`` satisfy the
264 relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
266 csr_array((data, indices, indptr), [shape=(M, N)])
267 is the standard CSR representation where the column indices for
268 row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
269 corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
270 If the shape parameter is not supplied, the array dimensions
271 are inferred from the index arrays.
273 Attributes
274 ----------
275 dtype : dtype
276 Data type of the array
277 shape : 2-tuple
278 Shape of the array
279 ndim : int
280 Number of dimensions (this is always 2)
281 nnz
282 size
283 data
284 CSR format data array of the array
285 indices
286 CSR format index array of the array
287 indptr
288 CSR format index pointer array of the array
289 has_sorted_indices
290 has_canonical_format
291 T
293 Notes
294 -----
296 Sparse arrays can be used in arithmetic operations: they support
297 addition, subtraction, multiplication, division, and matrix power.
299 Advantages of the CSR format
300 - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
301 - efficient row slicing
302 - fast matrix vector products
304 Disadvantages of the CSR format
305 - slow column slicing operations (consider CSC)
306 - changes to the sparsity structure are expensive (consider LIL or DOK)
308 Canonical Format
309 - Within each row, indices are sorted by column.
310 - There are no duplicate entries.
312 Examples
313 --------
315 >>> import numpy as np
316 >>> from scipy.sparse import csr_array
317 >>> csr_array((3, 4), dtype=np.int8).toarray()
318 array([[0, 0, 0, 0],
319 [0, 0, 0, 0],
320 [0, 0, 0, 0]], dtype=int8)
322 >>> row = np.array([0, 0, 1, 2, 2, 2])
323 >>> col = np.array([0, 2, 2, 0, 1, 2])
324 >>> data = np.array([1, 2, 3, 4, 5, 6])
325 >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
326 array([[1, 0, 2],
327 [0, 0, 3],
328 [4, 5, 6]])
330 >>> indptr = np.array([0, 2, 3, 6])
331 >>> indices = np.array([0, 2, 2, 0, 1, 2])
332 >>> data = np.array([1, 2, 3, 4, 5, 6])
333 >>> csr_array((data, indices, indptr), shape=(3, 3)).toarray()
334 array([[1, 0, 2],
335 [0, 0, 3],
336 [4, 5, 6]])
338 Duplicate entries are summed together:
340 >>> row = np.array([0, 1, 2, 0])
341 >>> col = np.array([0, 1, 1, 0])
342 >>> data = np.array([1, 2, 4, 8])
343 >>> csr_array((data, (row, col)), shape=(3, 3)).toarray()
344 array([[9, 0, 0],
345 [0, 2, 0],
346 [0, 4, 0]])
348 As an example of how to construct a CSR array incrementally,
349 the following snippet builds a term-document array from texts:
351 >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
352 >>> indptr = [0]
353 >>> indices = []
354 >>> data = []
355 >>> vocabulary = {}
356 >>> for d in docs:
357 ... for term in d:
358 ... index = vocabulary.setdefault(term, len(vocabulary))
359 ... indices.append(index)
360 ... data.append(1)
361 ... indptr.append(len(indices))
362 ...
363 >>> csr_array((data, indices, indptr), dtype=int).toarray()
364 array([[2, 1, 0, 0],
365 [0, 1, 1, 1]])
367 """
370class csr_matrix(spmatrix, _csr_base):
371 """
372 Compressed Sparse Row matrix.
374 This can be instantiated in several ways:
375 csr_matrix(D)
376 where D is a 2-D ndarray
378 csr_matrix(S)
379 with another sparse array or matrix S (equivalent to S.tocsr())
381 csr_matrix((M, N), [dtype])
382 to construct an empty matrix with shape (M, N)
383 dtype is optional, defaulting to dtype='d'.
385 csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)])
386 where ``data``, ``row_ind`` and ``col_ind`` satisfy the
387 relationship ``a[row_ind[k], col_ind[k]] = data[k]``.
389 csr_matrix((data, indices, indptr), [shape=(M, N)])
390 is the standard CSR representation where the column indices for
391 row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their
392 corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``.
393 If the shape parameter is not supplied, the matrix dimensions
394 are inferred from the index arrays.
396 Attributes
397 ----------
398 dtype : dtype
399 Data type of the matrix
400 shape : 2-tuple
401 Shape of the matrix
402 ndim : int
403 Number of dimensions (this is always 2)
404 nnz
405 size
406 data
407 CSR format data array of the matrix
408 indices
409 CSR format index array of the matrix
410 indptr
411 CSR format index pointer array of the matrix
412 has_sorted_indices
413 has_canonical_format
414 T
416 Notes
417 -----
419 Sparse matrices can be used in arithmetic operations: they support
420 addition, subtraction, multiplication, division, and matrix power.
422 Advantages of the CSR format
423 - efficient arithmetic operations CSR + CSR, CSR * CSR, etc.
424 - efficient row slicing
425 - fast matrix vector products
427 Disadvantages of the CSR format
428 - slow column slicing operations (consider CSC)
429 - changes to the sparsity structure are expensive (consider LIL or DOK)
431 Canonical Format
432 - Within each row, indices are sorted by column.
433 - There are no duplicate entries.
435 Examples
436 --------
438 >>> import numpy as np
439 >>> from scipy.sparse import csr_matrix
440 >>> csr_matrix((3, 4), dtype=np.int8).toarray()
441 array([[0, 0, 0, 0],
442 [0, 0, 0, 0],
443 [0, 0, 0, 0]], dtype=int8)
445 >>> row = np.array([0, 0, 1, 2, 2, 2])
446 >>> col = np.array([0, 2, 2, 0, 1, 2])
447 >>> data = np.array([1, 2, 3, 4, 5, 6])
448 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
449 array([[1, 0, 2],
450 [0, 0, 3],
451 [4, 5, 6]])
453 >>> indptr = np.array([0, 2, 3, 6])
454 >>> indices = np.array([0, 2, 2, 0, 1, 2])
455 >>> data = np.array([1, 2, 3, 4, 5, 6])
456 >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray()
457 array([[1, 0, 2],
458 [0, 0, 3],
459 [4, 5, 6]])
461 Duplicate entries are summed together:
463 >>> row = np.array([0, 1, 2, 0])
464 >>> col = np.array([0, 1, 1, 0])
465 >>> data = np.array([1, 2, 4, 8])
466 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray()
467 array([[9, 0, 0],
468 [0, 2, 0],
469 [0, 4, 0]])
471 As an example of how to construct a CSR matrix incrementally,
472 the following snippet builds a term-document matrix from texts:
474 >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]]
475 >>> indptr = [0]
476 >>> indices = []
477 >>> data = []
478 >>> vocabulary = {}
479 >>> for d in docs:
480 ... for term in d:
481 ... index = vocabulary.setdefault(term, len(vocabulary))
482 ... indices.append(index)
483 ... data.append(1)
484 ... indptr.append(len(indices))
485 ...
486 >>> csr_matrix((data, indices, indptr), dtype=int).toarray()
487 array([[2, 1, 0, 0],
488 [0, 1, 1, 1]])
490 """