Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_coo.py: 14%
290 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1""" A sparse matrix in COOrdinate or 'triplet' format"""
3__docformat__ = "restructuredtext en"
5__all__ = ['coo_matrix', 'isspmatrix_coo']
7from warnings import warn
9import numpy as np
12from ._sparsetools import coo_tocsr, coo_todense, coo_matvec
13from ._base import isspmatrix, SparseEfficiencyWarning, spmatrix
14from ._data import _data_matrix, _minmax_mixin
15from ._sputils import (upcast, upcast_char, to_native, isshape, getdtype,
16 getdata, get_index_dtype, downcast_intp_index,
17 check_shape, check_reshape_kwargs)
19import operator
22class coo_matrix(_data_matrix, _minmax_mixin):
23 """
24 A sparse matrix in COOrdinate format.
26 Also known as the 'ijv' or 'triplet' format.
28 This can be instantiated in several ways:
29 coo_matrix(D)
30 with a dense matrix D
32 coo_matrix(S)
33 with another sparse matrix S (equivalent to S.tocoo())
35 coo_matrix((M, N), [dtype])
36 to construct an empty matrix with shape (M, N)
37 dtype is optional, defaulting to dtype='d'.
39 coo_matrix((data, (i, j)), [shape=(M, N)])
40 to construct from three arrays:
41 1. data[:] the entries of the matrix, in any order
42 2. i[:] the row indices of the matrix entries
43 3. j[:] the column indices of the matrix entries
45 Where ``A[i[k], j[k]] = data[k]``. When shape is not
46 specified, it is inferred from the index arrays
48 Attributes
49 ----------
50 dtype : dtype
51 Data type of the matrix
52 shape : 2-tuple
53 Shape of the matrix
54 ndim : int
55 Number of dimensions (this is always 2)
56 nnz
57 Number of stored values, including explicit zeros
58 data
59 COO format data array of the matrix
60 row
61 COO format row index array of the matrix
62 col
63 COO format column index array of the matrix
65 Notes
66 -----
68 Sparse matrices can be used in arithmetic operations: they support
69 addition, subtraction, multiplication, division, and matrix power.
71 Advantages of the COO format
72 - facilitates fast conversion among sparse formats
73 - permits duplicate entries (see example)
74 - very fast conversion to and from CSR/CSC formats
76 Disadvantages of the COO format
77 - does not directly support:
78 + arithmetic operations
79 + slicing
81 Intended Usage
82 - COO is a fast format for constructing sparse matrices
83 - Once a matrix has been constructed, convert to CSR or
84 CSC format for fast arithmetic and matrix vector operations
85 - By default when converting to CSR or CSC format, duplicate (i,j)
86 entries will be summed together. This facilitates efficient
87 construction of finite element matrices and the like. (see example)
89 Examples
90 --------
92 >>> # Constructing an empty matrix
93 >>> import numpy as np
94 >>> from scipy.sparse import coo_matrix
95 >>> coo_matrix((3, 4), dtype=np.int8).toarray()
96 array([[0, 0, 0, 0],
97 [0, 0, 0, 0],
98 [0, 0, 0, 0]], dtype=int8)
100 >>> # Constructing a matrix using ijv format
101 >>> row = np.array([0, 3, 1, 0])
102 >>> col = np.array([0, 3, 1, 2])
103 >>> data = np.array([4, 5, 7, 9])
104 >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
105 array([[4, 0, 9, 0],
106 [0, 7, 0, 0],
107 [0, 0, 0, 0],
108 [0, 0, 0, 5]])
110 >>> # Constructing a matrix with duplicate indices
111 >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
112 >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
113 >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
114 >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
115 >>> # Duplicate indices are maintained until implicitly or explicitly summed
116 >>> np.max(coo.data)
117 1
118 >>> coo.toarray()
119 array([[3, 0, 1, 0],
120 [0, 2, 0, 0],
121 [0, 0, 0, 0],
122 [0, 0, 0, 1]])
124 """
125 format = 'coo'
127 def __init__(self, arg1, shape=None, dtype=None, copy=False):
128 _data_matrix.__init__(self)
130 if isinstance(arg1, tuple):
131 if isshape(arg1):
132 M, N = arg1
133 self._shape = check_shape((M, N))
134 idx_dtype = get_index_dtype(maxval=max(M, N))
135 data_dtype = getdtype(dtype, default=float)
136 self.row = np.array([], dtype=idx_dtype)
137 self.col = np.array([], dtype=idx_dtype)
138 self.data = np.array([], dtype=data_dtype)
139 self.has_canonical_format = True
140 else:
141 try:
142 obj, (row, col) = arg1
143 except (TypeError, ValueError) as e:
144 raise TypeError('invalid input format') from e
146 if shape is None:
147 if len(row) == 0 or len(col) == 0:
148 raise ValueError('cannot infer dimensions from zero '
149 'sized index arrays')
150 M = operator.index(np.max(row)) + 1
151 N = operator.index(np.max(col)) + 1
152 self._shape = check_shape((M, N))
153 else:
154 # Use 2 steps to ensure shape has length 2.
155 M, N = shape
156 self._shape = check_shape((M, N))
158 idx_dtype = get_index_dtype(maxval=max(self.shape))
159 self.row = np.array(row, copy=copy, dtype=idx_dtype)
160 self.col = np.array(col, copy=copy, dtype=idx_dtype)
161 self.data = getdata(obj, copy=copy, dtype=dtype)
162 self.has_canonical_format = False
163 else:
164 if isspmatrix(arg1):
165 if isspmatrix_coo(arg1) and copy:
166 self.row = arg1.row.copy()
167 self.col = arg1.col.copy()
168 self.data = arg1.data.copy()
169 self._shape = check_shape(arg1.shape)
170 else:
171 coo = arg1.tocoo()
172 self.row = coo.row
173 self.col = coo.col
174 self.data = coo.data
175 self._shape = check_shape(coo.shape)
176 self.has_canonical_format = False
177 else:
178 #dense argument
179 M = np.atleast_2d(np.asarray(arg1))
181 if M.ndim != 2:
182 raise TypeError('expected dimension <= 2 array or matrix')
184 self._shape = check_shape(M.shape)
185 if shape is not None:
186 if check_shape(shape) != self._shape:
187 raise ValueError('inconsistent shapes: %s != %s' %
188 (shape, self._shape))
190 self.row, self.col = M.nonzero()
191 self.data = M[self.row, self.col]
192 self.has_canonical_format = True
194 if dtype is not None:
195 self.data = self.data.astype(dtype, copy=False)
197 self._check()
199 def reshape(self, *args, **kwargs):
200 shape = check_shape(args, self.shape)
201 order, copy = check_reshape_kwargs(kwargs)
203 # Return early if reshape is not required
204 if shape == self.shape:
205 if copy:
206 return self.copy()
207 else:
208 return self
210 nrows, ncols = self.shape
212 if order == 'C':
213 # Upcast to avoid overflows: the coo_matrix constructor
214 # below will downcast the results to a smaller dtype, if
215 # possible.
216 dtype = get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1)))
218 flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col
219 new_row, new_col = divmod(flat_indices, shape[1])
220 elif order == 'F':
221 dtype = get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1)))
223 flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row
224 new_col, new_row = divmod(flat_indices, shape[0])
225 else:
226 raise ValueError("'order' must be 'C' or 'F'")
228 # Handle copy here rather than passing on to the constructor so that no
229 # copy will be made of new_row and new_col regardless
230 if copy:
231 new_data = self.data.copy()
232 else:
233 new_data = self.data
235 return self.__class__((new_data, (new_row, new_col)),
236 shape=shape, copy=False)
238 reshape.__doc__ = spmatrix.reshape.__doc__
240 def getnnz(self, axis=None):
241 if axis is None:
242 nnz = len(self.data)
243 if nnz != len(self.row) or nnz != len(self.col):
244 raise ValueError('row, column, and data array must all be the '
245 'same length')
247 if self.data.ndim != 1 or self.row.ndim != 1 or \
248 self.col.ndim != 1:
249 raise ValueError('row, column, and data arrays must be 1-D')
251 return int(nnz)
253 if axis < 0:
254 axis += 2
255 if axis == 0:
256 return np.bincount(downcast_intp_index(self.col),
257 minlength=self.shape[1])
258 elif axis == 1:
259 return np.bincount(downcast_intp_index(self.row),
260 minlength=self.shape[0])
261 else:
262 raise ValueError('axis out of bounds')
264 getnnz.__doc__ = spmatrix.getnnz.__doc__
266 def _check(self):
267 """ Checks data structure for consistency """
269 # index arrays should have integer data types
270 if self.row.dtype.kind != 'i':
271 warn("row index array has non-integer dtype (%s) "
272 % self.row.dtype.name)
273 if self.col.dtype.kind != 'i':
274 warn("col index array has non-integer dtype (%s) "
275 % self.col.dtype.name)
277 idx_dtype = get_index_dtype(maxval=max(self.shape))
278 self.row = np.asarray(self.row, dtype=idx_dtype)
279 self.col = np.asarray(self.col, dtype=idx_dtype)
280 self.data = to_native(self.data)
282 if self.nnz > 0:
283 if self.row.max() >= self.shape[0]:
284 raise ValueError('row index exceeds matrix dimensions')
285 if self.col.max() >= self.shape[1]:
286 raise ValueError('column index exceeds matrix dimensions')
287 if self.row.min() < 0:
288 raise ValueError('negative row index found')
289 if self.col.min() < 0:
290 raise ValueError('negative column index found')
292 def transpose(self, axes=None, copy=False):
293 if axes is not None:
294 raise ValueError(("Sparse matrices do not support "
295 "an 'axes' parameter because swapping "
296 "dimensions is the only logical permutation."))
298 M, N = self.shape
299 return self.__class__((self.data, (self.col, self.row)),
300 shape=(N, M), copy=copy)
302 transpose.__doc__ = spmatrix.transpose.__doc__
304 def resize(self, *shape):
305 shape = check_shape(shape)
306 new_M, new_N = shape
307 M, N = self.shape
309 if new_M < M or new_N < N:
310 mask = np.logical_and(self.row < new_M, self.col < new_N)
311 if not mask.all():
312 self.row = self.row[mask]
313 self.col = self.col[mask]
314 self.data = self.data[mask]
316 self._shape = shape
318 resize.__doc__ = spmatrix.resize.__doc__
320 def toarray(self, order=None, out=None):
321 """See the docstring for `spmatrix.toarray`."""
322 B = self._process_toarray_args(order, out)
323 fortran = int(B.flags.f_contiguous)
324 if not fortran and not B.flags.c_contiguous:
325 raise ValueError("Output array must be C or F contiguous")
326 M,N = self.shape
327 coo_todense(M, N, self.nnz, self.row, self.col, self.data,
328 B.ravel('A'), fortran)
329 return B
331 def tocsc(self, copy=False):
332 """Convert this matrix to Compressed Sparse Column format
334 Duplicate entries will be summed together.
336 Examples
337 --------
338 >>> from numpy import array
339 >>> from scipy.sparse import coo_matrix
340 >>> row = array([0, 0, 1, 3, 1, 0, 0])
341 >>> col = array([0, 2, 1, 3, 1, 0, 0])
342 >>> data = array([1, 1, 1, 1, 1, 1, 1])
343 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsc()
344 >>> A.toarray()
345 array([[3, 0, 1, 0],
346 [0, 2, 0, 0],
347 [0, 0, 0, 0],
348 [0, 0, 0, 1]])
350 """
351 if self.nnz == 0:
352 return self._csc_container(self.shape, dtype=self.dtype)
353 else:
354 M,N = self.shape
355 idx_dtype = get_index_dtype((self.col, self.row),
356 maxval=max(self.nnz, M))
357 row = self.row.astype(idx_dtype, copy=False)
358 col = self.col.astype(idx_dtype, copy=False)
360 indptr = np.empty(N + 1, dtype=idx_dtype)
361 indices = np.empty_like(row, dtype=idx_dtype)
362 data = np.empty_like(self.data, dtype=upcast(self.dtype))
364 coo_tocsr(N, M, self.nnz, col, row, self.data,
365 indptr, indices, data)
367 x = self._csc_container((data, indices, indptr), shape=self.shape)
368 if not self.has_canonical_format:
369 x.sum_duplicates()
370 return x
372 def tocsr(self, copy=False):
373 """Convert this matrix to Compressed Sparse Row format
375 Duplicate entries will be summed together.
377 Examples
378 --------
379 >>> from numpy import array
380 >>> from scipy.sparse import coo_matrix
381 >>> row = array([0, 0, 1, 3, 1, 0, 0])
382 >>> col = array([0, 2, 1, 3, 1, 0, 0])
383 >>> data = array([1, 1, 1, 1, 1, 1, 1])
384 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr()
385 >>> A.toarray()
386 array([[3, 0, 1, 0],
387 [0, 2, 0, 0],
388 [0, 0, 0, 0],
389 [0, 0, 0, 1]])
391 """
392 if self.nnz == 0:
393 return self._csr_container(self.shape, dtype=self.dtype)
394 else:
395 M,N = self.shape
396 idx_dtype = get_index_dtype((self.row, self.col),
397 maxval=max(self.nnz, N))
398 row = self.row.astype(idx_dtype, copy=False)
399 col = self.col.astype(idx_dtype, copy=False)
401 indptr = np.empty(M + 1, dtype=idx_dtype)
402 indices = np.empty_like(col, dtype=idx_dtype)
403 data = np.empty_like(self.data, dtype=upcast(self.dtype))
405 coo_tocsr(M, N, self.nnz, row, col, self.data,
406 indptr, indices, data)
408 x = self._csr_container((data, indices, indptr), shape=self.shape)
409 if not self.has_canonical_format:
410 x.sum_duplicates()
411 return x
413 def tocoo(self, copy=False):
414 if copy:
415 return self.copy()
416 else:
417 return self
419 tocoo.__doc__ = spmatrix.tocoo.__doc__
421 def todia(self, copy=False):
422 self.sum_duplicates()
423 ks = self.col - self.row # the diagonal for each nonzero
424 diags, diag_idx = np.unique(ks, return_inverse=True)
426 if len(diags) > 100:
427 # probably undesired, should todia() have a maxdiags parameter?
428 warn("Constructing a DIA matrix with %d diagonals "
429 "is inefficient" % len(diags), SparseEfficiencyWarning)
431 #initialize and fill in data array
432 if self.data.size == 0:
433 data = np.zeros((0, 0), dtype=self.dtype)
434 else:
435 data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
436 data[diag_idx, self.col] = self.data
438 return self._dia_container((data, diags), shape=self.shape)
440 todia.__doc__ = spmatrix.todia.__doc__
442 def todok(self, copy=False):
443 self.sum_duplicates()
444 dok = self._dok_container((self.shape), dtype=self.dtype)
445 dok._update(zip(zip(self.row,self.col),self.data))
447 return dok
449 todok.__doc__ = spmatrix.todok.__doc__
451 def diagonal(self, k=0):
452 rows, cols = self.shape
453 if k <= -rows or k >= cols:
454 return np.empty(0, dtype=self.data.dtype)
455 diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
456 dtype=self.dtype)
457 diag_mask = (self.row + k) == self.col
459 if self.has_canonical_format:
460 row = self.row[diag_mask]
461 data = self.data[diag_mask]
462 else:
463 row, _, data = self._sum_duplicates(self.row[diag_mask],
464 self.col[diag_mask],
465 self.data[diag_mask])
466 diag[row + min(k, 0)] = data
468 return diag
470 diagonal.__doc__ = _data_matrix.diagonal.__doc__
472 def _setdiag(self, values, k):
473 M, N = self.shape
474 if values.ndim and not len(values):
475 return
476 idx_dtype = self.row.dtype
478 # Determine which triples to keep and where to put the new ones.
479 full_keep = self.col - self.row != k
480 if k < 0:
481 max_index = min(M+k, N)
482 if values.ndim:
483 max_index = min(max_index, len(values))
484 keep = np.logical_or(full_keep, self.col >= max_index)
485 new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
486 new_col = np.arange(max_index, dtype=idx_dtype)
487 else:
488 max_index = min(M, N-k)
489 if values.ndim:
490 max_index = min(max_index, len(values))
491 keep = np.logical_or(full_keep, self.row >= max_index)
492 new_row = np.arange(max_index, dtype=idx_dtype)
493 new_col = np.arange(k, k + max_index, dtype=idx_dtype)
495 # Define the array of data consisting of the entries to be added.
496 if values.ndim:
497 new_data = values[:max_index]
498 else:
499 new_data = np.empty(max_index, dtype=self.dtype)
500 new_data[:] = values
502 # Update the internal structure.
503 self.row = np.concatenate((self.row[keep], new_row))
504 self.col = np.concatenate((self.col[keep], new_col))
505 self.data = np.concatenate((self.data[keep], new_data))
506 self.has_canonical_format = False
508 # needed by _data_matrix
509 def _with_data(self,data,copy=True):
510 """Returns a matrix with the same sparsity structure as self,
511 but with different data. By default the index arrays
512 (i.e. .row and .col) are copied.
513 """
514 if copy:
515 return self.__class__((data, (self.row.copy(), self.col.copy())),
516 shape=self.shape, dtype=data.dtype)
517 else:
518 return self.__class__((data, (self.row, self.col)),
519 shape=self.shape, dtype=data.dtype)
521 def sum_duplicates(self):
522 """Eliminate duplicate matrix entries by adding them together
524 This is an *in place* operation
525 """
526 if self.has_canonical_format:
527 return
528 summed = self._sum_duplicates(self.row, self.col, self.data)
529 self.row, self.col, self.data = summed
530 self.has_canonical_format = True
532 def _sum_duplicates(self, row, col, data):
533 # Assumes (data, row, col) not in canonical format.
534 if len(data) == 0:
535 return row, col, data
536 order = np.lexsort((row, col))
537 row = row[order]
538 col = col[order]
539 data = data[order]
540 unique_mask = ((row[1:] != row[:-1]) |
541 (col[1:] != col[:-1]))
542 unique_mask = np.append(True, unique_mask)
543 row = row[unique_mask]
544 col = col[unique_mask]
545 unique_inds, = np.nonzero(unique_mask)
546 data = np.add.reduceat(data, unique_inds, dtype=self.dtype)
547 return row, col, data
549 def eliminate_zeros(self):
550 """Remove zero entries from the matrix
552 This is an *in place* operation
553 """
554 mask = self.data != 0
555 self.data = self.data[mask]
556 self.row = self.row[mask]
557 self.col = self.col[mask]
559 #######################
560 # Arithmetic handlers #
561 #######################
563 def _add_dense(self, other):
564 if other.shape != self.shape:
565 raise ValueError('Incompatible shapes ({} and {})'
566 .format(self.shape, other.shape))
567 dtype = upcast_char(self.dtype.char, other.dtype.char)
568 result = np.array(other, dtype=dtype, copy=True)
569 fortran = int(result.flags.f_contiguous)
570 M, N = self.shape
571 coo_todense(M, N, self.nnz, self.row, self.col, self.data,
572 result.ravel('A'), fortran)
573 return self._container(result, copy=False)
575 def _mul_vector(self, other):
576 #output array
577 result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
578 other.dtype.char))
579 coo_matvec(self.nnz, self.row, self.col, self.data, other, result)
580 return result
582 def _mul_multivector(self, other):
583 result = np.zeros((other.shape[1], self.shape[0]),
584 dtype=upcast_char(self.dtype.char, other.dtype.char))
585 for i, col in enumerate(other.T):
586 coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i])
587 return result.T.view(type=type(other))
590def isspmatrix_coo(x):
591 """Is x of coo_matrix type?
593 Parameters
594 ----------
595 x
596 object to check for being a coo matrix
598 Returns
599 -------
600 bool
601 True if x is a coo matrix, False otherwise
603 Examples
604 --------
605 >>> from scipy.sparse import coo_matrix, isspmatrix_coo
606 >>> isspmatrix_coo(coo_matrix([[5]]))
607 True
609 >>> from scipy.sparse import coo_matrix, csr_matrix, isspmatrix_coo
610 >>> isspmatrix_coo(csr_matrix([[5]]))
611 False
612 """
613 from ._arrays import coo_array
614 return isinstance(x, coo_matrix) or isinstance(x, coo_array)