Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_compressed.py: 17%
738 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1"""Base class for sparse matrix formats using compressed storage."""
2__all__ = []
4from warnings import warn
5import operator
7import numpy as np
8from scipy._lib._util import _prune_array
10from ._base import spmatrix, isspmatrix, SparseEfficiencyWarning
11from ._data import _data_matrix, _minmax_mixin
12from . import _sparsetools
13from ._sparsetools import (get_csr_submatrix, csr_sample_offsets, csr_todense,
14 csr_sample_values, csr_row_index, csr_row_slice,
15 csr_column_index1, csr_column_index2)
16from ._index import IndexMixin
17from ._sputils import (upcast, upcast_char, to_native, isdense, isshape,
18 getdtype, isscalarlike, isintlike, get_index_dtype,
19 downcast_intp_index, get_sum_dtype, check_shape,
20 is_pydata_spmatrix)
23class _cs_matrix(_data_matrix, _minmax_mixin, IndexMixin):
24 """base matrix class for compressed row- and column-oriented matrices"""
    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Build a compressed-storage (CSR/CSC-style) matrix.

        Parameters
        ----------
        arg1 : sparse matrix, tuple, or array_like
            Accepted forms:
              * another sparse matrix (converted/copied to this format),
              * a shape tuple ``(M, N)`` — creates an empty matrix,
              * a ``(data, ij)`` pair, routed through the COO constructor,
              * a ``(data, indices, indptr)`` triple in native form,
              * anything convertible to a dense ndarray.
        shape : tuple, optional
            Matrix dimensions; inferred from the input when omitted.
        dtype : dtype, optional
            Data type the resulting matrix is cast to.
        copy : bool, optional
            If True, copy the input arrays instead of referencing them.

        Raises
        ------
        ValueError
            On unrecognized constructor usage or when the dimensions
            cannot be inferred.
        """
        _data_matrix.__init__(self)

        if isspmatrix(arg1):
            # Sparse input: copy only when same-format and requested,
            # otherwise convert (conversion already produces new arrays).
            if arg1.format == self.format and copy:
                arg1 = arg1.copy()
            else:
                arg1 = arg1.asformat(self.format)
            self._set_self(arg1)

        elif isinstance(arg1, tuple):
            if isshape(arg1):
                # It's a tuple of matrix dimensions (M, N):
                # create an empty matrix.
                self._shape = check_shape(arg1)
                M, N = self.shape
                # Select index dtype large enough to pass array and
                # scalar parameters to sparsetools
                idx_dtype = get_index_dtype(maxval=max(M, N))
                self.data = np.zeros(0, getdtype(dtype, default=float))
                self.indices = np.zeros(0, idx_dtype)
                # _swap orders (major, minor); indptr needs major_dim + 1
                # entries.
                self.indptr = np.zeros(self._swap((M, N))[0] + 1,
                                       dtype=idx_dtype)
            else:
                if len(arg1) == 2:
                    # (data, ij) format: route through the COO constructor.
                    other = self.__class__(
                        self._coo_container(arg1, shape=shape, dtype=dtype)
                    )
                    self._set_self(other)
                elif len(arg1) == 3:
                    # (data, indices, indptr) format
                    (data, indices, indptr) = arg1

                    # Select index dtype large enough to pass array and
                    # scalar parameters to sparsetools
                    maxval = None
                    if shape is not None:
                        maxval = max(shape)
                    idx_dtype = get_index_dtype((indices, indptr),
                                                maxval=maxval,
                                                check_contents=True)

                    self.indices = np.array(indices, copy=copy,
                                            dtype=idx_dtype)
                    self.indptr = np.array(indptr, copy=copy, dtype=idx_dtype)
                    self.data = np.array(data, copy=copy, dtype=dtype)
                else:
                    raise ValueError("unrecognized {}_matrix "
                                     "constructor usage".format(self.format))

        else:
            # must be dense
            try:
                arg1 = np.asarray(arg1)
            except Exception as e:
                raise ValueError("unrecognized {}_matrix constructor usage"
                                 "".format(self.format)) from e
            self._set_self(self.__class__(
                self._coo_container(arg1, dtype=dtype)
            ))

        # Read matrix dimensions given, if any
        if shape is not None:
            self._shape = check_shape(shape)
        else:
            if self.shape is None:
                # shape not already set, try to infer dimensions
                try:
                    major_dim = len(self.indptr) - 1
                    minor_dim = self.indices.max() + 1
                except Exception as e:
                    raise ValueError('unable to infer matrix dimensions') from e
                else:
                    self._shape = check_shape(self._swap((major_dim,
                                                          minor_dim)))

        if dtype is not None:
            self.data = self.data.astype(dtype, copy=False)

        self.check_format(full_check=False)
108 def getnnz(self, axis=None):
109 if axis is None:
110 return int(self.indptr[-1])
111 else:
112 if axis < 0:
113 axis += 2
114 axis, _ = self._swap((axis, 1 - axis))
115 _, N = self._swap(self.shape)
116 if axis == 0:
117 return np.bincount(downcast_intp_index(self.indices),
118 minlength=N)
119 elif axis == 1:
120 return np.diff(self.indptr)
121 raise ValueError('axis out of bounds')
123 getnnz.__doc__ = spmatrix.getnnz.__doc__
125 def _set_self(self, other, copy=False):
126 """take the member variables of other and assign them to self"""
128 if copy:
129 other = other.copy()
131 self.data = other.data
132 self.indices = other.indices
133 self.indptr = other.indptr
134 self._shape = check_shape(other.shape)
    def check_format(self, full_check=True):
        """check whether the matrix format is valid

        Parameters
        ----------
        full_check : bool, optional
            If `True`, rigorous check, O(N) operations. Otherwise
            basic check, O(1) operations (default True).

        Raises
        ------
        ValueError
            If any structural invariant of the compressed format is
            violated.
        """
        # use _swap to determine proper bounds
        major_name, minor_name = self._swap(('row', 'column'))
        major_dim, minor_dim = self._swap(self.shape)

        # index arrays should have integer data types
        if self.indptr.dtype.kind != 'i':
            warn("indptr array has non-integer dtype ({})"
                 "".format(self.indptr.dtype.name), stacklevel=3)
        if self.indices.dtype.kind != 'i':
            warn("indices array has non-integer dtype ({})"
                 "".format(self.indices.dtype.name), stacklevel=3)

        # Normalize both index arrays to a common, sufficient index dtype
        # and the data array to native byte order.
        idx_dtype = get_index_dtype((self.indptr, self.indices))
        self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
        self.indices = np.asarray(self.indices, dtype=idx_dtype)
        self.data = to_native(self.data)

        # check array shapes
        for x in [self.data.ndim, self.indices.ndim, self.indptr.ndim]:
            if x != 1:
                raise ValueError('data, indices, and indptr should be 1-D')

        # check index pointer: one entry per major line, plus the leading 0
        if (len(self.indptr) != major_dim + 1):
            raise ValueError("index pointer size ({}) should be ({})"
                             "".format(len(self.indptr), major_dim + 1))
        if (self.indptr[0] != 0):
            raise ValueError("index pointer should start with 0")

        # check index and data arrays
        if (len(self.indices) != len(self.data)):
            raise ValueError("indices and data should have the same size")
        if (self.indptr[-1] > len(self.indices)):
            raise ValueError("Last value of index pointer should be less than "
                             "the size of index and data arrays")

        self.prune()

        if full_check:
            # check format validity (more expensive)
            if self.nnz > 0:
                if self.indices.max() >= minor_dim:
                    raise ValueError("{} index values must be < {}"
                                     "".format(minor_name, minor_dim))
                if self.indices.min() < 0:
                    raise ValueError("{} index values must be >= 0"
                                     "".format(minor_name))
                if np.diff(self.indptr).min() < 0:
                    raise ValueError("index pointer values must form a "
                                     "non-decreasing sequence")

        # TODO check for duplicates?
202 #######################
203 # Boolean comparisons #
204 #######################
206 def _scalar_binopt(self, other, op):
207 """Scalar version of self._binopt, for cases in which no new nonzeros
208 are added. Produces a new spmatrix in canonical form.
209 """
210 self.sum_duplicates()
211 res = self._with_data(op(self.data, other), copy=True)
212 res.eliminate_zeros()
213 return res
    def __eq__(self, other):
        """Elementwise 'equal' comparison; returns a boolean result."""
        # Scalar other.
        if isscalarlike(other):
            if np.isnan(other):
                # Nothing equals NaN: an all-False (empty) boolean matrix.
                return self.__class__(self.shape, dtype=np.bool_)

            if other == 0:
                warn("Comparing a sparse matrix with 0 using == is inefficient"
                     ", try using != instead.", SparseEfficiencyWarning,
                     stacklevel=3)
                # == 0 is True at every implicit zero, so compute the
                # complement of the (sparse) != comparison.
                all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
                inv = self._scalar_binopt(other, operator.ne)
                return all_true - inv
            else:
                return self._scalar_binopt(other, operator.eq)
        # Dense other.
        elif isdense(other):
            return self.todense() == other
        # Pydata sparse other.
        elif is_pydata_spmatrix(other):
            return NotImplemented
        # Sparse other.
        elif isspmatrix(other):
            warn("Comparing sparse matrices using == is inefficient, try using"
                 " != instead.", SparseEfficiencyWarning, stacklevel=3)
            # TODO sparse broadcasting
            if self.shape != other.shape:
                return False
            elif self.format != other.format:
                other = other.asformat(self.format)
            # Equal wherever the sparse != comparison is not set.
            res = self._binopt(other, '_ne_')
            all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
            return all_true - res
        else:
            return False
251 def __ne__(self, other):
252 # Scalar other.
253 if isscalarlike(other):
254 if np.isnan(other):
255 warn("Comparing a sparse matrix with nan using != is"
256 " inefficient", SparseEfficiencyWarning, stacklevel=3)
257 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
258 return all_true
259 elif other != 0:
260 warn("Comparing a sparse matrix with a nonzero scalar using !="
261 " is inefficient, try using == instead.",
262 SparseEfficiencyWarning, stacklevel=3)
263 all_true = self.__class__(np.ones(self.shape), dtype=np.bool_)
264 inv = self._scalar_binopt(other, operator.eq)
265 return all_true - inv
266 else:
267 return self._scalar_binopt(other, operator.ne)
268 # Dense other.
269 elif isdense(other):
270 return self.todense() != other
271 # Pydata sparse other.
272 elif is_pydata_spmatrix(other):
273 return NotImplemented
274 # Sparse other.
275 elif isspmatrix(other):
276 # TODO sparse broadcasting
277 if self.shape != other.shape:
278 return True
279 elif self.format != other.format:
280 other = other.asformat(self.format)
281 return self._binopt(other, '_ne_')
282 else:
283 return True
285 def _inequality(self, other, op, op_name, bad_scalar_msg):
286 # Scalar other.
287 if isscalarlike(other):
288 if 0 == other and op_name in ('_le_', '_ge_'):
289 raise NotImplementedError(" >= and <= don't work with 0.")
290 elif op(0, other):
291 warn(bad_scalar_msg, SparseEfficiencyWarning)
292 other_arr = np.empty(self.shape, dtype=np.result_type(other))
293 other_arr.fill(other)
294 other_arr = self.__class__(other_arr)
295 return self._binopt(other_arr, op_name)
296 else:
297 return self._scalar_binopt(other, op)
298 # Dense other.
299 elif isdense(other):
300 return op(self.todense(), other)
301 # Sparse other.
302 elif isspmatrix(other):
303 # TODO sparse broadcasting
304 if self.shape != other.shape:
305 raise ValueError("inconsistent shapes")
306 elif self.format != other.format:
307 other = other.asformat(self.format)
308 if op_name not in ('_ge_', '_le_'):
309 return self._binopt(other, op_name)
311 warn("Comparing sparse matrices using >= and <= is inefficient, "
312 "using <, >, or !=, instead.", SparseEfficiencyWarning)
313 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
314 res = self._binopt(other, '_gt_' if op_name == '_le_' else '_lt_')
315 return all_true - res
316 else:
317 raise ValueError("Operands could not be compared.")
319 def __lt__(self, other):
320 return self._inequality(other, operator.lt, '_lt_',
321 "Comparing a sparse matrix with a scalar "
322 "greater than zero using < is inefficient, "
323 "try using >= instead.")
325 def __gt__(self, other):
326 return self._inequality(other, operator.gt, '_gt_',
327 "Comparing a sparse matrix with a scalar "
328 "less than zero using > is inefficient, "
329 "try using <= instead.")
331 def __le__(self, other):
332 return self._inequality(other, operator.le, '_le_',
333 "Comparing a sparse matrix with a scalar "
334 "greater than zero using <= is inefficient, "
335 "try using > instead.")
337 def __ge__(self, other):
338 return self._inequality(other, operator.ge, '_ge_',
339 "Comparing a sparse matrix with a scalar "
340 "less than zero using >= is inefficient, "
341 "try using < instead.")
343 #################################
344 # Arithmetic operator overrides #
345 #################################
347 def _add_dense(self, other):
348 if other.shape != self.shape:
349 raise ValueError('Incompatible shapes ({} and {})'
350 .format(self.shape, other.shape))
351 dtype = upcast_char(self.dtype.char, other.dtype.char)
352 order = self._swap('CF')[0]
353 result = np.array(other, dtype=dtype, order=order, copy=True)
354 M, N = self._swap(self.shape)
355 y = result if result.flags.c_contiguous else result.T
356 csr_todense(M, N, self.indptr, self.indices, self.data, y)
357 return self._container(result, copy=False)
    def _add_sparse(self, other):
        # Elementwise sum via the sparsetools '_plus_' binary kernel.
        return self._binopt(other, '_plus_')
    def _sub_sparse(self, other):
        # Elementwise difference via the sparsetools '_minus_' binary kernel.
        return self._binopt(other, '_minus_')
    def multiply(self, other):
        """Point-wise multiplication by another matrix, vector, or
        scalar.

        Broadcasting between a matrix and a row/column vector (sparse or
        dense) is supported; incompatible shapes raise ValueError.
        """
        # Scalar multiplication.
        if isscalarlike(other):
            return self._mul_scalar(other)
        # Sparse matrix or vector.
        if isspmatrix(other):
            if self.shape == other.shape:
                other = self.__class__(other)
                return self._binopt(other, '_elmul_')
            # Single element.
            elif other.shape == (1, 1):
                return self._mul_scalar(other.toarray()[0, 0])
            elif self.shape == (1, 1):
                return other._mul_scalar(self.toarray()[0, 0])
            # A row times a column.
            elif self.shape[1] == 1 and other.shape[0] == 1:
                return self._mul_sparse_matrix(other.tocsc())
            elif self.shape[0] == 1 and other.shape[1] == 1:
                return other._mul_sparse_matrix(self.tocsc())
            # Row vector times matrix. other is a row.
            # Broadcasting is realized by multiplying with a diagonal
            # matrix built from the vector.
            elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
                other = self._dia_container(
                    (other.toarray().ravel(), [0]),
                    shape=(other.shape[1], other.shape[1])
                )
                return self._mul_sparse_matrix(other)
            # self is a row.
            elif self.shape[0] == 1 and self.shape[1] == other.shape[1]:
                copy = self._dia_container(
                    (self.toarray().ravel(), [0]),
                    shape=(self.shape[1], self.shape[1])
                )
                return other._mul_sparse_matrix(copy)
            # Column vector times matrix. other is a column.
            elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
                other = self._dia_container(
                    (other.toarray().ravel(), [0]),
                    shape=(other.shape[0], other.shape[0])
                )
                return other._mul_sparse_matrix(self)
            # self is a column.
            elif self.shape[1] == 1 and self.shape[0] == other.shape[0]:
                copy = self._dia_container(
                    (self.toarray().ravel(), [0]),
                    shape=(self.shape[0], self.shape[0])
                )
                return copy._mul_sparse_matrix(other)
            else:
                raise ValueError("inconsistent shapes")

        # Assume other is a dense matrix/array, which produces a single-item
        # object array if other isn't convertible to ndarray.
        other = np.atleast_2d(other)

        if other.ndim != 2:
            return np.multiply(self.toarray(), other)
        # Single element / wrapped object.
        if other.size == 1:
            return self._mul_scalar(other.flat[0])
        # Fast case for trivial sparse matrix.
        elif self.shape == (1, 1):
            return np.multiply(self.toarray()[0, 0], other)

        # COO coordinates give direct access to the (row, col) of each
        # stored entry for the broadcasting cases below.
        ret = self.tocoo()
        # Matching shapes.
        if self.shape == other.shape:
            data = np.multiply(ret.data, other[ret.row, ret.col])
        # Sparse row vector times...
        elif self.shape[0] == 1:
            if other.shape[1] == 1:  # Dense column vector.
                data = np.multiply(ret.data, other)
            elif other.shape[1] == self.shape[1]:  # Dense matrix.
                data = np.multiply(ret.data, other[:, ret.col])
            else:
                raise ValueError("inconsistent shapes")
            # Tile the sparse row pattern across every dense row.
            row = np.repeat(np.arange(other.shape[0]), len(ret.row))
            col = np.tile(ret.col, other.shape[0])
            return self._coo_container(
                (data.view(np.ndarray).ravel(), (row, col)),
                shape=(other.shape[0], self.shape[1]),
                copy=False
            )
        # Sparse column vector times...
        elif self.shape[1] == 1:
            if other.shape[0] == 1:  # Dense row vector.
                data = np.multiply(ret.data[:, None], other)
            elif other.shape[0] == self.shape[0]:  # Dense matrix.
                data = np.multiply(ret.data[:, None], other[ret.row])
            else:
                raise ValueError("inconsistent shapes")
            # Tile the sparse column pattern across every dense column.
            row = np.repeat(ret.row, other.shape[1])
            col = np.tile(np.arange(other.shape[1]), len(ret.col))
            return self._coo_container(
                (data.view(np.ndarray).ravel(), (row, col)),
                shape=(self.shape[0], other.shape[1]),
                copy=False
            )
        # Sparse matrix times dense row vector.
        elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
            data = np.multiply(ret.data, other[:, ret.col].ravel())
        # Sparse matrix times dense column vector.
        elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
            data = np.multiply(ret.data, other[ret.row].ravel())
        else:
            raise ValueError("inconsistent shapes")
        ret.data = data.view(np.ndarray).ravel()
        return ret
476 ###########################
477 # Multiplication handlers #
478 ###########################
480 def _mul_vector(self, other):
481 M, N = self.shape
483 # output array
484 result = np.zeros(M, dtype=upcast_char(self.dtype.char,
485 other.dtype.char))
487 # csr_matvec or csc_matvec
488 fn = getattr(_sparsetools, self.format + '_matvec')
489 fn(M, N, self.indptr, self.indices, self.data, other, result)
491 return result
493 def _mul_multivector(self, other):
494 M, N = self.shape
495 n_vecs = other.shape[1] # number of column vectors
497 result = np.zeros((M, n_vecs),
498 dtype=upcast_char(self.dtype.char, other.dtype.char))
500 # csr_matvecs or csc_matvecs
501 fn = getattr(_sparsetools, self.format + '_matvecs')
502 fn(M, N, n_vecs, self.indptr, self.indices, self.data,
503 other.ravel(), result.ravel())
505 return result
    def _mul_sparse_matrix(self, other):
        """Sparse @ sparse product, returned in this matrix's format."""
        M, K1 = self.shape
        K2, N = other.shape

        major_axis = self._swap((M, N))[0]
        other = self.__class__(other)  # convert to this format

        idx_dtype = get_index_dtype((self.indptr, self.indices,
                                     other.indptr, other.indices))

        # Pass 1: count the nnz of the product so the result arrays can be
        # allocated exactly.
        fn = getattr(_sparsetools, self.format + '_matmat_maxnnz')
        nnz = fn(M, N,
                 np.asarray(self.indptr, dtype=idx_dtype),
                 np.asarray(self.indices, dtype=idx_dtype),
                 np.asarray(other.indptr, dtype=idx_dtype),
                 np.asarray(other.indices, dtype=idx_dtype))

        # Re-select the index dtype now that the result nnz is known.
        idx_dtype = get_index_dtype((self.indptr, self.indices,
                                     other.indptr, other.indices),
                                    maxval=nnz)

        indptr = np.empty(major_axis + 1, dtype=idx_dtype)
        indices = np.empty(nnz, dtype=idx_dtype)
        data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))

        # Pass 2: compute the product into the preallocated arrays.
        fn = getattr(_sparsetools, self.format + '_matmat')
        fn(M, N, np.asarray(self.indptr, dtype=idx_dtype),
           np.asarray(self.indices, dtype=idx_dtype),
           self.data,
           np.asarray(other.indptr, dtype=idx_dtype),
           np.asarray(other.indices, dtype=idx_dtype),
           other.data,
           indptr, indices, data)

        return self.__class__((data, indices, indptr), shape=(M, N))
543 def diagonal(self, k=0):
544 rows, cols = self.shape
545 if k <= -rows or k >= cols:
546 return np.empty(0, dtype=self.data.dtype)
547 fn = getattr(_sparsetools, self.format + "_diagonal")
548 y = np.empty(min(rows + min(k, 0), cols - max(k, 0)),
549 dtype=upcast(self.dtype))
550 fn(k, self.shape[0], self.shape[1], self.indptr, self.indices,
551 self.data, y)
552 return y
554 diagonal.__doc__ = spmatrix.diagonal.__doc__
556 #####################
557 # Other binary ops #
558 #####################
    def _maximum_minimum(self, other, npop, op_name, dense_check):
        """Shared implementation of maximum() and minimum().

        Parameters
        ----------
        npop : numpy ufunc
            ``np.maximum`` or ``np.minimum``.
        op_name : str
            sparsetools binop suffix: ``'_maximum_'`` or ``'_minimum_'``.
        dense_check : callable
            Predicate on a scalar operand; True means the operation changes
            implicit zeros and so produces a dense result.
        """
        if isscalarlike(other):
            if dense_check(other):
                warn("Taking maximum (minimum) with > 0 (< 0) number results"
                     " to a dense matrix.", SparseEfficiencyWarning,
                     stacklevel=3)
                other_arr = np.empty(self.shape, dtype=np.asarray(other).dtype)
                other_arr.fill(other)
                other_arr = self.__class__(other_arr)
                return self._binopt(other_arr, op_name)
            else:
                # Implicit zeros are unchanged: apply to stored data only.
                self.sum_duplicates()
                new_data = npop(self.data, np.asarray(other))
                mat = self.__class__((new_data, self.indices, self.indptr),
                                     dtype=new_data.dtype, shape=self.shape)
                return mat
        elif isdense(other):
            return npop(self.todense(), other)
        elif isspmatrix(other):
            return self._binopt(other, op_name)
        else:
            raise ValueError("Operands not compatible.")
583 def maximum(self, other):
584 return self._maximum_minimum(other, np.maximum,
585 '_maximum_', lambda x: np.asarray(x) > 0)
587 maximum.__doc__ = spmatrix.maximum.__doc__
589 def minimum(self, other):
590 return self._maximum_minimum(other, np.minimum,
591 '_minimum_', lambda x: np.asarray(x) < 0)
593 minimum.__doc__ = spmatrix.minimum.__doc__
595 #####################
596 # Reduce operations #
597 #####################
    def sum(self, axis=None, dtype=None, out=None):
        """Sum the matrix over the given axis. If the axis is None, sum
        over both rows and columns, returning a scalar.
        """
        # The spmatrix base class already does axis=0 and axis=1 efficiently
        # so we only do the case axis=None here
        if (not hasattr(self, 'blocksize') and
                axis in self._swap(((1, -1), (0, 2)))[0]):
            # faster than multiplication for large minor axis in CSC/CSR
            res_dtype = get_sum_dtype(self.dtype)
            ret = np.zeros(len(self.indptr) - 1, dtype=res_dtype)

            # Sum stored entries of each major line; empty lines stay 0.
            major_index, value = self._minor_reduce(np.add)
            ret[major_index] = value
            ret = self._ascontainer(ret)
            if axis % 2 == 1:
                ret = ret.T

            if out is not None and out.shape != ret.shape:
                raise ValueError('dimensions do not match')

            # sum over an empty axis tuple is a cast/copy honoring out/dtype.
            return ret.sum(axis=(), dtype=dtype, out=out)
        # spmatrix will handle the remaining situations when axis
        # is in {None, -1, 0, 1}
        else:
            return spmatrix.sum(self, axis=axis, dtype=dtype, out=out)

    sum.__doc__ = spmatrix.sum.__doc__
628 def _minor_reduce(self, ufunc, data=None):
629 """Reduce nonzeros with a ufunc over the minor axis when non-empty
631 Can be applied to a function of self.data by supplying data parameter.
633 Warning: this does not call sum_duplicates()
635 Returns
636 -------
637 major_index : array of ints
638 Major indices where nonzero
640 value : array of self.dtype
641 Reduce result for nonzeros in each major_index
642 """
643 if data is None:
644 data = self.data
645 major_index = np.flatnonzero(np.diff(self.indptr))
646 value = ufunc.reduceat(data,
647 downcast_intp_index(self.indptr[major_index]))
648 return major_index, value
650 #######################
651 # Getting and Setting #
652 #######################
654 def _get_intXint(self, row, col):
655 M, N = self._swap(self.shape)
656 major, minor = self._swap((row, col))
657 indptr, indices, data = get_csr_submatrix(
658 M, N, self.indptr, self.indices, self.data,
659 major, major + 1, minor, minor + 1)
660 return data.sum(dtype=self.dtype)
662 def _get_sliceXslice(self, row, col):
663 major, minor = self._swap((row, col))
664 if major.step in (1, None) and minor.step in (1, None):
665 return self._get_submatrix(major, minor, copy=True)
666 return self._major_slice(major)._minor_slice(minor)
668 def _get_arrayXarray(self, row, col):
669 # inner indexing
670 idx_dtype = self.indices.dtype
671 M, N = self._swap(self.shape)
672 major, minor = self._swap((row, col))
673 major = np.asarray(major, dtype=idx_dtype)
674 minor = np.asarray(minor, dtype=idx_dtype)
676 val = np.empty(major.size, dtype=self.dtype)
677 csr_sample_values(M, N, self.indptr, self.indices, self.data,
678 major.size, major.ravel(), minor.ravel(), val)
679 if major.ndim == 1:
680 return self._ascontainer(val)
681 return self.__class__(val.reshape(major.shape))
683 def _get_columnXarray(self, row, col):
684 # outer indexing
685 major, minor = self._swap((row, col))
686 return self._major_index_fancy(major)._minor_index_fancy(minor)
688 def _major_index_fancy(self, idx):
689 """Index along the major axis where idx is an array of ints.
690 """
691 idx_dtype = self.indices.dtype
692 indices = np.asarray(idx, dtype=idx_dtype).ravel()
694 _, N = self._swap(self.shape)
695 M = len(indices)
696 new_shape = self._swap((M, N))
697 if M == 0:
698 return self.__class__(new_shape, dtype=self.dtype)
700 row_nnz = self.indptr[indices + 1] - self.indptr[indices]
701 idx_dtype = self.indices.dtype
702 res_indptr = np.zeros(M+1, dtype=idx_dtype)
703 np.cumsum(row_nnz, out=res_indptr[1:])
705 nnz = res_indptr[-1]
706 res_indices = np.empty(nnz, dtype=idx_dtype)
707 res_data = np.empty(nnz, dtype=self.dtype)
708 csr_row_index(M, indices, self.indptr, self.indices, self.data,
709 res_indices, res_data)
711 return self.__class__((res_data, res_indices, res_indptr),
712 shape=new_shape, copy=False)
    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """
        if idx == slice(None):
            return self.copy() if copy else self

        M, N = self._swap(self.shape)
        start, stop, step = idx.indices(M)
        M = len(range(start, stop, step))
        new_shape = self._swap((M, N))
        if M == 0:
            return self.__class__(new_shape, dtype=self.dtype)

        # Work out what slices are needed for `row_nnz`
        # start,stop can be -1, only if step is negative
        start0, stop0 = start, stop
        if stop == -1 and start >= 0:
            # slicing with stop=-1 would be empty; None means "to the end".
            stop0 = None
        start1, stop1 = start + 1, stop + 1

        # nnz of each selected major line (indptr[k+1] - indptr[k]).
        row_nnz = self.indptr[start1:stop1:step] - \
            self.indptr[start0:stop0:step]
        idx_dtype = self.indices.dtype
        res_indptr = np.zeros(M+1, dtype=idx_dtype)
        np.cumsum(row_nnz, out=res_indptr[1:])

        if step == 1:
            # Contiguous selection: slice indices/data directly.
            all_idx = slice(self.indptr[start], self.indptr[stop])
            res_indices = np.array(self.indices[all_idx], copy=copy)
            res_data = np.array(self.data[all_idx], copy=copy)
        else:
            # Strided selection: gather via the sparsetools kernel.
            nnz = res_indptr[-1]
            res_indices = np.empty(nnz, dtype=idx_dtype)
            res_data = np.empty(nnz, dtype=self.dtype)
            csr_row_slice(start, stop, step, self.indptr, self.indices,
                          self.data, res_indices, res_data)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
    def _minor_index_fancy(self, idx):
        """Index along the minor axis where idx is an array of ints.
        """
        idx_dtype = self.indices.dtype
        idx = np.asarray(idx, dtype=idx_dtype).ravel()

        M, N = self._swap(self.shape)
        k = len(idx)
        new_shape = self._swap((M, k))
        if k == 0:
            return self.__class__(new_shape, dtype=self.dtype)

        # pass 1: count idx entries and compute new indptr
        col_offsets = np.zeros(N, dtype=idx_dtype)
        res_indptr = np.empty_like(self.indptr)
        csr_column_index1(k, idx, M, N, self.indptr, self.indices,
                          col_offsets, res_indptr)

        # pass 2: copy indices/data for selected idxs
        # col_order maps sorted minor positions back to their order in idx.
        col_order = np.argsort(idx).astype(idx_dtype, copy=False)
        nnz = res_indptr[-1]
        res_indices = np.empty(nnz, dtype=idx_dtype)
        res_data = np.empty(nnz, dtype=self.dtype)
        csr_column_index2(col_order, col_offsets, len(self.indices),
                          self.indices, self.data, res_indices, res_data)
        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
782 def _minor_slice(self, idx, copy=False):
783 """Index along the minor axis where idx is a slice object.
784 """
785 if idx == slice(None):
786 return self.copy() if copy else self
788 M, N = self._swap(self.shape)
789 start, stop, step = idx.indices(N)
790 N = len(range(start, stop, step))
791 if N == 0:
792 return self.__class__(self._swap((M, N)), dtype=self.dtype)
793 if step == 1:
794 return self._get_submatrix(minor=idx, copy=copy)
795 # TODO: don't fall back to fancy indexing here
796 return self._minor_index_fancy(np.arange(start, stop, step))
    def _get_submatrix(self, major=None, minor=None, copy=False):
        """Return a submatrix of this matrix.

        major, minor: None, int, or slice with step 1
        """
        M, N = self._swap(self.shape)
        i0, i1 = _process_slice(major, M)
        j0, j1 = _process_slice(minor, N)

        # Whole-matrix selection: skip the sparsetools call entirely.
        if i0 == 0 and j0 == 0 and i1 == M and j1 == N:
            return self.copy() if copy else self

        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, i0, i1, j0, j1)

        shape = self._swap((i1 - i0, j1 - j0))
        return self.__class__((data, indices, indptr), shape=shape,
                              dtype=self.dtype, copy=False)
817 def _set_intXint(self, row, col, x):
818 i, j = self._swap((row, col))
819 self._set_many(i, j, x)
821 def _set_arrayXarray(self, row, col, x):
822 i, j = self._swap((row, col))
823 self._set_many(i, j, x)
    def _set_arrayXarray_sparse(self, row, col, x):
        """Assign sparse *x* at positions (row, col), broadcasting x's
        single row/column across the target region when needed."""
        # clear entries that will be overwritten
        self._zero_many(*self._swap((row, col)))

        M, N = row.shape  # matches col.shape
        # x may be a single sparse row/column to broadcast over the target.
        broadcast_row = M != 1 and x.shape[0] == 1
        broadcast_col = N != 1 and x.shape[1] == 1
        r, c = x.row, x.col

        x = np.asarray(x.data, dtype=self.dtype)
        if x.size == 0:
            # x is all-zero: the _zero_many above already did the work.
            return

        if broadcast_row:
            r = np.repeat(np.arange(M), len(r))
            c = np.tile(c, M)
            x = np.tile(x, M)
        if broadcast_col:
            r = np.repeat(r, N)
            c = np.tile(np.arange(N), len(c))
            x = np.repeat(x, N)
        # only assign entries in the new sparsity structure
        i, j = self._swap((row[r, c], col[r, c]))
        self._set_many(i, j, x)
850 def _setdiag(self, values, k):
851 if 0 in self.shape:
852 return
854 M, N = self.shape
855 broadcast = (values.ndim == 0)
857 if k < 0:
858 if broadcast:
859 max_index = min(M + k, N)
860 else:
861 max_index = min(M + k, N, len(values))
862 i = np.arange(max_index, dtype=self.indices.dtype)
863 j = np.arange(max_index, dtype=self.indices.dtype)
864 i -= k
866 else:
867 if broadcast:
868 max_index = min(M, N - k)
869 else:
870 max_index = min(M, N - k, len(values))
871 i = np.arange(max_index, dtype=self.indices.dtype)
872 j = np.arange(max_index, dtype=self.indices.dtype)
873 j += k
875 if not broadcast:
876 values = values[:len(i)]
878 self[i, j] = values
880 def _prepare_indices(self, i, j):
881 M, N = self._swap(self.shape)
883 def check_bounds(indices, bound):
884 idx = indices.max()
885 if idx >= bound:
886 raise IndexError('index (%d) out of range (>= %d)' %
887 (idx, bound))
888 idx = indices.min()
889 if idx < -bound:
890 raise IndexError('index (%d) out of range (< -%d)' %
891 (idx, bound))
893 i = np.array(i, dtype=self.indices.dtype, copy=False, ndmin=1).ravel()
894 j = np.array(j, dtype=self.indices.dtype, copy=False, ndmin=1).ravel()
895 check_bounds(i, M)
896 check_bounds(j, N)
897 return i, j, M, N
    def _set_many(self, i, j, x):
        """Sets value at each (i, j) to x

        Here (i,j) index major and minor respectively, and must not contain
        duplicate entries.
        """
        i, j, M, N = self._prepare_indices(i, j)
        x = np.array(x, dtype=self.dtype, copy=False, ndmin=1).ravel()

        # Map each (i, j) to its offset in self.data; -1 means not stored.
        n_samples = x.size
        offsets = np.empty(n_samples, dtype=self.indices.dtype)
        ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
                                 i, j, offsets)
        if ret == 1:
            # ret == 1 signals duplicate entries: canonicalize, then
            # rinse and repeat
            self.sum_duplicates()
            csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
                               i, j, offsets)

        if -1 not in offsets:
            # only affects existing non-zero cells
            self.data[offsets] = x
            return

        else:
            warn("Changing the sparsity structure of a {}_matrix is expensive."
                 " lil_matrix is more efficient.".format(self.format),
                 SparseEfficiencyWarning, stacklevel=3)
            # replace where possible
            mask = offsets > -1
            self.data[offsets[mask]] = x[mask]
            # only insertions remain; normalize negative indices first
            mask = ~mask
            i = i[mask]
            i[i < 0] += M
            j = j[mask]
            j[j < 0] += N
            self._insert_many(i, j, x[mask])
938 def _zero_many(self, i, j):
939 """Sets value at each (i, j) to zero, preserving sparsity structure.
941 Here (i,j) index major and minor respectively.
942 """
943 i, j, M, N = self._prepare_indices(i, j)
945 n_samples = len(i)
946 offsets = np.empty(n_samples, dtype=self.indices.dtype)
947 ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
948 i, j, offsets)
949 if ret == 1:
950 # rinse and repeat
951 self.sum_duplicates()
952 csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
953 i, j, offsets)
955 # only assign zeros to the existing sparsity structure
956 self.data[offsets[offsets > -1]] = 0
    def _insert_many(self, i, j, x):
        """Inserts new nonzero at each (i, j) with value x

        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.
        Modifies i, j, x in place.
        """
        order = np.argsort(i, kind='mergesort')  # stable for duplicates
        i = i.take(order, mode='clip')
        j = j.take(order, mode='clip')
        x = x.take(order, mode='clip')

        # Remember whether to restore sorted-indices at the end.
        do_sort = self.has_sorted_indices

        # Update index data type
        idx_dtype = get_index_dtype((self.indices, self.indptr),
                                    maxval=(self.indptr[-1] + x.size))
        self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
        self.indices = np.asarray(self.indices, dtype=idx_dtype)
        i = np.asarray(i, dtype=idx_dtype)
        j = np.asarray(j, dtype=idx_dtype)

        # Collate old and new in chunks by major index
        indices_parts = []
        data_parts = []
        # ui: distinct major indices receiving insertions;
        # ui_indptr: start of each group within the sorted i/j/x.
        ui, ui_indptr = np.unique(i, return_index=True)
        ui_indptr = np.append(ui_indptr, len(j))
        new_nnzs = np.diff(ui_indptr)
        prev = 0
        for c, (ii, js, je) in enumerate(zip(ui, ui_indptr, ui_indptr[1:])):
            # old entries
            start = self.indptr[prev]
            stop = self.indptr[ii]
            indices_parts.append(self.indices[start:stop])
            data_parts.append(self.data[start:stop])

            # handle duplicate j: keep last setting
            uj, uj_indptr = np.unique(j[js:je][::-1], return_index=True)
            if len(uj) == je - js:
                indices_parts.append(j[js:je])
                data_parts.append(x[js:je])
            else:
                # Reversing first means np.unique keeps the LAST duplicate.
                indices_parts.append(j[js:je][::-1][uj_indptr])
                data_parts.append(x[js:je][::-1][uj_indptr])
                new_nnzs[c] = len(uj)

            prev = ii

        # remaining old entries
        start = self.indptr[ii]
        indices_parts.append(self.indices[start:])
        data_parts.append(self.data[start:])

        # update attributes
        self.indices = np.concatenate(indices_parts)
        self.data = np.concatenate(data_parts)
        # Rebuild indptr: old per-line counts plus the inserted counts.
        nnzs = np.empty(self.indptr.shape, dtype=idx_dtype)
        nnzs[0] = idx_dtype(0)
        indptr_diff = np.diff(self.indptr)
        indptr_diff[ui] += new_nnzs
        nnzs[1:] = indptr_diff
        self.indptr = np.cumsum(nnzs, out=nnzs)

        if do_sort:
            # TODO: only sort where necessary
            self.has_sorted_indices = False
            self.sort_indices()

        self.check_format(full_check=False)
1030 ######################
1031 # Conversion methods #
1032 ######################
1034 def tocoo(self, copy=True):
1035 major_dim, minor_dim = self._swap(self.shape)
1036 minor_indices = self.indices
1037 major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
1038 _sparsetools.expandptr(major_dim, self.indptr, major_indices)
1039 row, col = self._swap((major_indices, minor_indices))
1041 return self._coo_container(
1042 (self.data, (row, col)), self.shape, copy=copy,
1043 dtype=self.dtype
1044 )
1046 tocoo.__doc__ = spmatrix.tocoo.__doc__
1048 def toarray(self, order=None, out=None):
1049 if out is None and order is None:
1050 order = self._swap('cf')[0]
1051 out = self._process_toarray_args(order, out)
1052 if not (out.flags.c_contiguous or out.flags.f_contiguous):
1053 raise ValueError('Output array must be C or F contiguous')
1054 # align ideal order with output array order
1055 if out.flags.c_contiguous:
1056 x = self.tocsr()
1057 y = out
1058 else:
1059 x = self.tocsc()
1060 y = out.T
1061 M, N = x._swap(x.shape)
1062 csr_todense(M, N, x.indptr, x.indices, x.data, y)
1063 return out
1065 toarray.__doc__ = spmatrix.toarray.__doc__
1067 ##############################################################
1068 # methods that examine or modify the internal data structure #
1069 ##############################################################
1071 def eliminate_zeros(self):
1072 """Remove zero entries from the matrix
1074 This is an *in place* operation.
1075 """
1076 M, N = self._swap(self.shape)
1077 _sparsetools.csr_eliminate_zeros(M, N, self.indptr, self.indices,
1078 self.data)
1079 self.prune() # nnz may have changed
    def __get_has_canonical_format(self):
        """Determine whether the matrix has sorted indices and no duplicates

        Returns
            - True: if the above applies
            - False: otherwise

        has_canonical_format implies has_sorted_indices, so if the latter flag
        is False, so will the former be; if the former is found True, the
        latter flag is also set.
        """
        # first check to see if result was cached
        if not getattr(self, '_has_sorted_indices', True):
            # not sorted => not canonical
            self._has_canonical_format = False
        elif not hasattr(self, '_has_canonical_format'):
            # not cached: ask sparsetools; assign through the property
            # setter so has_sorted_indices is updated as well when True
            self.has_canonical_format = bool(
                _sparsetools.csr_has_canonical_format(
                    len(self.indptr) - 1, self.indptr, self.indices))
        return self._has_canonical_format

    def __set_has_canonical_format(self, val):
        # Cache the flag; canonical implies sorted, so propagate True.
        self._has_canonical_format = bool(val)
        if val:
            self.has_sorted_indices = True

    has_canonical_format = property(fget=__get_has_canonical_format,
                                    fset=__set_has_canonical_format)
1111 def sum_duplicates(self):
1112 """Eliminate duplicate matrix entries by adding them together
1114 This is an *in place* operation.
1115 """
1116 if self.has_canonical_format:
1117 return
1118 self.sort_indices()
1120 M, N = self._swap(self.shape)
1121 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices,
1122 self.data)
1124 self.prune() # nnz may have changed
1125 self.has_canonical_format = True
    def __get_sorted(self):
        """Determine whether the matrix has sorted indices

        Returns
            - True: if the indices of the matrix are in sorted order
            - False: otherwise

        """
        # first check to see if result was cached
        if not hasattr(self, '_has_sorted_indices'):
            self._has_sorted_indices = bool(
                _sparsetools.csr_has_sorted_indices(
                    len(self.indptr) - 1, self.indptr, self.indices))
        return self._has_sorted_indices

    def __set_sorted(self, val):
        # Cache only; setting True does NOT sort -- see sort_indices().
        self._has_sorted_indices = bool(val)

    has_sorted_indices = property(fget=__get_sorted, fset=__set_sorted)
1148 def sorted_indices(self):
1149 """Return a copy of this matrix with sorted indices
1150 """
1151 A = self.copy()
1152 A.sort_indices()
1153 return A
1155 # an alternative that has linear complexity is the following
1156 # although the previous option is typically faster
1157 # return self.toother().toother()
1159 def sort_indices(self):
1160 """Sort the indices of this matrix *in place*
1161 """
1163 if not self.has_sorted_indices:
1164 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr,
1165 self.indices, self.data)
1166 self.has_sorted_indices = True
1168 def prune(self):
1169 """Remove empty space after all non-zero elements.
1170 """
1171 major_dim = self._swap(self.shape)[0]
1173 if len(self.indptr) != major_dim + 1:
1174 raise ValueError('index pointer has invalid length')
1175 if len(self.indices) < self.nnz:
1176 raise ValueError('indices array has fewer than nnz elements')
1177 if len(self.data) < self.nnz:
1178 raise ValueError('data array has fewer than nnz elements')
1180 self.indices = _prune_array(self.indices[:self.nnz])
1181 self.data = _prune_array(self.data[:self.nnz])
    def resize(self, *shape):
        shape = check_shape(shape)
        if hasattr(self, 'blocksize'):
            # BSR: the new shape must be a whole number of blocks; work
            # in block counts rather than element counts below
            bm, bn = self.blocksize
            new_M, rm = divmod(shape[0], bm)
            new_N, rn = divmod(shape[1], bn)
            if rm or rn:
                raise ValueError("shape must be divisible into %s blocks. "
                                 "Got %s" % (self.blocksize, shape))
            M, N = self.shape[0] // bm, self.shape[1] // bn
        else:
            new_M, new_N = self._swap(shape)
            M, N = self._swap(self.shape)

        if new_M < M:
            # shrinking the major axis: truncate the structure arrays
            self.indices = self.indices[:self.indptr[new_M]]
            self.data = self.data[:self.indptr[new_M]]
            self.indptr = self.indptr[:new_M + 1]
        elif new_M > M:
            # growing the major axis: pad indptr so the new (empty) rows
            # all point at the end of the existing data
            self.indptr = np.resize(self.indptr, new_M + 1)
            self.indptr[M + 1:].fill(self.indptr[M])

        if new_N < N:
            # shrinking the minor axis: drop out-of-range entries and
            # rebuild indptr from the surviving per-row counts
            mask = self.indices < new_N
            if not np.all(mask):
                self.indices = self.indices[mask]
                self.data = self.data[mask]
                major_index, val = self._minor_reduce(np.add, mask)
                self.indptr.fill(0)
                self.indptr[1:][major_index] = val
                np.cumsum(self.indptr, out=self.indptr)

        self._shape = shape

    resize.__doc__ = spmatrix.resize.__doc__
1219 ###################
1220 # utility methods #
1221 ###################
1223 # needed by _data_matrix
1224 def _with_data(self, data, copy=True):
1225 """Returns a matrix with the same sparsity structure as self,
1226 but with different data. By default the structure arrays
1227 (i.e. .indptr and .indices) are copied.
1228 """
1229 if copy:
1230 return self.__class__((data, self.indices.copy(),
1231 self.indptr.copy()),
1232 shape=self.shape,
1233 dtype=data.dtype)
1234 else:
1235 return self.__class__((data, self.indices, self.indptr),
1236 shape=self.shape, dtype=data.dtype)
    def _binopt(self, other, op):
        """apply the binary operation fn to two sparse matrices."""
        other = self.__class__(other)

        # e.g. csr_plus_csr, csr_minus_csr, etc.
        fn = getattr(_sparsetools, self.format + op + self.format)

        # worst case: the two sparsity patterns do not overlap at all
        maxnnz = self.nnz + other.nnz
        idx_dtype = get_index_dtype((self.indptr, self.indices,
                                     other.indptr, other.indices),
                                    maxval=maxnnz)
        indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
        indices = np.empty(maxnnz, dtype=idx_dtype)

        # comparison ops produce boolean output regardless of operand dtypes
        bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
        if op in bool_ops:
            data = np.empty(maxnnz, dtype=np.bool_)
        else:
            data = np.empty(maxnnz, dtype=upcast(self.dtype, other.dtype))

        fn(self.shape[0], self.shape[1],
           np.asarray(self.indptr, dtype=idx_dtype),
           np.asarray(self.indices, dtype=idx_dtype),
           self.data,
           np.asarray(other.indptr, dtype=idx_dtype),
           np.asarray(other.indices, dtype=idx_dtype),
           other.data,
           indptr, indices, data)

        A = self.__class__((data, indices, indptr), shape=self.shape)
        # trim the unused tail of the over-allocated output arrays
        A.prune()

        return A
    def _divide_sparse(self, other):
        """
        Divide this matrix by a second sparse matrix.
        """
        if other.shape != self.shape:
            raise ValueError('inconsistent shapes')

        r = self._binopt(other, '_eldiv_')

        if np.issubdtype(r.dtype, np.inexact):
            # Eldiv leaves entries outside the combined sparsity
            # pattern empty, so they must be filled manually.
            # Everything outside of other's sparsity is NaN, and everything
            # inside it is either zero or defined by eldiv.
            out = np.empty(self.shape, dtype=self.dtype)
            out.fill(np.nan)
            row, col = other.nonzero()
            out[row, col] = 0
            r = r.tocoo()
            out[r.row, r.col] = r.data
            # result is dense, so wrap it in the dense container type
            out = self._container(out)
        else:
            # integers types go with nan <-> 0
            out = r

        return out
1300def _process_slice(sl, num):
1301 if sl is None:
1302 i0, i1 = 0, num
1303 elif isinstance(sl, slice):
1304 i0, i1, stride = sl.indices(num)
1305 if stride != 1:
1306 raise ValueError('slicing with step != 1 not supported')
1307 i0 = min(i0, i1) # give an empty slice when i0 > i1
1308 elif isintlike(sl):
1309 if sl < 0:
1310 sl += num
1311 i0, i1 = sl, sl + 1
1312 if i0 < 0 or i1 > num:
1313 raise IndexError('index out of bounds: 0 <= %d < %d <= %d' %
1314 (i0, i1, num))
1315 else:
1316 raise TypeError('expected slice or scalar')
1318 return i0, i1