Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_compressed.py: 11%
740 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-23 06:43 +0000
1"""Base class for sparse matrix formats using compressed storage."""
2__all__ = []
4from warnings import warn
5import operator
7import numpy as np
8from scipy._lib._util import _prune_array
10from ._base import _spbase, issparse, SparseEfficiencyWarning
11from ._data import _data_matrix, _minmax_mixin
12from . import _sparsetools
13from ._sparsetools import (get_csr_submatrix, csr_sample_offsets, csr_todense,
14 csr_sample_values, csr_row_index, csr_row_slice,
15 csr_column_index1, csr_column_index2)
16from ._index import IndexMixin
17from ._sputils import (upcast, upcast_char, to_native, isdense, isshape,
18 getdtype, isscalarlike, isintlike, downcast_intp_index, get_sum_dtype, check_shape,
19 is_pydata_spmatrix)
22class _cs_matrix(_data_matrix, _minmax_mixin, IndexMixin):
23 """base array/matrix class for compressed row- and column-oriented arrays/matrices"""
25 def __init__(self, arg1, shape=None, dtype=None, copy=False):
26 _data_matrix.__init__(self)
28 if issparse(arg1):
29 if arg1.format == self.format and copy:
30 arg1 = arg1.copy()
31 else:
32 arg1 = arg1.asformat(self.format)
33 self._set_self(arg1)
35 elif isinstance(arg1, tuple):
36 if isshape(arg1):
37 # It's a tuple of matrix dimensions (M, N)
38 # create empty matrix
39 self._shape = check_shape(arg1)
40 M, N = self.shape
41 # Select index dtype large enough to pass array and
42 # scalar parameters to sparsetools
43 idx_dtype = self._get_index_dtype(maxval=max(M, N))
44 self.data = np.zeros(0, getdtype(dtype, default=float))
45 self.indices = np.zeros(0, idx_dtype)
46 self.indptr = np.zeros(self._swap((M, N))[0] + 1,
47 dtype=idx_dtype)
48 else:
49 if len(arg1) == 2:
50 # (data, ij) format
51 other = self.__class__(
52 self._coo_container(arg1, shape=shape, dtype=dtype)
53 )
54 self._set_self(other)
55 elif len(arg1) == 3:
56 # (data, indices, indptr) format
57 (data, indices, indptr) = arg1
59 # Select index dtype large enough to pass array and
60 # scalar parameters to sparsetools
61 maxval = None
62 if shape is not None:
63 maxval = max(shape)
64 idx_dtype = self._get_index_dtype((indices, indptr),
65 maxval=maxval,
66 check_contents=True)
68 self.indices = np.array(indices, copy=copy,
69 dtype=idx_dtype)
70 self.indptr = np.array(indptr, copy=copy, dtype=idx_dtype)
71 self.data = np.array(data, copy=copy, dtype=dtype)
72 else:
73 raise ValueError("unrecognized {}_matrix "
74 "constructor usage".format(self.format))
76 else:
77 # must be dense
78 try:
79 arg1 = np.asarray(arg1)
80 except Exception as e:
81 raise ValueError("unrecognized {}_matrix constructor usage"
82 "".format(self.format)) from e
83 self._set_self(self.__class__(
84 self._coo_container(arg1, dtype=dtype)
85 ))
87 # Read matrix dimensions given, if any
88 if shape is not None:
89 self._shape = check_shape(shape)
90 else:
91 if self.shape is None:
92 # shape not already set, try to infer dimensions
93 try:
94 major_dim = len(self.indptr) - 1
95 minor_dim = self.indices.max() + 1
96 except Exception as e:
97 raise ValueError('unable to infer matrix dimensions') from e
98 else:
99 self._shape = check_shape(self._swap((major_dim,
100 minor_dim)))
102 if dtype is not None:
103 self.data = self.data.astype(dtype, copy=False)
105 self.check_format(full_check=False)
107 def _getnnz(self, axis=None):
108 if axis is None:
109 return int(self.indptr[-1])
110 else:
111 if axis < 0:
112 axis += 2
113 axis, _ = self._swap((axis, 1 - axis))
114 _, N = self._swap(self.shape)
115 if axis == 0:
116 return np.bincount(downcast_intp_index(self.indices),
117 minlength=N)
118 elif axis == 1:
119 return np.diff(self.indptr)
120 raise ValueError('axis out of bounds')
122 _getnnz.__doc__ = _spbase._getnnz.__doc__
124 def _set_self(self, other, copy=False):
125 """take the member variables of other and assign them to self"""
127 if copy:
128 other = other.copy()
130 self.data = other.data
131 self.indices = other.indices
132 self.indptr = other.indptr
133 self._shape = check_shape(other.shape)
135 def check_format(self, full_check=True):
136 """Check whether the array/matrix respects the CSR or CSC format.
138 Parameters
139 ----------
140 full_check : bool, optional
141 If `True`, run rigorous check, scanning arrays for valid values.
142 Note that activating those check might copy arrays for casting,
143 modifying indices and index pointers' inplace.
144 If `False`, run basic checks on attributes. O(1) operations.
145 Default is `True`.
146 """
147 # use _swap to determine proper bounds
148 major_name, minor_name = self._swap(('row', 'column'))
149 major_dim, minor_dim = self._swap(self.shape)
151 # index arrays should have integer data types
152 if self.indptr.dtype.kind != 'i':
153 warn("indptr array has non-integer dtype ({})"
154 "".format(self.indptr.dtype.name), stacklevel=3)
155 if self.indices.dtype.kind != 'i':
156 warn("indices array has non-integer dtype ({})"
157 "".format(self.indices.dtype.name), stacklevel=3)
159 # check array shapes
160 for x in [self.data.ndim, self.indices.ndim, self.indptr.ndim]:
161 if x != 1:
162 raise ValueError('data, indices, and indptr should be 1-D')
164 # check index pointer
165 if (len(self.indptr) != major_dim + 1):
166 raise ValueError("index pointer size ({}) should be ({})"
167 "".format(len(self.indptr), major_dim + 1))
168 if (self.indptr[0] != 0):
169 raise ValueError("index pointer should start with 0")
171 # check index and data arrays
172 if (len(self.indices) != len(self.data)):
173 raise ValueError("indices and data should have the same size")
174 if (self.indptr[-1] > len(self.indices)):
175 raise ValueError("Last value of index pointer should be less than "
176 "the size of index and data arrays")
178 self.prune()
180 if full_check:
181 # check format validity (more expensive)
182 if self.nnz > 0:
183 if self.indices.max() >= minor_dim:
184 raise ValueError("{} index values must be < {}"
185 "".format(minor_name, minor_dim))
186 if self.indices.min() < 0:
187 raise ValueError("{} index values must be >= 0"
188 "".format(minor_name))
189 if np.diff(self.indptr).min() < 0:
190 raise ValueError("index pointer values must form a "
191 "non-decreasing sequence")
193 idx_dtype = self._get_index_dtype((self.indptr, self.indices))
194 self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
195 self.indices = np.asarray(self.indices, dtype=idx_dtype)
196 self.data = to_native(self.data)
198 # if not self.has_sorted_indices():
199 # warn('Indices were not in sorted order. Sorting indices.')
200 # self.sort_indices()
201 # assert(self.has_sorted_indices())
202 # TODO check for duplicates?
204 #######################
205 # Boolean comparisons #
206 #######################
208 def _scalar_binopt(self, other, op):
209 """Scalar version of self._binopt, for cases in which no new nonzeros
210 are added. Produces a new sparse array in canonical form.
211 """
212 self.sum_duplicates()
213 res = self._with_data(op(self.data, other), copy=True)
214 res.eliminate_zeros()
215 return res
217 def __eq__(self, other):
218 # Scalar other.
219 if isscalarlike(other):
220 if np.isnan(other):
221 return self.__class__(self.shape, dtype=np.bool_)
223 if other == 0:
224 warn("Comparing a sparse matrix with 0 using == is inefficient"
225 ", try using != instead.", SparseEfficiencyWarning,
226 stacklevel=3)
227 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
228 inv = self._scalar_binopt(other, operator.ne)
229 return all_true - inv
230 else:
231 return self._scalar_binopt(other, operator.eq)
232 # Dense other.
233 elif isdense(other):
234 return self.todense() == other
235 # Pydata sparse other.
236 elif is_pydata_spmatrix(other):
237 return NotImplemented
238 # Sparse other.
239 elif issparse(other):
240 warn("Comparing sparse matrices using == is inefficient, try using"
241 " != instead.", SparseEfficiencyWarning, stacklevel=3)
242 # TODO sparse broadcasting
243 if self.shape != other.shape:
244 return False
245 elif self.format != other.format:
246 other = other.asformat(self.format)
247 res = self._binopt(other, '_ne_')
248 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
249 return all_true - res
250 else:
251 return False
253 def __ne__(self, other):
254 # Scalar other.
255 if isscalarlike(other):
256 if np.isnan(other):
257 warn("Comparing a sparse matrix with nan using != is"
258 " inefficient", SparseEfficiencyWarning, stacklevel=3)
259 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
260 return all_true
261 elif other != 0:
262 warn("Comparing a sparse matrix with a nonzero scalar using !="
263 " is inefficient, try using == instead.",
264 SparseEfficiencyWarning, stacklevel=3)
265 all_true = self.__class__(np.ones(self.shape), dtype=np.bool_)
266 inv = self._scalar_binopt(other, operator.eq)
267 return all_true - inv
268 else:
269 return self._scalar_binopt(other, operator.ne)
270 # Dense other.
271 elif isdense(other):
272 return self.todense() != other
273 # Pydata sparse other.
274 elif is_pydata_spmatrix(other):
275 return NotImplemented
276 # Sparse other.
277 elif issparse(other):
278 # TODO sparse broadcasting
279 if self.shape != other.shape:
280 return True
281 elif self.format != other.format:
282 other = other.asformat(self.format)
283 return self._binopt(other, '_ne_')
284 else:
285 return True
287 def _inequality(self, other, op, op_name, bad_scalar_msg):
288 # Scalar other.
289 if isscalarlike(other):
290 if 0 == other and op_name in ('_le_', '_ge_'):
291 raise NotImplementedError(" >= and <= don't work with 0.")
292 elif op(0, other):
293 warn(bad_scalar_msg, SparseEfficiencyWarning)
294 other_arr = np.empty(self.shape, dtype=np.result_type(other))
295 other_arr.fill(other)
296 other_arr = self.__class__(other_arr)
297 return self._binopt(other_arr, op_name)
298 else:
299 return self._scalar_binopt(other, op)
300 # Dense other.
301 elif isdense(other):
302 return op(self.todense(), other)
303 # Sparse other.
304 elif issparse(other):
305 # TODO sparse broadcasting
306 if self.shape != other.shape:
307 raise ValueError("inconsistent shapes")
308 elif self.format != other.format:
309 other = other.asformat(self.format)
310 if op_name not in ('_ge_', '_le_'):
311 return self._binopt(other, op_name)
313 warn("Comparing sparse matrices using >= and <= is inefficient, "
314 "using <, >, or !=, instead.", SparseEfficiencyWarning)
315 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
316 res = self._binopt(other, '_gt_' if op_name == '_le_' else '_lt_')
317 return all_true - res
318 else:
319 raise ValueError("Operands could not be compared.")
321 def __lt__(self, other):
322 return self._inequality(other, operator.lt, '_lt_',
323 "Comparing a sparse matrix with a scalar "
324 "greater than zero using < is inefficient, "
325 "try using >= instead.")
327 def __gt__(self, other):
328 return self._inequality(other, operator.gt, '_gt_',
329 "Comparing a sparse matrix with a scalar "
330 "less than zero using > is inefficient, "
331 "try using <= instead.")
333 def __le__(self, other):
334 return self._inequality(other, operator.le, '_le_',
335 "Comparing a sparse matrix with a scalar "
336 "greater than zero using <= is inefficient, "
337 "try using > instead.")
339 def __ge__(self, other):
340 return self._inequality(other, operator.ge, '_ge_',
341 "Comparing a sparse matrix with a scalar "
342 "less than zero using >= is inefficient, "
343 "try using < instead.")
345 #################################
346 # Arithmetic operator overrides #
347 #################################
349 def _add_dense(self, other):
350 if other.shape != self.shape:
351 raise ValueError('Incompatible shapes ({} and {})'
352 .format(self.shape, other.shape))
353 dtype = upcast_char(self.dtype.char, other.dtype.char)
354 order = self._swap('CF')[0]
355 result = np.array(other, dtype=dtype, order=order, copy=True)
356 M, N = self._swap(self.shape)
357 y = result if result.flags.c_contiguous else result.T
358 csr_todense(M, N, self.indptr, self.indices, self.data, y)
359 return self._container(result, copy=False)
361 def _add_sparse(self, other):
362 return self._binopt(other, '_plus_')
364 def _sub_sparse(self, other):
365 return self._binopt(other, '_minus_')
367 def multiply(self, other):
368 """Point-wise multiplication by another array/matrix, vector, or
369 scalar.
370 """
371 # Scalar multiplication.
372 if isscalarlike(other):
373 return self._mul_scalar(other)
374 # Sparse matrix or vector.
375 if issparse(other):
376 if self.shape == other.shape:
377 other = self.__class__(other)
378 return self._binopt(other, '_elmul_')
379 # Single element.
380 elif other.shape == (1, 1):
381 return self._mul_scalar(other.toarray()[0, 0])
382 elif self.shape == (1, 1):
383 return other._mul_scalar(self.toarray()[0, 0])
384 # A row times a column.
385 elif self.shape[1] == 1 and other.shape[0] == 1:
386 return self._mul_sparse_matrix(other.tocsc())
387 elif self.shape[0] == 1 and other.shape[1] == 1:
388 return other._mul_sparse_matrix(self.tocsc())
389 # Row vector times matrix. other is a row.
390 elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
391 other = self._dia_container(
392 (other.toarray().ravel(), [0]),
393 shape=(other.shape[1], other.shape[1])
394 )
395 return self._mul_sparse_matrix(other)
396 # self is a row.
397 elif self.shape[0] == 1 and self.shape[1] == other.shape[1]:
398 copy = self._dia_container(
399 (self.toarray().ravel(), [0]),
400 shape=(self.shape[1], self.shape[1])
401 )
402 return other._mul_sparse_matrix(copy)
403 # Column vector times matrix. other is a column.
404 elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
405 other = self._dia_container(
406 (other.toarray().ravel(), [0]),
407 shape=(other.shape[0], other.shape[0])
408 )
409 return other._mul_sparse_matrix(self)
410 # self is a column.
411 elif self.shape[1] == 1 and self.shape[0] == other.shape[0]:
412 copy = self._dia_container(
413 (self.toarray().ravel(), [0]),
414 shape=(self.shape[0], self.shape[0])
415 )
416 return copy._mul_sparse_matrix(other)
417 else:
418 raise ValueError("inconsistent shapes")
420 # Assume other is a dense matrix/array, which produces a single-item
421 # object array if other isn't convertible to ndarray.
422 other = np.atleast_2d(other)
424 if other.ndim != 2:
425 return np.multiply(self.toarray(), other)
426 # Single element / wrapped object.
427 if other.size == 1:
428 return self._mul_scalar(other.flat[0])
429 # Fast case for trivial sparse matrix.
430 elif self.shape == (1, 1):
431 return np.multiply(self.toarray()[0, 0], other)
433 ret = self.tocoo()
434 # Matching shapes.
435 if self.shape == other.shape:
436 data = np.multiply(ret.data, other[ret.row, ret.col])
437 # Sparse row vector times...
438 elif self.shape[0] == 1:
439 if other.shape[1] == 1: # Dense column vector.
440 data = np.multiply(ret.data, other)
441 elif other.shape[1] == self.shape[1]: # Dense matrix.
442 data = np.multiply(ret.data, other[:, ret.col])
443 else:
444 raise ValueError("inconsistent shapes")
445 row = np.repeat(np.arange(other.shape[0]), len(ret.row))
446 col = np.tile(ret.col, other.shape[0])
447 return self._coo_container(
448 (data.view(np.ndarray).ravel(), (row, col)),
449 shape=(other.shape[0], self.shape[1]),
450 copy=False
451 )
452 # Sparse column vector times...
453 elif self.shape[1] == 1:
454 if other.shape[0] == 1: # Dense row vector.
455 data = np.multiply(ret.data[:, None], other)
456 elif other.shape[0] == self.shape[0]: # Dense matrix.
457 data = np.multiply(ret.data[:, None], other[ret.row])
458 else:
459 raise ValueError("inconsistent shapes")
460 row = np.repeat(ret.row, other.shape[1])
461 col = np.tile(np.arange(other.shape[1]), len(ret.col))
462 return self._coo_container(
463 (data.view(np.ndarray).ravel(), (row, col)),
464 shape=(self.shape[0], other.shape[1]),
465 copy=False
466 )
467 # Sparse matrix times dense row vector.
468 elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
469 data = np.multiply(ret.data, other[:, ret.col].ravel())
470 # Sparse matrix times dense column vector.
471 elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
472 data = np.multiply(ret.data, other[ret.row].ravel())
473 else:
474 raise ValueError("inconsistent shapes")
475 ret.data = data.view(np.ndarray).ravel()
476 return ret
478 ###########################
479 # Multiplication handlers #
480 ###########################
482 def _mul_vector(self, other):
483 M, N = self.shape
485 # output array
486 result = np.zeros(M, dtype=upcast_char(self.dtype.char,
487 other.dtype.char))
489 # csr_matvec or csc_matvec
490 fn = getattr(_sparsetools, self.format + '_matvec')
491 fn(M, N, self.indptr, self.indices, self.data, other, result)
493 return result
495 def _mul_multivector(self, other):
496 M, N = self.shape
497 n_vecs = other.shape[1] # number of column vectors
499 result = np.zeros((M, n_vecs),
500 dtype=upcast_char(self.dtype.char, other.dtype.char))
502 # csr_matvecs or csc_matvecs
503 fn = getattr(_sparsetools, self.format + '_matvecs')
504 fn(M, N, n_vecs, self.indptr, self.indices, self.data,
505 other.ravel(), result.ravel())
507 return result
509 def _mul_sparse_matrix(self, other):
510 M, K1 = self.shape
511 K2, N = other.shape
513 major_axis = self._swap((M, N))[0]
514 other = self.__class__(other) # convert to this format
516 idx_dtype = self._get_index_dtype((self.indptr, self.indices,
517 other.indptr, other.indices))
519 fn = getattr(_sparsetools, self.format + '_matmat_maxnnz')
520 nnz = fn(M, N,
521 np.asarray(self.indptr, dtype=idx_dtype),
522 np.asarray(self.indices, dtype=idx_dtype),
523 np.asarray(other.indptr, dtype=idx_dtype),
524 np.asarray(other.indices, dtype=idx_dtype))
526 idx_dtype = self._get_index_dtype((self.indptr, self.indices,
527 other.indptr, other.indices),
528 maxval=nnz)
530 indptr = np.empty(major_axis + 1, dtype=idx_dtype)
531 indices = np.empty(nnz, dtype=idx_dtype)
532 data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
534 fn = getattr(_sparsetools, self.format + '_matmat')
535 fn(M, N, np.asarray(self.indptr, dtype=idx_dtype),
536 np.asarray(self.indices, dtype=idx_dtype),
537 self.data,
538 np.asarray(other.indptr, dtype=idx_dtype),
539 np.asarray(other.indices, dtype=idx_dtype),
540 other.data,
541 indptr, indices, data)
543 return self.__class__((data, indices, indptr), shape=(M, N))
545 def diagonal(self, k=0):
546 rows, cols = self.shape
547 if k <= -rows or k >= cols:
548 return np.empty(0, dtype=self.data.dtype)
549 fn = getattr(_sparsetools, self.format + "_diagonal")
550 y = np.empty(min(rows + min(k, 0), cols - max(k, 0)),
551 dtype=upcast(self.dtype))
552 fn(k, self.shape[0], self.shape[1], self.indptr, self.indices,
553 self.data, y)
554 return y
556 diagonal.__doc__ = _spbase.diagonal.__doc__
558 #####################
559 # Other binary ops #
560 #####################
562 def _maximum_minimum(self, other, npop, op_name, dense_check):
563 if isscalarlike(other):
564 if dense_check(other):
565 warn("Taking maximum (minimum) with > 0 (< 0) number results"
566 " to a dense matrix.", SparseEfficiencyWarning,
567 stacklevel=3)
568 other_arr = np.empty(self.shape, dtype=np.asarray(other).dtype)
569 other_arr.fill(other)
570 other_arr = self.__class__(other_arr)
571 return self._binopt(other_arr, op_name)
572 else:
573 self.sum_duplicates()
574 new_data = npop(self.data, np.asarray(other))
575 mat = self.__class__((new_data, self.indices, self.indptr),
576 dtype=new_data.dtype, shape=self.shape)
577 return mat
578 elif isdense(other):
579 return npop(self.todense(), other)
580 elif issparse(other):
581 return self._binopt(other, op_name)
582 else:
583 raise ValueError("Operands not compatible.")
585 def maximum(self, other):
586 return self._maximum_minimum(other, np.maximum,
587 '_maximum_', lambda x: np.asarray(x) > 0)
589 maximum.__doc__ = _spbase.maximum.__doc__
591 def minimum(self, other):
592 return self._maximum_minimum(other, np.minimum,
593 '_minimum_', lambda x: np.asarray(x) < 0)
595 minimum.__doc__ = _spbase.minimum.__doc__
597 #####################
598 # Reduce operations #
599 #####################
601 def sum(self, axis=None, dtype=None, out=None):
602 """Sum the array/matrix over the given axis. If the axis is None, sum
603 over both rows and columns, returning a scalar.
604 """
605 # The _spbase base class already does axis=0 and axis=1 efficiently
606 # so we only do the case axis=None here
607 if (not hasattr(self, 'blocksize') and
608 axis in self._swap(((1, -1), (0, 2)))[0]):
609 # faster than multiplication for large minor axis in CSC/CSR
610 res_dtype = get_sum_dtype(self.dtype)
611 ret = np.zeros(len(self.indptr) - 1, dtype=res_dtype)
613 major_index, value = self._minor_reduce(np.add)
614 ret[major_index] = value
615 ret = self._ascontainer(ret)
616 if axis % 2 == 1:
617 ret = ret.T
619 if out is not None and out.shape != ret.shape:
620 raise ValueError('dimensions do not match')
622 return ret.sum(axis=(), dtype=dtype, out=out)
623 # _spbase will handle the remaining situations when axis
624 # is in {None, -1, 0, 1}
625 else:
626 return _spbase.sum(self, axis=axis, dtype=dtype, out=out)
628 sum.__doc__ = _spbase.sum.__doc__
630 def _minor_reduce(self, ufunc, data=None):
631 """Reduce nonzeros with a ufunc over the minor axis when non-empty
633 Can be applied to a function of self.data by supplying data parameter.
635 Warning: this does not call sum_duplicates()
637 Returns
638 -------
639 major_index : array of ints
640 Major indices where nonzero
642 value : array of self.dtype
643 Reduce result for nonzeros in each major_index
644 """
645 if data is None:
646 data = self.data
647 major_index = np.flatnonzero(np.diff(self.indptr))
648 value = ufunc.reduceat(data,
649 downcast_intp_index(self.indptr[major_index]))
650 return major_index, value
652 #######################
653 # Getting and Setting #
654 #######################
656 def _get_intXint(self, row, col):
657 M, N = self._swap(self.shape)
658 major, minor = self._swap((row, col))
659 indptr, indices, data = get_csr_submatrix(
660 M, N, self.indptr, self.indices, self.data,
661 major, major + 1, minor, minor + 1)
662 return data.sum(dtype=self.dtype)
664 def _get_sliceXslice(self, row, col):
665 major, minor = self._swap((row, col))
666 if major.step in (1, None) and minor.step in (1, None):
667 return self._get_submatrix(major, minor, copy=True)
668 return self._major_slice(major)._minor_slice(minor)
670 def _get_arrayXarray(self, row, col):
671 # inner indexing
672 idx_dtype = self.indices.dtype
673 M, N = self._swap(self.shape)
674 major, minor = self._swap((row, col))
675 major = np.asarray(major, dtype=idx_dtype)
676 minor = np.asarray(minor, dtype=idx_dtype)
678 val = np.empty(major.size, dtype=self.dtype)
679 csr_sample_values(M, N, self.indptr, self.indices, self.data,
680 major.size, major.ravel(), minor.ravel(), val)
681 if major.ndim == 1:
682 return self._ascontainer(val)
683 return self.__class__(val.reshape(major.shape))
685 def _get_columnXarray(self, row, col):
686 # outer indexing
687 major, minor = self._swap((row, col))
688 return self._major_index_fancy(major)._minor_index_fancy(minor)
690 def _major_index_fancy(self, idx):
691 """Index along the major axis where idx is an array of ints.
692 """
693 idx_dtype = self.indices.dtype
694 indices = np.asarray(idx, dtype=idx_dtype).ravel()
696 _, N = self._swap(self.shape)
697 M = len(indices)
698 new_shape = self._swap((M, N))
699 if M == 0:
700 return self.__class__(new_shape, dtype=self.dtype)
702 row_nnz = self.indptr[indices + 1] - self.indptr[indices]
703 idx_dtype = self.indices.dtype
704 res_indptr = np.zeros(M+1, dtype=idx_dtype)
705 np.cumsum(row_nnz, out=res_indptr[1:])
707 nnz = res_indptr[-1]
708 res_indices = np.empty(nnz, dtype=idx_dtype)
709 res_data = np.empty(nnz, dtype=self.dtype)
710 csr_row_index(M, indices, self.indptr, self.indices, self.data,
711 res_indices, res_data)
713 return self.__class__((res_data, res_indices, res_indptr),
714 shape=new_shape, copy=False)
716 def _major_slice(self, idx, copy=False):
717 """Index along the major axis where idx is a slice object.
718 """
719 if idx == slice(None):
720 return self.copy() if copy else self
722 M, N = self._swap(self.shape)
723 start, stop, step = idx.indices(M)
724 M = len(range(start, stop, step))
725 new_shape = self._swap((M, N))
726 if M == 0:
727 return self.__class__(new_shape, dtype=self.dtype)
729 # Work out what slices are needed for `row_nnz`
730 # start,stop can be -1, only if step is negative
731 start0, stop0 = start, stop
732 if stop == -1 and start >= 0:
733 stop0 = None
734 start1, stop1 = start + 1, stop + 1
736 row_nnz = self.indptr[start1:stop1:step] - \
737 self.indptr[start0:stop0:step]
738 idx_dtype = self.indices.dtype
739 res_indptr = np.zeros(M+1, dtype=idx_dtype)
740 np.cumsum(row_nnz, out=res_indptr[1:])
742 if step == 1:
743 all_idx = slice(self.indptr[start], self.indptr[stop])
744 res_indices = np.array(self.indices[all_idx], copy=copy)
745 res_data = np.array(self.data[all_idx], copy=copy)
746 else:
747 nnz = res_indptr[-1]
748 res_indices = np.empty(nnz, dtype=idx_dtype)
749 res_data = np.empty(nnz, dtype=self.dtype)
750 csr_row_slice(start, stop, step, self.indptr, self.indices,
751 self.data, res_indices, res_data)
753 return self.__class__((res_data, res_indices, res_indptr),
754 shape=new_shape, copy=False)
756 def _minor_index_fancy(self, idx):
757 """Index along the minor axis where idx is an array of ints.
758 """
759 idx_dtype = self.indices.dtype
760 idx = np.asarray(idx, dtype=idx_dtype).ravel()
762 M, N = self._swap(self.shape)
763 k = len(idx)
764 new_shape = self._swap((M, k))
765 if k == 0:
766 return self.__class__(new_shape, dtype=self.dtype)
768 # pass 1: count idx entries and compute new indptr
769 col_offsets = np.zeros(N, dtype=idx_dtype)
770 res_indptr = np.empty_like(self.indptr)
771 csr_column_index1(k, idx, M, N, self.indptr, self.indices,
772 col_offsets, res_indptr)
774 # pass 2: copy indices/data for selected idxs
775 col_order = np.argsort(idx).astype(idx_dtype, copy=False)
776 nnz = res_indptr[-1]
777 res_indices = np.empty(nnz, dtype=idx_dtype)
778 res_data = np.empty(nnz, dtype=self.dtype)
779 csr_column_index2(col_order, col_offsets, len(self.indices),
780 self.indices, self.data, res_indices, res_data)
781 return self.__class__((res_data, res_indices, res_indptr),
782 shape=new_shape, copy=False)
784 def _minor_slice(self, idx, copy=False):
785 """Index along the minor axis where idx is a slice object.
786 """
787 if idx == slice(None):
788 return self.copy() if copy else self
790 M, N = self._swap(self.shape)
791 start, stop, step = idx.indices(N)
792 N = len(range(start, stop, step))
793 if N == 0:
794 return self.__class__(self._swap((M, N)), dtype=self.dtype)
795 if step == 1:
796 return self._get_submatrix(minor=idx, copy=copy)
797 # TODO: don't fall back to fancy indexing here
798 return self._minor_index_fancy(np.arange(start, stop, step))
800 def _get_submatrix(self, major=None, minor=None, copy=False):
801 """Return a submatrix of this matrix.
803 major, minor: None, int, or slice with step 1
804 """
805 M, N = self._swap(self.shape)
806 i0, i1 = _process_slice(major, M)
807 j0, j1 = _process_slice(minor, N)
809 if i0 == 0 and j0 == 0 and i1 == M and j1 == N:
810 return self.copy() if copy else self
812 indptr, indices, data = get_csr_submatrix(
813 M, N, self.indptr, self.indices, self.data, i0, i1, j0, j1)
815 shape = self._swap((i1 - i0, j1 - j0))
816 return self.__class__((data, indices, indptr), shape=shape,
817 dtype=self.dtype, copy=False)
819 def _set_intXint(self, row, col, x):
820 i, j = self._swap((row, col))
821 self._set_many(i, j, x)
823 def _set_arrayXarray(self, row, col, x):
824 i, j = self._swap((row, col))
825 self._set_many(i, j, x)
827 def _set_arrayXarray_sparse(self, row, col, x):
828 # clear entries that will be overwritten
829 self._zero_many(*self._swap((row, col)))
831 M, N = row.shape # matches col.shape
832 broadcast_row = M != 1 and x.shape[0] == 1
833 broadcast_col = N != 1 and x.shape[1] == 1
834 r, c = x.row, x.col
836 x = np.asarray(x.data, dtype=self.dtype)
837 if x.size == 0:
838 return
840 if broadcast_row:
841 r = np.repeat(np.arange(M), len(r))
842 c = np.tile(c, M)
843 x = np.tile(x, M)
844 if broadcast_col:
845 r = np.repeat(r, N)
846 c = np.tile(np.arange(N), len(c))
847 x = np.repeat(x, N)
848 # only assign entries in the new sparsity structure
849 i, j = self._swap((row[r, c], col[r, c]))
850 self._set_many(i, j, x)
852 def _setdiag(self, values, k):
853 if 0 in self.shape:
854 return
856 M, N = self.shape
857 broadcast = (values.ndim == 0)
859 if k < 0:
860 if broadcast:
861 max_index = min(M + k, N)
862 else:
863 max_index = min(M + k, N, len(values))
864 i = np.arange(max_index, dtype=self.indices.dtype)
865 j = np.arange(max_index, dtype=self.indices.dtype)
866 i -= k
868 else:
869 if broadcast:
870 max_index = min(M, N - k)
871 else:
872 max_index = min(M, N - k, len(values))
873 i = np.arange(max_index, dtype=self.indices.dtype)
874 j = np.arange(max_index, dtype=self.indices.dtype)
875 j += k
877 if not broadcast:
878 values = values[:len(i)]
880 self[i, j] = values
882 def _prepare_indices(self, i, j):
883 M, N = self._swap(self.shape)
885 def check_bounds(indices, bound):
886 idx = indices.max()
887 if idx >= bound:
888 raise IndexError('index (%d) out of range (>= %d)' %
889 (idx, bound))
890 idx = indices.min()
891 if idx < -bound:
892 raise IndexError('index (%d) out of range (< -%d)' %
893 (idx, bound))
895 i = np.array(i, dtype=self.indices.dtype, copy=False, ndmin=1).ravel()
896 j = np.array(j, dtype=self.indices.dtype, copy=False, ndmin=1).ravel()
897 check_bounds(i, M)
898 check_bounds(j, N)
899 return i, j, M, N
901 def _set_many(self, i, j, x):
902 """Sets value at each (i, j) to x
904 Here (i,j) index major and minor respectively, and must not contain
905 duplicate entries.
906 """
907 i, j, M, N = self._prepare_indices(i, j)
908 x = np.array(x, dtype=self.dtype, copy=False, ndmin=1).ravel()
910 n_samples = x.size
911 offsets = np.empty(n_samples, dtype=self.indices.dtype)
912 ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
913 i, j, offsets)
914 if ret == 1:
915 # rinse and repeat
916 self.sum_duplicates()
917 csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
918 i, j, offsets)
920 if -1 not in offsets:
921 # only affects existing non-zero cells
922 self.data[offsets] = x
923 return
925 else:
926 warn("Changing the sparsity structure of a {}_matrix is expensive."
927 " lil_matrix is more efficient.".format(self.format),
928 SparseEfficiencyWarning, stacklevel=3)
929 # replace where possible
930 mask = offsets > -1
931 self.data[offsets[mask]] = x[mask]
932 # only insertions remain
933 mask = ~mask
934 i = i[mask]
935 i[i < 0] += M
936 j = j[mask]
937 j[j < 0] += N
938 self._insert_many(i, j, x[mask])
940 def _zero_many(self, i, j):
941 """Sets value at each (i, j) to zero, preserving sparsity structure.
943 Here (i,j) index major and minor respectively.
944 """
945 i, j, M, N = self._prepare_indices(i, j)
947 n_samples = len(i)
948 offsets = np.empty(n_samples, dtype=self.indices.dtype)
949 ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
950 i, j, offsets)
951 if ret == 1:
952 # rinse and repeat
953 self.sum_duplicates()
954 csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
955 i, j, offsets)
957 # only assign zeros to the existing sparsity structure
958 self.data[offsets[offsets > -1]] = 0
960 def _insert_many(self, i, j, x):
961 """Inserts new nonzero at each (i, j) with value x
963 Here (i,j) index major and minor respectively.
964 i, j and x must be non-empty, 1d arrays.
965 Inserts each major group (e.g. all entries per row) at a time.
966 Maintains has_sorted_indices property.
967 Modifies i, j, x in place.
968 """
969 order = np.argsort(i, kind='mergesort') # stable for duplicates
970 i = i.take(order, mode='clip')
971 j = j.take(order, mode='clip')
972 x = x.take(order, mode='clip')
974 do_sort = self.has_sorted_indices
976 # Update index data type
977 idx_dtype = self._get_index_dtype((self.indices, self.indptr),
978 maxval=(self.indptr[-1] + x.size))
979 self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
980 self.indices = np.asarray(self.indices, dtype=idx_dtype)
981 i = np.asarray(i, dtype=idx_dtype)
982 j = np.asarray(j, dtype=idx_dtype)
984 # Collate old and new in chunks by major index
985 indices_parts = []
986 data_parts = []
987 ui, ui_indptr = np.unique(i, return_index=True)
988 ui_indptr = np.append(ui_indptr, len(j))
989 new_nnzs = np.diff(ui_indptr)
990 prev = 0
991 for c, (ii, js, je) in enumerate(zip(ui, ui_indptr, ui_indptr[1:])):
992 # old entries
993 start = self.indptr[prev]
994 stop = self.indptr[ii]
995 indices_parts.append(self.indices[start:stop])
996 data_parts.append(self.data[start:stop])
998 # handle duplicate j: keep last setting
999 uj, uj_indptr = np.unique(j[js:je][::-1], return_index=True)
1000 if len(uj) == je - js:
1001 indices_parts.append(j[js:je])
1002 data_parts.append(x[js:je])
1003 else:
1004 indices_parts.append(j[js:je][::-1][uj_indptr])
1005 data_parts.append(x[js:je][::-1][uj_indptr])
1006 new_nnzs[c] = len(uj)
1008 prev = ii
1010 # remaining old entries
1011 start = self.indptr[ii]
1012 indices_parts.append(self.indices[start:])
1013 data_parts.append(self.data[start:])
1015 # update attributes
1016 self.indices = np.concatenate(indices_parts)
1017 self.data = np.concatenate(data_parts)
1018 nnzs = np.empty(self.indptr.shape, dtype=idx_dtype)
1019 nnzs[0] = idx_dtype(0)
1020 indptr_diff = np.diff(self.indptr)
1021 indptr_diff[ui] += new_nnzs
1022 nnzs[1:] = indptr_diff
1023 self.indptr = np.cumsum(nnzs, out=nnzs)
1025 if do_sort:
1026 # TODO: only sort where necessary
1027 self.has_sorted_indices = False
1028 self.sort_indices()
1030 self.check_format(full_check=False)
1032 ######################
1033 # Conversion methods #
1034 ######################
1036 def tocoo(self, copy=True):
1037 major_dim, minor_dim = self._swap(self.shape)
1038 minor_indices = self.indices
1039 major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
1040 _sparsetools.expandptr(major_dim, self.indptr, major_indices)
1041 row, col = self._swap((major_indices, minor_indices))
1043 return self._coo_container(
1044 (self.data, (row, col)), self.shape, copy=copy,
1045 dtype=self.dtype
1046 )
1048 tocoo.__doc__ = _spbase.tocoo.__doc__
1050 def toarray(self, order=None, out=None):
1051 if out is None and order is None:
1052 order = self._swap('cf')[0]
1053 out = self._process_toarray_args(order, out)
1054 if not (out.flags.c_contiguous or out.flags.f_contiguous):
1055 raise ValueError('Output array must be C or F contiguous')
1056 # align ideal order with output array order
1057 if out.flags.c_contiguous:
1058 x = self.tocsr()
1059 y = out
1060 else:
1061 x = self.tocsc()
1062 y = out.T
1063 M, N = x._swap(x.shape)
1064 csr_todense(M, N, x.indptr, x.indices, x.data, y)
1065 return out
1067 toarray.__doc__ = _spbase.toarray.__doc__
1069 ##############################################################
1070 # methods that examine or modify the internal data structure #
1071 ##############################################################
1073 def eliminate_zeros(self):
1074 """Remove zero entries from the array/matrix
1076 This is an *in place* operation.
1077 """
1078 M, N = self._swap(self.shape)
1079 _sparsetools.csr_eliminate_zeros(M, N, self.indptr, self.indices,
1080 self.data)
1081 self.prune() # nnz may have changed
1083 @property
1084 def has_canonical_format(self) -> bool:
1085 """Whether the array/matrix has sorted indices and no duplicates
1087 Returns
1088 - True: if the above applies
1089 - False: otherwise
1091 has_canonical_format implies has_sorted_indices, so if the latter flag
1092 is False, so will the former be; if the former is found True, the
1093 latter flag is also set.
1094 """
1095 # first check to see if result was cached
1096 if not getattr(self, '_has_sorted_indices', True):
1097 # not sorted => not canonical
1098 self._has_canonical_format = False
1099 elif not hasattr(self, '_has_canonical_format'):
1100 self.has_canonical_format = bool(
1101 _sparsetools.csr_has_canonical_format(
1102 len(self.indptr) - 1, self.indptr, self.indices)
1103 )
1104 return self._has_canonical_format
1106 @has_canonical_format.setter
1107 def has_canonical_format(self, val: bool):
1108 self._has_canonical_format = bool(val)
1109 if val:
1110 self.has_sorted_indices = True
1112 def sum_duplicates(self):
1113 """Eliminate duplicate entries by adding them together
1115 This is an *in place* operation.
1116 """
1117 if self.has_canonical_format:
1118 return
1119 self.sort_indices()
1121 M, N = self._swap(self.shape)
1122 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices,
1123 self.data)
1125 self.prune() # nnz may have changed
1126 self.has_canonical_format = True
1128 @property
1129 def has_sorted_indices(self) -> bool:
1130 """Whether the indices are sorted
1132 Returns
1133 - True: if the indices of the array/matrix are in sorted order
1134 - False: otherwise
1135 """
1136 # first check to see if result was cached
1137 if not hasattr(self, '_has_sorted_indices'):
1138 self._has_sorted_indices = bool(
1139 _sparsetools.csr_has_sorted_indices(
1140 len(self.indptr) - 1, self.indptr, self.indices)
1141 )
1142 return self._has_sorted_indices
1144 @has_sorted_indices.setter
1145 def has_sorted_indices(self, val: bool):
1146 self._has_sorted_indices = bool(val)
1149 def sorted_indices(self):
1150 """Return a copy of this array/matrix with sorted indices
1151 """
1152 A = self.copy()
1153 A.sort_indices()
1154 return A
1156 # an alternative that has linear complexity is the following
1157 # although the previous option is typically faster
1158 # return self.toother().toother()
1160 def sort_indices(self):
1161 """Sort the indices of this array/matrix *in place*
1162 """
1164 if not self.has_sorted_indices:
1165 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr,
1166 self.indices, self.data)
1167 self.has_sorted_indices = True
1169 def prune(self):
1170 """Remove empty space after all non-zero elements.
1171 """
1172 major_dim = self._swap(self.shape)[0]
1174 if len(self.indptr) != major_dim + 1:
1175 raise ValueError('index pointer has invalid length')
1176 if len(self.indices) < self.nnz:
1177 raise ValueError('indices array has fewer than nnz elements')
1178 if len(self.data) < self.nnz:
1179 raise ValueError('data array has fewer than nnz elements')
1181 self.indices = _prune_array(self.indices[:self.nnz])
1182 self.data = _prune_array(self.data[:self.nnz])
1184 def resize(self, *shape):
1185 shape = check_shape(shape)
1186 if hasattr(self, 'blocksize'):
1187 bm, bn = self.blocksize
1188 new_M, rm = divmod(shape[0], bm)
1189 new_N, rn = divmod(shape[1], bn)
1190 if rm or rn:
1191 raise ValueError("shape must be divisible into {} blocks. "
1192 "Got {}".format(self.blocksize, shape))
1193 M, N = self.shape[0] // bm, self.shape[1] // bn
1194 else:
1195 new_M, new_N = self._swap(shape)
1196 M, N = self._swap(self.shape)
1198 if new_M < M:
1199 self.indices = self.indices[:self.indptr[new_M]]
1200 self.data = self.data[:self.indptr[new_M]]
1201 self.indptr = self.indptr[:new_M + 1]
1202 elif new_M > M:
1203 self.indptr = np.resize(self.indptr, new_M + 1)
1204 self.indptr[M + 1:].fill(self.indptr[M])
1206 if new_N < N:
1207 mask = self.indices < new_N
1208 if not np.all(mask):
1209 self.indices = self.indices[mask]
1210 self.data = self.data[mask]
1211 major_index, val = self._minor_reduce(np.add, mask)
1212 self.indptr.fill(0)
1213 self.indptr[1:][major_index] = val
1214 np.cumsum(self.indptr, out=self.indptr)
1216 self._shape = shape
1218 resize.__doc__ = _spbase.resize.__doc__
1220 ###################
1221 # utility methods #
1222 ###################
1224 # needed by _data_matrix
1225 def _with_data(self, data, copy=True):
1226 """Returns a matrix with the same sparsity structure as self,
1227 but with different data. By default the structure arrays
1228 (i.e. .indptr and .indices) are copied.
1229 """
1230 if copy:
1231 return self.__class__((data, self.indices.copy(),
1232 self.indptr.copy()),
1233 shape=self.shape,
1234 dtype=data.dtype)
1235 else:
1236 return self.__class__((data, self.indices, self.indptr),
1237 shape=self.shape, dtype=data.dtype)
1239 def _binopt(self, other, op):
1240 """apply the binary operation fn to two sparse matrices."""
1241 other = self.__class__(other)
1243 # e.g. csr_plus_csr, csr_minus_csr, etc.
1244 fn = getattr(_sparsetools, self.format + op + self.format)
1246 maxnnz = self.nnz + other.nnz
1247 idx_dtype = self._get_index_dtype((self.indptr, self.indices,
1248 other.indptr, other.indices),
1249 maxval=maxnnz)
1250 indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
1251 indices = np.empty(maxnnz, dtype=idx_dtype)
1253 bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
1254 if op in bool_ops:
1255 data = np.empty(maxnnz, dtype=np.bool_)
1256 else:
1257 data = np.empty(maxnnz, dtype=upcast(self.dtype, other.dtype))
1259 fn(self.shape[0], self.shape[1],
1260 np.asarray(self.indptr, dtype=idx_dtype),
1261 np.asarray(self.indices, dtype=idx_dtype),
1262 self.data,
1263 np.asarray(other.indptr, dtype=idx_dtype),
1264 np.asarray(other.indices, dtype=idx_dtype),
1265 other.data,
1266 indptr, indices, data)
1268 A = self.__class__((data, indices, indptr), shape=self.shape)
1269 A.prune()
1271 return A
1273 def _divide_sparse(self, other):
1274 """
1275 Divide this matrix by a second sparse matrix.
1276 """
1277 if other.shape != self.shape:
1278 raise ValueError('inconsistent shapes')
1280 r = self._binopt(other, '_eldiv_')
1282 if np.issubdtype(r.dtype, np.inexact):
1283 # Eldiv leaves entries outside the combined sparsity
1284 # pattern empty, so they must be filled manually.
1285 # Everything outside of other's sparsity is NaN, and everything
1286 # inside it is either zero or defined by eldiv.
1287 out = np.empty(self.shape, dtype=self.dtype)
1288 out.fill(np.nan)
1289 row, col = other.nonzero()
1290 out[row, col] = 0
1291 r = r.tocoo()
1292 out[r.row, r.col] = r.data
1293 out = self._container(out)
1294 else:
1295 # integers types go with nan <-> 0
1296 out = r
1298 return out
1301def _process_slice(sl, num):
1302 if sl is None:
1303 i0, i1 = 0, num
1304 elif isinstance(sl, slice):
1305 i0, i1, stride = sl.indices(num)
1306 if stride != 1:
1307 raise ValueError('slicing with step != 1 not supported')
1308 i0 = min(i0, i1) # give an empty slice when i0 > i1
1309 elif isintlike(sl):
1310 if sl < 0:
1311 sl += num
1312 i0, i1 = sl, sl + 1
1313 if i0 < 0 or i1 > num:
1314 raise IndexError('index out of bounds: 0 <= %d < %d <= %d' %
1315 (i0, i1, num))
1316 else:
1317 raise TypeError('expected slice or scalar')
1319 return i0, i1