Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/scipy/sparse/_compressed.py: 11%
745 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:37 +0000
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-14 06:37 +0000
1"""Base class for sparse matrix formats using compressed storage."""
2__all__ = []
4from warnings import warn
5import operator
7import numpy as np
8from scipy._lib._util import _prune_array
10from ._base import _spbase, issparse, SparseEfficiencyWarning
11from ._data import _data_matrix, _minmax_mixin
12from . import _sparsetools
13from ._sparsetools import (get_csr_submatrix, csr_sample_offsets, csr_todense,
14 csr_sample_values, csr_row_index, csr_row_slice,
15 csr_column_index1, csr_column_index2)
16from ._index import IndexMixin
17from ._sputils import (upcast, upcast_char, to_native, isdense, isshape,
18 getdtype, isscalarlike, isintlike, downcast_intp_index,
19 get_sum_dtype, check_shape, is_pydata_spmatrix)
22class _cs_matrix(_data_matrix, _minmax_mixin, IndexMixin):
23 """
24 base array/matrix class for compressed row- and column-oriented arrays/matrices
25 """
27 def __init__(self, arg1, shape=None, dtype=None, copy=False):
28 _data_matrix.__init__(self)
30 if issparse(arg1):
31 if arg1.format == self.format and copy:
32 arg1 = arg1.copy()
33 else:
34 arg1 = arg1.asformat(self.format)
35 self._set_self(arg1)
37 elif isinstance(arg1, tuple):
38 if isshape(arg1):
39 # It's a tuple of matrix dimensions (M, N)
40 # create empty matrix
41 self._shape = check_shape(arg1)
42 M, N = self.shape
43 # Select index dtype large enough to pass array and
44 # scalar parameters to sparsetools
45 idx_dtype = self._get_index_dtype(maxval=max(M, N))
46 self.data = np.zeros(0, getdtype(dtype, default=float))
47 self.indices = np.zeros(0, idx_dtype)
48 self.indptr = np.zeros(self._swap((M, N))[0] + 1,
49 dtype=idx_dtype)
50 else:
51 if len(arg1) == 2:
52 # (data, ij) format
53 other = self.__class__(
54 self._coo_container(arg1, shape=shape, dtype=dtype)
55 )
56 self._set_self(other)
57 elif len(arg1) == 3:
58 # (data, indices, indptr) format
59 (data, indices, indptr) = arg1
61 # Select index dtype large enough to pass array and
62 # scalar parameters to sparsetools
63 maxval = None
64 if shape is not None:
65 maxval = max(shape)
66 idx_dtype = self._get_index_dtype((indices, indptr),
67 maxval=maxval,
68 check_contents=True)
70 self.indices = np.array(indices, copy=copy,
71 dtype=idx_dtype)
72 self.indptr = np.array(indptr, copy=copy, dtype=idx_dtype)
73 self.data = np.array(data, copy=copy, dtype=dtype)
74 else:
75 raise ValueError(f"unrecognized {self.format}_matrix "
76 "constructor usage")
78 else:
79 # must be dense
80 try:
81 arg1 = np.asarray(arg1)
82 except Exception as e:
83 msg = f"unrecognized {self.format}_matrix constructor usage"
84 raise ValueError(msg) from e
85 self._set_self(self.__class__(
86 self._coo_container(arg1, dtype=dtype)
87 ))
89 # Read matrix dimensions given, if any
90 if shape is not None:
91 self._shape = check_shape(shape)
92 else:
93 if self.shape is None:
94 # shape not already set, try to infer dimensions
95 try:
96 major_dim = len(self.indptr) - 1
97 minor_dim = self.indices.max() + 1
98 except Exception as e:
99 raise ValueError('unable to infer matrix dimensions') from e
100 else:
101 self._shape = check_shape(self._swap((major_dim,
102 minor_dim)))
104 if dtype is not None:
105 self.data = self.data.astype(dtype, copy=False)
107 self.check_format(full_check=False)
109 def _getnnz(self, axis=None):
110 if axis is None:
111 return int(self.indptr[-1])
112 else:
113 if axis < 0:
114 axis += 2
115 axis, _ = self._swap((axis, 1 - axis))
116 _, N = self._swap(self.shape)
117 if axis == 0:
118 return np.bincount(downcast_intp_index(self.indices),
119 minlength=N)
120 elif axis == 1:
121 return np.diff(self.indptr)
122 raise ValueError('axis out of bounds')
124 _getnnz.__doc__ = _spbase._getnnz.__doc__
126 def _set_self(self, other, copy=False):
127 """take the member variables of other and assign them to self"""
129 if copy:
130 other = other.copy()
132 self.data = other.data
133 self.indices = other.indices
134 self.indptr = other.indptr
135 self._shape = check_shape(other.shape)
137 def check_format(self, full_check=True):
138 """Check whether the array/matrix respects the CSR or CSC format.
140 Parameters
141 ----------
142 full_check : bool, optional
143 If `True`, run rigorous check, scanning arrays for valid values.
144 Note that activating those check might copy arrays for casting,
145 modifying indices and index pointers' inplace.
146 If `False`, run basic checks on attributes. O(1) operations.
147 Default is `True`.
148 """
149 # use _swap to determine proper bounds
150 major_name, minor_name = self._swap(('row', 'column'))
151 major_dim, minor_dim = self._swap(self.shape)
153 # index arrays should have integer data types
154 if self.indptr.dtype.kind != 'i':
155 warn(f"indptr array has non-integer dtype ({self.indptr.dtype.name})",
156 stacklevel=3)
157 if self.indices.dtype.kind != 'i':
158 warn(f"indices array has non-integer dtype ({self.indices.dtype.name})",
159 stacklevel=3)
161 # check array shapes
162 for x in [self.data.ndim, self.indices.ndim, self.indptr.ndim]:
163 if x != 1:
164 raise ValueError('data, indices, and indptr should be 1-D')
166 # check index pointer
167 if (len(self.indptr) != major_dim + 1):
168 raise ValueError("index pointer size ({}) should be ({})"
169 "".format(len(self.indptr), major_dim + 1))
170 if (self.indptr[0] != 0):
171 raise ValueError("index pointer should start with 0")
173 # check index and data arrays
174 if (len(self.indices) != len(self.data)):
175 raise ValueError("indices and data should have the same size")
176 if (self.indptr[-1] > len(self.indices)):
177 raise ValueError("Last value of index pointer should be less than "
178 "the size of index and data arrays")
180 self.prune()
182 if full_check:
183 # check format validity (more expensive)
184 if self.nnz > 0:
185 if self.indices.max() >= minor_dim:
186 raise ValueError(f"{minor_name} index values must be < {minor_dim}")
187 if self.indices.min() < 0:
188 raise ValueError(f"{minor_name} index values must be >= 0")
189 if np.diff(self.indptr).min() < 0:
190 raise ValueError("index pointer values must form a "
191 "non-decreasing sequence")
193 idx_dtype = self._get_index_dtype((self.indptr, self.indices))
194 self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
195 self.indices = np.asarray(self.indices, dtype=idx_dtype)
196 self.data = to_native(self.data)
198 # if not self.has_sorted_indices():
199 # warn('Indices were not in sorted order. Sorting indices.')
200 # self.sort_indices()
201 # assert(self.has_sorted_indices())
202 # TODO check for duplicates?
204 #######################
205 # Boolean comparisons #
206 #######################
208 def _scalar_binopt(self, other, op):
209 """Scalar version of self._binopt, for cases in which no new nonzeros
210 are added. Produces a new sparse array in canonical form.
211 """
212 self.sum_duplicates()
213 res = self._with_data(op(self.data, other), copy=True)
214 res.eliminate_zeros()
215 return res
217 def __eq__(self, other):
218 # Scalar other.
219 if isscalarlike(other):
220 if np.isnan(other):
221 return self.__class__(self.shape, dtype=np.bool_)
223 if other == 0:
224 warn("Comparing a sparse matrix with 0 using == is inefficient"
225 ", try using != instead.", SparseEfficiencyWarning,
226 stacklevel=3)
227 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
228 inv = self._scalar_binopt(other, operator.ne)
229 return all_true - inv
230 else:
231 return self._scalar_binopt(other, operator.eq)
232 # Dense other.
233 elif isdense(other):
234 return self.todense() == other
235 # Pydata sparse other.
236 elif is_pydata_spmatrix(other):
237 return NotImplemented
238 # Sparse other.
239 elif issparse(other):
240 warn("Comparing sparse matrices using == is inefficient, try using"
241 " != instead.", SparseEfficiencyWarning, stacklevel=3)
242 # TODO sparse broadcasting
243 if self.shape != other.shape:
244 return False
245 elif self.format != other.format:
246 other = other.asformat(self.format)
247 res = self._binopt(other, '_ne_')
248 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
249 return all_true - res
250 else:
251 return NotImplemented
253 def __ne__(self, other):
254 # Scalar other.
255 if isscalarlike(other):
256 if np.isnan(other):
257 warn("Comparing a sparse matrix with nan using != is"
258 " inefficient", SparseEfficiencyWarning, stacklevel=3)
259 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
260 return all_true
261 elif other != 0:
262 warn("Comparing a sparse matrix with a nonzero scalar using !="
263 " is inefficient, try using == instead.",
264 SparseEfficiencyWarning, stacklevel=3)
265 all_true = self.__class__(np.ones(self.shape), dtype=np.bool_)
266 inv = self._scalar_binopt(other, operator.eq)
267 return all_true - inv
268 else:
269 return self._scalar_binopt(other, operator.ne)
270 # Dense other.
271 elif isdense(other):
272 return self.todense() != other
273 # Pydata sparse other.
274 elif is_pydata_spmatrix(other):
275 return NotImplemented
276 # Sparse other.
277 elif issparse(other):
278 # TODO sparse broadcasting
279 if self.shape != other.shape:
280 return True
281 elif self.format != other.format:
282 other = other.asformat(self.format)
283 return self._binopt(other, '_ne_')
284 else:
285 return NotImplemented
287 def _inequality(self, other, op, op_name, bad_scalar_msg):
288 # Scalar other.
289 if isscalarlike(other):
290 if 0 == other and op_name in ('_le_', '_ge_'):
291 raise NotImplementedError(" >= and <= don't work with 0.")
292 elif op(0, other):
293 warn(bad_scalar_msg, SparseEfficiencyWarning, stacklevel=3)
294 other_arr = np.empty(self.shape, dtype=np.result_type(other))
295 other_arr.fill(other)
296 other_arr = self.__class__(other_arr)
297 return self._binopt(other_arr, op_name)
298 else:
299 return self._scalar_binopt(other, op)
300 # Dense other.
301 elif isdense(other):
302 return op(self.todense(), other)
303 # Sparse other.
304 elif issparse(other):
305 # TODO sparse broadcasting
306 if self.shape != other.shape:
307 raise ValueError("inconsistent shapes")
308 elif self.format != other.format:
309 other = other.asformat(self.format)
310 if op_name not in ('_ge_', '_le_'):
311 return self._binopt(other, op_name)
313 warn("Comparing sparse matrices using >= and <= is inefficient, "
314 "using <, >, or !=, instead.",
315 SparseEfficiencyWarning, stacklevel=3)
316 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
317 res = self._binopt(other, '_gt_' if op_name == '_le_' else '_lt_')
318 return all_true - res
319 else:
320 return NotImplemented
322 def __lt__(self, other):
323 return self._inequality(other, operator.lt, '_lt_',
324 "Comparing a sparse matrix with a scalar "
325 "greater than zero using < is inefficient, "
326 "try using >= instead.")
328 def __gt__(self, other):
329 return self._inequality(other, operator.gt, '_gt_',
330 "Comparing a sparse matrix with a scalar "
331 "less than zero using > is inefficient, "
332 "try using <= instead.")
334 def __le__(self, other):
335 return self._inequality(other, operator.le, '_le_',
336 "Comparing a sparse matrix with a scalar "
337 "greater than zero using <= is inefficient, "
338 "try using > instead.")
340 def __ge__(self, other):
341 return self._inequality(other, operator.ge, '_ge_',
342 "Comparing a sparse matrix with a scalar "
343 "less than zero using >= is inefficient, "
344 "try using < instead.")
346 #################################
347 # Arithmetic operator overrides #
348 #################################
350 def _add_dense(self, other):
351 if other.shape != self.shape:
352 raise ValueError(f'Incompatible shapes ({self.shape} and {other.shape})')
353 dtype = upcast_char(self.dtype.char, other.dtype.char)
354 order = self._swap('CF')[0]
355 result = np.array(other, dtype=dtype, order=order, copy=True)
356 M, N = self._swap(self.shape)
357 y = result if result.flags.c_contiguous else result.T
358 csr_todense(M, N, self.indptr, self.indices, self.data, y)
359 return self._container(result, copy=False)
361 def _add_sparse(self, other):
362 return self._binopt(other, '_plus_')
364 def _sub_sparse(self, other):
365 return self._binopt(other, '_minus_')
367 def multiply(self, other):
368 """Point-wise multiplication by another array/matrix, vector, or
369 scalar.
370 """
371 # Scalar multiplication.
372 if isscalarlike(other):
373 return self._mul_scalar(other)
374 # Sparse matrix or vector.
375 if issparse(other):
376 if self.shape == other.shape:
377 other = self.__class__(other)
378 return self._binopt(other, '_elmul_')
379 if other.ndim == 1:
380 raise TypeError("broadcast from a 1d array not yet supported")
381 # Single element.
382 elif other.shape == (1, 1):
383 return self._mul_scalar(other.toarray()[0, 0])
384 elif self.shape == (1, 1):
385 return other._mul_scalar(self.toarray()[0, 0])
386 # A row times a column.
387 elif self.shape[1] == 1 and other.shape[0] == 1:
388 return self._matmul_sparse(other.tocsc())
389 elif self.shape[0] == 1 and other.shape[1] == 1:
390 return other._matmul_sparse(self.tocsc())
391 # Row vector times matrix. other is a row.
392 elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
393 other = self._dia_container(
394 (other.toarray().ravel(), [0]),
395 shape=(other.shape[1], other.shape[1])
396 )
397 return self._matmul_sparse(other)
398 # self is a row.
399 elif self.shape[0] == 1 and self.shape[1] == other.shape[1]:
400 copy = self._dia_container(
401 (self.toarray().ravel(), [0]),
402 shape=(self.shape[1], self.shape[1])
403 )
404 return other._matmul_sparse(copy)
405 # Column vector times matrix. other is a column.
406 elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
407 other = self._dia_container(
408 (other.toarray().ravel(), [0]),
409 shape=(other.shape[0], other.shape[0])
410 )
411 return other._matmul_sparse(self)
412 # self is a column.
413 elif self.shape[1] == 1 and self.shape[0] == other.shape[0]:
414 copy = self._dia_container(
415 (self.toarray().ravel(), [0]),
416 shape=(self.shape[0], self.shape[0])
417 )
418 return copy._matmul_sparse(other)
419 else:
420 raise ValueError("inconsistent shapes")
422 # Assume other is a dense matrix/array, which produces a single-item
423 # object array if other isn't convertible to ndarray.
424 other = np.atleast_2d(other)
426 if other.ndim != 2:
427 return np.multiply(self.toarray(), other)
428 # Single element / wrapped object.
429 if other.size == 1:
430 if other.dtype == np.object_:
431 # 'other' not convertible to ndarray.
432 return NotImplemented
433 return self._mul_scalar(other.flat[0])
434 # Fast case for trivial sparse matrix.
435 elif self.shape == (1, 1):
436 return np.multiply(self.toarray()[0, 0], other)
438 ret = self.tocoo()
439 # Matching shapes.
440 if self.shape == other.shape:
441 data = np.multiply(ret.data, other[ret.row, ret.col])
442 # Sparse row vector times...
443 elif self.shape[0] == 1:
444 if other.shape[1] == 1: # Dense column vector.
445 data = np.multiply(ret.data, other)
446 elif other.shape[1] == self.shape[1]: # Dense matrix.
447 data = np.multiply(ret.data, other[:, ret.col])
448 else:
449 raise ValueError("inconsistent shapes")
450 row = np.repeat(np.arange(other.shape[0]), len(ret.row))
451 col = np.tile(ret.col, other.shape[0])
452 return self._coo_container(
453 (data.view(np.ndarray).ravel(), (row, col)),
454 shape=(other.shape[0], self.shape[1]),
455 copy=False
456 )
457 # Sparse column vector times...
458 elif self.shape[1] == 1:
459 if other.shape[0] == 1: # Dense row vector.
460 data = np.multiply(ret.data[:, None], other)
461 elif other.shape[0] == self.shape[0]: # Dense matrix.
462 data = np.multiply(ret.data[:, None], other[ret.row])
463 else:
464 raise ValueError("inconsistent shapes")
465 row = np.repeat(ret.row, other.shape[1])
466 col = np.tile(np.arange(other.shape[1]), len(ret.col))
467 return self._coo_container(
468 (data.view(np.ndarray).ravel(), (row, col)),
469 shape=(self.shape[0], other.shape[1]),
470 copy=False
471 )
472 # Sparse matrix times dense row vector.
473 elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
474 data = np.multiply(ret.data, other[:, ret.col].ravel())
475 # Sparse matrix times dense column vector.
476 elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
477 data = np.multiply(ret.data, other[ret.row].ravel())
478 else:
479 raise ValueError("inconsistent shapes")
480 ret.data = data.view(np.ndarray).ravel()
481 return ret
483 ###########################
484 # Multiplication handlers #
485 ###########################
487 def _matmul_vector(self, other):
488 M, N = self.shape
490 # output array
491 result = np.zeros(M, dtype=upcast_char(self.dtype.char,
492 other.dtype.char))
494 # csr_matvec or csc_matvec
495 fn = getattr(_sparsetools, self.format + '_matvec')
496 fn(M, N, self.indptr, self.indices, self.data, other, result)
498 return result
500 def _matmul_multivector(self, other):
501 M, N = self.shape
502 n_vecs = other.shape[1] # number of column vectors
504 result = np.zeros((M, n_vecs),
505 dtype=upcast_char(self.dtype.char, other.dtype.char))
507 # csr_matvecs or csc_matvecs
508 fn = getattr(_sparsetools, self.format + '_matvecs')
509 fn(M, N, n_vecs, self.indptr, self.indices, self.data,
510 other.ravel(), result.ravel())
512 return result
514 def _matmul_sparse(self, other):
515 M, K1 = self.shape
516 K2, N = other.shape
518 major_axis = self._swap((M, N))[0]
519 other = self.__class__(other) # convert to this format
521 idx_dtype = self._get_index_dtype((self.indptr, self.indices,
522 other.indptr, other.indices))
524 fn = getattr(_sparsetools, self.format + '_matmat_maxnnz')
525 nnz = fn(M, N,
526 np.asarray(self.indptr, dtype=idx_dtype),
527 np.asarray(self.indices, dtype=idx_dtype),
528 np.asarray(other.indptr, dtype=idx_dtype),
529 np.asarray(other.indices, dtype=idx_dtype))
531 idx_dtype = self._get_index_dtype((self.indptr, self.indices,
532 other.indptr, other.indices),
533 maxval=nnz)
535 indptr = np.empty(major_axis + 1, dtype=idx_dtype)
536 indices = np.empty(nnz, dtype=idx_dtype)
537 data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
539 fn = getattr(_sparsetools, self.format + '_matmat')
540 fn(M, N, np.asarray(self.indptr, dtype=idx_dtype),
541 np.asarray(self.indices, dtype=idx_dtype),
542 self.data,
543 np.asarray(other.indptr, dtype=idx_dtype),
544 np.asarray(other.indices, dtype=idx_dtype),
545 other.data,
546 indptr, indices, data)
548 return self.__class__((data, indices, indptr), shape=(M, N))
550 def diagonal(self, k=0):
551 rows, cols = self.shape
552 if k <= -rows or k >= cols:
553 return np.empty(0, dtype=self.data.dtype)
554 fn = getattr(_sparsetools, self.format + "_diagonal")
555 y = np.empty(min(rows + min(k, 0), cols - max(k, 0)),
556 dtype=upcast(self.dtype))
557 fn(k, self.shape[0], self.shape[1], self.indptr, self.indices,
558 self.data, y)
559 return y
561 diagonal.__doc__ = _spbase.diagonal.__doc__
563 #####################
564 # Other binary ops #
565 #####################
567 def _maximum_minimum(self, other, npop, op_name, dense_check):
568 if isscalarlike(other):
569 if dense_check(other):
570 warn("Taking maximum (minimum) with > 0 (< 0) number results"
571 " to a dense matrix.", SparseEfficiencyWarning,
572 stacklevel=3)
573 other_arr = np.empty(self.shape, dtype=np.asarray(other).dtype)
574 other_arr.fill(other)
575 other_arr = self.__class__(other_arr)
576 return self._binopt(other_arr, op_name)
577 else:
578 self.sum_duplicates()
579 new_data = npop(self.data, np.asarray(other))
580 mat = self.__class__((new_data, self.indices, self.indptr),
581 dtype=new_data.dtype, shape=self.shape)
582 return mat
583 elif isdense(other):
584 return npop(self.todense(), other)
585 elif issparse(other):
586 return self._binopt(other, op_name)
587 else:
588 raise ValueError("Operands not compatible.")
590 def maximum(self, other):
591 return self._maximum_minimum(other, np.maximum,
592 '_maximum_', lambda x: np.asarray(x) > 0)
594 maximum.__doc__ = _spbase.maximum.__doc__
596 def minimum(self, other):
597 return self._maximum_minimum(other, np.minimum,
598 '_minimum_', lambda x: np.asarray(x) < 0)
600 minimum.__doc__ = _spbase.minimum.__doc__
602 #####################
603 # Reduce operations #
604 #####################
606 def sum(self, axis=None, dtype=None, out=None):
607 """Sum the array/matrix over the given axis. If the axis is None, sum
608 over both rows and columns, returning a scalar.
609 """
610 # The _spbase base class already does axis=0 and axis=1 efficiently
611 # so we only do the case axis=None here
612 if (not hasattr(self, 'blocksize') and
613 axis in self._swap(((1, -1), (0, 2)))[0]):
614 # faster than multiplication for large minor axis in CSC/CSR
615 res_dtype = get_sum_dtype(self.dtype)
616 ret = np.zeros(len(self.indptr) - 1, dtype=res_dtype)
618 major_index, value = self._minor_reduce(np.add)
619 ret[major_index] = value
620 ret = self._ascontainer(ret)
621 if axis % 2 == 1:
622 ret = ret.T
624 if out is not None and out.shape != ret.shape:
625 raise ValueError('dimensions do not match')
627 return ret.sum(axis=(), dtype=dtype, out=out)
628 # _spbase will handle the remaining situations when axis
629 # is in {None, -1, 0, 1}
630 else:
631 return _spbase.sum(self, axis=axis, dtype=dtype, out=out)
633 sum.__doc__ = _spbase.sum.__doc__
635 def _minor_reduce(self, ufunc, data=None):
636 """Reduce nonzeros with a ufunc over the minor axis when non-empty
638 Can be applied to a function of self.data by supplying data parameter.
640 Warning: this does not call sum_duplicates()
642 Returns
643 -------
644 major_index : array of ints
645 Major indices where nonzero
647 value : array of self.dtype
648 Reduce result for nonzeros in each major_index
649 """
650 if data is None:
651 data = self.data
652 major_index = np.flatnonzero(np.diff(self.indptr))
653 value = ufunc.reduceat(data,
654 downcast_intp_index(self.indptr[major_index]))
655 return major_index, value
657 #######################
658 # Getting and Setting #
659 #######################
661 def _get_intXint(self, row, col):
662 M, N = self._swap(self.shape)
663 major, minor = self._swap((row, col))
664 indptr, indices, data = get_csr_submatrix(
665 M, N, self.indptr, self.indices, self.data,
666 major, major + 1, minor, minor + 1)
667 return data.sum(dtype=self.dtype)
669 def _get_sliceXslice(self, row, col):
670 major, minor = self._swap((row, col))
671 if major.step in (1, None) and minor.step in (1, None):
672 return self._get_submatrix(major, minor, copy=True)
673 return self._major_slice(major)._minor_slice(minor)
675 def _get_arrayXarray(self, row, col):
676 # inner indexing
677 idx_dtype = self.indices.dtype
678 M, N = self._swap(self.shape)
679 major, minor = self._swap((row, col))
680 major = np.asarray(major, dtype=idx_dtype)
681 minor = np.asarray(minor, dtype=idx_dtype)
683 val = np.empty(major.size, dtype=self.dtype)
684 csr_sample_values(M, N, self.indptr, self.indices, self.data,
685 major.size, major.ravel(), minor.ravel(), val)
686 if major.ndim == 1:
687 return self._ascontainer(val)
688 return self.__class__(val.reshape(major.shape))
690 def _get_columnXarray(self, row, col):
691 # outer indexing
692 major, minor = self._swap((row, col))
693 return self._major_index_fancy(major)._minor_index_fancy(minor)
695 def _major_index_fancy(self, idx):
696 """Index along the major axis where idx is an array of ints.
697 """
698 idx_dtype = self.indices.dtype
699 indices = np.asarray(idx, dtype=idx_dtype).ravel()
701 _, N = self._swap(self.shape)
702 M = len(indices)
703 new_shape = self._swap((M, N))
704 if M == 0:
705 return self.__class__(new_shape, dtype=self.dtype)
707 row_nnz = self.indptr[indices + 1] - self.indptr[indices]
708 idx_dtype = self.indices.dtype
709 res_indptr = np.zeros(M+1, dtype=idx_dtype)
710 np.cumsum(row_nnz, out=res_indptr[1:])
712 nnz = res_indptr[-1]
713 res_indices = np.empty(nnz, dtype=idx_dtype)
714 res_data = np.empty(nnz, dtype=self.dtype)
715 csr_row_index(M, indices, self.indptr, self.indices, self.data,
716 res_indices, res_data)
718 return self.__class__((res_data, res_indices, res_indptr),
719 shape=new_shape, copy=False)
721 def _major_slice(self, idx, copy=False):
722 """Index along the major axis where idx is a slice object.
723 """
724 if idx == slice(None):
725 return self.copy() if copy else self
727 M, N = self._swap(self.shape)
728 start, stop, step = idx.indices(M)
729 M = len(range(start, stop, step))
730 new_shape = self._swap((M, N))
731 if M == 0:
732 return self.__class__(new_shape, dtype=self.dtype)
734 # Work out what slices are needed for `row_nnz`
735 # start,stop can be -1, only if step is negative
736 start0, stop0 = start, stop
737 if stop == -1 and start >= 0:
738 stop0 = None
739 start1, stop1 = start + 1, stop + 1
741 row_nnz = self.indptr[start1:stop1:step] - \
742 self.indptr[start0:stop0:step]
743 idx_dtype = self.indices.dtype
744 res_indptr = np.zeros(M+1, dtype=idx_dtype)
745 np.cumsum(row_nnz, out=res_indptr[1:])
747 if step == 1:
748 all_idx = slice(self.indptr[start], self.indptr[stop])
749 res_indices = np.array(self.indices[all_idx], copy=copy)
750 res_data = np.array(self.data[all_idx], copy=copy)
751 else:
752 nnz = res_indptr[-1]
753 res_indices = np.empty(nnz, dtype=idx_dtype)
754 res_data = np.empty(nnz, dtype=self.dtype)
755 csr_row_slice(start, stop, step, self.indptr, self.indices,
756 self.data, res_indices, res_data)
758 return self.__class__((res_data, res_indices, res_indptr),
759 shape=new_shape, copy=False)
761 def _minor_index_fancy(self, idx):
762 """Index along the minor axis where idx is an array of ints.
763 """
764 idx_dtype = self.indices.dtype
765 idx = np.asarray(idx, dtype=idx_dtype).ravel()
767 M, N = self._swap(self.shape)
768 k = len(idx)
769 new_shape = self._swap((M, k))
770 if k == 0:
771 return self.__class__(new_shape, dtype=self.dtype)
773 # pass 1: count idx entries and compute new indptr
774 col_offsets = np.zeros(N, dtype=idx_dtype)
775 res_indptr = np.empty_like(self.indptr)
776 csr_column_index1(k, idx, M, N, self.indptr, self.indices,
777 col_offsets, res_indptr)
779 # pass 2: copy indices/data for selected idxs
780 col_order = np.argsort(idx).astype(idx_dtype, copy=False)
781 nnz = res_indptr[-1]
782 res_indices = np.empty(nnz, dtype=idx_dtype)
783 res_data = np.empty(nnz, dtype=self.dtype)
784 csr_column_index2(col_order, col_offsets, len(self.indices),
785 self.indices, self.data, res_indices, res_data)
786 return self.__class__((res_data, res_indices, res_indptr),
787 shape=new_shape, copy=False)
789 def _minor_slice(self, idx, copy=False):
790 """Index along the minor axis where idx is a slice object.
791 """
792 if idx == slice(None):
793 return self.copy() if copy else self
795 M, N = self._swap(self.shape)
796 start, stop, step = idx.indices(N)
797 N = len(range(start, stop, step))
798 if N == 0:
799 return self.__class__(self._swap((M, N)), dtype=self.dtype)
800 if step == 1:
801 return self._get_submatrix(minor=idx, copy=copy)
802 # TODO: don't fall back to fancy indexing here
803 return self._minor_index_fancy(np.arange(start, stop, step))
805 def _get_submatrix(self, major=None, minor=None, copy=False):
806 """Return a submatrix of this matrix.
808 major, minor: None, int, or slice with step 1
809 """
810 M, N = self._swap(self.shape)
811 i0, i1 = _process_slice(major, M)
812 j0, j1 = _process_slice(minor, N)
814 if i0 == 0 and j0 == 0 and i1 == M and j1 == N:
815 return self.copy() if copy else self
817 indptr, indices, data = get_csr_submatrix(
818 M, N, self.indptr, self.indices, self.data, i0, i1, j0, j1)
820 shape = self._swap((i1 - i0, j1 - j0))
821 return self.__class__((data, indices, indptr), shape=shape,
822 dtype=self.dtype, copy=False)
824 def _set_intXint(self, row, col, x):
825 i, j = self._swap((row, col))
826 self._set_many(i, j, x)
828 def _set_arrayXarray(self, row, col, x):
829 i, j = self._swap((row, col))
830 self._set_many(i, j, x)
832 def _set_arrayXarray_sparse(self, row, col, x):
833 # clear entries that will be overwritten
834 self._zero_many(*self._swap((row, col)))
836 M, N = row.shape # matches col.shape
837 broadcast_row = M != 1 and x.shape[0] == 1
838 broadcast_col = N != 1 and x.shape[1] == 1
839 r, c = x.row, x.col
841 x = np.asarray(x.data, dtype=self.dtype)
842 if x.size == 0:
843 return
845 if broadcast_row:
846 r = np.repeat(np.arange(M), len(r))
847 c = np.tile(c, M)
848 x = np.tile(x, M)
849 if broadcast_col:
850 r = np.repeat(r, N)
851 c = np.tile(np.arange(N), len(c))
852 x = np.repeat(x, N)
853 # only assign entries in the new sparsity structure
854 i, j = self._swap((row[r, c], col[r, c]))
855 self._set_many(i, j, x)
857 def _setdiag(self, values, k):
858 if 0 in self.shape:
859 return
861 M, N = self.shape
862 broadcast = (values.ndim == 0)
864 if k < 0:
865 if broadcast:
866 max_index = min(M + k, N)
867 else:
868 max_index = min(M + k, N, len(values))
869 i = np.arange(max_index, dtype=self.indices.dtype)
870 j = np.arange(max_index, dtype=self.indices.dtype)
871 i -= k
873 else:
874 if broadcast:
875 max_index = min(M, N - k)
876 else:
877 max_index = min(M, N - k, len(values))
878 i = np.arange(max_index, dtype=self.indices.dtype)
879 j = np.arange(max_index, dtype=self.indices.dtype)
880 j += k
882 if not broadcast:
883 values = values[:len(i)]
885 self[i, j] = values
887 def _prepare_indices(self, i, j):
888 M, N = self._swap(self.shape)
890 def check_bounds(indices, bound):
891 idx = indices.max()
892 if idx >= bound:
893 raise IndexError('index (%d) out of range (>= %d)' %
894 (idx, bound))
895 idx = indices.min()
896 if idx < -bound:
897 raise IndexError('index (%d) out of range (< -%d)' %
898 (idx, bound))
900 i = np.atleast_1d(np.asarray(i, dtype=self.indices.dtype)).ravel()
901 j = np.atleast_1d(np.asarray(j, dtype=self.indices.dtype)).ravel()
902 check_bounds(i, M)
903 check_bounds(j, N)
904 return i, j, M, N
906 def _set_many(self, i, j, x):
907 """Sets value at each (i, j) to x
909 Here (i,j) index major and minor respectively, and must not contain
910 duplicate entries.
911 """
912 i, j, M, N = self._prepare_indices(i, j)
913 x = np.atleast_1d(np.asarray(x, dtype=self.dtype)).ravel()
915 n_samples = x.size
916 offsets = np.empty(n_samples, dtype=self.indices.dtype)
917 ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
918 i, j, offsets)
919 if ret == 1:
920 # rinse and repeat
921 self.sum_duplicates()
922 csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
923 i, j, offsets)
925 if -1 not in offsets:
926 # only affects existing non-zero cells
927 self.data[offsets] = x
928 return
930 else:
931 warn("Changing the sparsity structure of a {}_matrix is expensive."
932 " lil_matrix is more efficient.".format(self.format),
933 SparseEfficiencyWarning, stacklevel=3)
934 # replace where possible
935 mask = offsets > -1
936 self.data[offsets[mask]] = x[mask]
937 # only insertions remain
938 mask = ~mask
939 i = i[mask]
940 i[i < 0] += M
941 j = j[mask]
942 j[j < 0] += N
943 self._insert_many(i, j, x[mask])
945 def _zero_many(self, i, j):
946 """Sets value at each (i, j) to zero, preserving sparsity structure.
948 Here (i,j) index major and minor respectively.
949 """
950 i, j, M, N = self._prepare_indices(i, j)
952 n_samples = len(i)
953 offsets = np.empty(n_samples, dtype=self.indices.dtype)
954 ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
955 i, j, offsets)
956 if ret == 1:
957 # rinse and repeat
958 self.sum_duplicates()
959 csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
960 i, j, offsets)
962 # only assign zeros to the existing sparsity structure
963 self.data[offsets[offsets > -1]] = 0
965 def _insert_many(self, i, j, x):
966 """Inserts new nonzero at each (i, j) with value x
968 Here (i,j) index major and minor respectively.
969 i, j and x must be non-empty, 1d arrays.
970 Inserts each major group (e.g. all entries per row) at a time.
971 Maintains has_sorted_indices property.
972 Modifies i, j, x in place.
973 """
974 order = np.argsort(i, kind='mergesort') # stable for duplicates
975 i = i.take(order, mode='clip')
976 j = j.take(order, mode='clip')
977 x = x.take(order, mode='clip')
979 do_sort = self.has_sorted_indices
981 # Update index data type
982 idx_dtype = self._get_index_dtype((self.indices, self.indptr),
983 maxval=(self.indptr[-1] + x.size))
984 self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
985 self.indices = np.asarray(self.indices, dtype=idx_dtype)
986 i = np.asarray(i, dtype=idx_dtype)
987 j = np.asarray(j, dtype=idx_dtype)
989 # Collate old and new in chunks by major index
990 indices_parts = []
991 data_parts = []
992 ui, ui_indptr = np.unique(i, return_index=True)
993 ui_indptr = np.append(ui_indptr, len(j))
994 new_nnzs = np.diff(ui_indptr)
995 prev = 0
996 for c, (ii, js, je) in enumerate(zip(ui, ui_indptr, ui_indptr[1:])):
997 # old entries
998 start = self.indptr[prev]
999 stop = self.indptr[ii]
1000 indices_parts.append(self.indices[start:stop])
1001 data_parts.append(self.data[start:stop])
1003 # handle duplicate j: keep last setting
1004 uj, uj_indptr = np.unique(j[js:je][::-1], return_index=True)
1005 if len(uj) == je - js:
1006 indices_parts.append(j[js:je])
1007 data_parts.append(x[js:je])
1008 else:
1009 indices_parts.append(j[js:je][::-1][uj_indptr])
1010 data_parts.append(x[js:je][::-1][uj_indptr])
1011 new_nnzs[c] = len(uj)
1013 prev = ii
1015 # remaining old entries
1016 start = self.indptr[ii]
1017 indices_parts.append(self.indices[start:])
1018 data_parts.append(self.data[start:])
1020 # update attributes
1021 self.indices = np.concatenate(indices_parts)
1022 self.data = np.concatenate(data_parts)
1023 nnzs = np.empty(self.indptr.shape, dtype=idx_dtype)
1024 nnzs[0] = idx_dtype(0)
1025 indptr_diff = np.diff(self.indptr)
1026 indptr_diff[ui] += new_nnzs
1027 nnzs[1:] = indptr_diff
1028 self.indptr = np.cumsum(nnzs, out=nnzs)
1030 if do_sort:
1031 # TODO: only sort where necessary
1032 self.has_sorted_indices = False
1033 self.sort_indices()
1035 self.check_format(full_check=False)
1037 ######################
1038 # Conversion methods #
1039 ######################
1041 def tocoo(self, copy=True):
1042 major_dim, minor_dim = self._swap(self.shape)
1043 minor_indices = self.indices
1044 major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
1045 _sparsetools.expandptr(major_dim, self.indptr, major_indices)
1046 coords = self._swap((major_indices, minor_indices))
1048 return self._coo_container(
1049 (self.data, coords), self.shape, copy=copy, dtype=self.dtype
1050 )
1052 tocoo.__doc__ = _spbase.tocoo.__doc__
1054 def toarray(self, order=None, out=None):
1055 if out is None and order is None:
1056 order = self._swap('cf')[0]
1057 out = self._process_toarray_args(order, out)
1058 if not (out.flags.c_contiguous or out.flags.f_contiguous):
1059 raise ValueError('Output array must be C or F contiguous')
1060 # align ideal order with output array order
1061 if out.flags.c_contiguous:
1062 x = self.tocsr()
1063 y = out
1064 else:
1065 x = self.tocsc()
1066 y = out.T
1067 M, N = x._swap(x.shape)
1068 csr_todense(M, N, x.indptr, x.indices, x.data, y)
1069 return out
1071 toarray.__doc__ = _spbase.toarray.__doc__
1073 ##############################################################
1074 # methods that examine or modify the internal data structure #
1075 ##############################################################
1077 def eliminate_zeros(self):
1078 """Remove zero entries from the array/matrix
1080 This is an *in place* operation.
1081 """
1082 M, N = self._swap(self.shape)
1083 _sparsetools.csr_eliminate_zeros(M, N, self.indptr, self.indices,
1084 self.data)
1085 self.prune() # nnz may have changed
1087 @property
1088 def has_canonical_format(self) -> bool:
1089 """Whether the array/matrix has sorted indices and no duplicates
1091 Returns
1092 - True: if the above applies
1093 - False: otherwise
1095 has_canonical_format implies has_sorted_indices, so if the latter flag
1096 is False, so will the former be; if the former is found True, the
1097 latter flag is also set.
1098 """
1099 # first check to see if result was cached
1100 if not getattr(self, '_has_sorted_indices', True):
1101 # not sorted => not canonical
1102 self._has_canonical_format = False
1103 elif not hasattr(self, '_has_canonical_format'):
1104 self.has_canonical_format = bool(
1105 _sparsetools.csr_has_canonical_format(
1106 len(self.indptr) - 1, self.indptr, self.indices)
1107 )
1108 return self._has_canonical_format
1110 @has_canonical_format.setter
1111 def has_canonical_format(self, val: bool):
1112 self._has_canonical_format = bool(val)
1113 if val:
1114 self.has_sorted_indices = True
1116 def sum_duplicates(self):
1117 """Eliminate duplicate entries by adding them together
1119 This is an *in place* operation.
1120 """
1121 if self.has_canonical_format:
1122 return
1123 self.sort_indices()
1125 M, N = self._swap(self.shape)
1126 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices,
1127 self.data)
1129 self.prune() # nnz may have changed
1130 self.has_canonical_format = True
1132 @property
1133 def has_sorted_indices(self) -> bool:
1134 """Whether the indices are sorted
1136 Returns
1137 - True: if the indices of the array/matrix are in sorted order
1138 - False: otherwise
1139 """
1140 # first check to see if result was cached
1141 if not hasattr(self, '_has_sorted_indices'):
1142 self._has_sorted_indices = bool(
1143 _sparsetools.csr_has_sorted_indices(
1144 len(self.indptr) - 1, self.indptr, self.indices)
1145 )
1146 return self._has_sorted_indices
1148 @has_sorted_indices.setter
1149 def has_sorted_indices(self, val: bool):
1150 self._has_sorted_indices = bool(val)
1153 def sorted_indices(self):
1154 """Return a copy of this array/matrix with sorted indices
1155 """
1156 A = self.copy()
1157 A.sort_indices()
1158 return A
1160 # an alternative that has linear complexity is the following
1161 # although the previous option is typically faster
1162 # return self.toother().toother()
1164 def sort_indices(self):
1165 """Sort the indices of this array/matrix *in place*
1166 """
1168 if not self.has_sorted_indices:
1169 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr,
1170 self.indices, self.data)
1171 self.has_sorted_indices = True
1173 def prune(self):
1174 """Remove empty space after all non-zero elements.
1175 """
1176 major_dim = self._swap(self.shape)[0]
1178 if len(self.indptr) != major_dim + 1:
1179 raise ValueError('index pointer has invalid length')
1180 if len(self.indices) < self.nnz:
1181 raise ValueError('indices array has fewer than nnz elements')
1182 if len(self.data) < self.nnz:
1183 raise ValueError('data array has fewer than nnz elements')
1185 self.indices = _prune_array(self.indices[:self.nnz])
1186 self.data = _prune_array(self.data[:self.nnz])
1188 def resize(self, *shape):
1189 shape = check_shape(shape)
1190 if hasattr(self, 'blocksize'):
1191 bm, bn = self.blocksize
1192 new_M, rm = divmod(shape[0], bm)
1193 new_N, rn = divmod(shape[1], bn)
1194 if rm or rn:
1195 raise ValueError("shape must be divisible into {} blocks. "
1196 "Got {}".format(self.blocksize, shape))
1197 M, N = self.shape[0] // bm, self.shape[1] // bn
1198 else:
1199 new_M, new_N = self._swap(shape)
1200 M, N = self._swap(self.shape)
1202 if new_M < M:
1203 self.indices = self.indices[:self.indptr[new_M]]
1204 self.data = self.data[:self.indptr[new_M]]
1205 self.indptr = self.indptr[:new_M + 1]
1206 elif new_M > M:
1207 self.indptr = np.resize(self.indptr, new_M + 1)
1208 self.indptr[M + 1:].fill(self.indptr[M])
1210 if new_N < N:
1211 mask = self.indices < new_N
1212 if not np.all(mask):
1213 self.indices = self.indices[mask]
1214 self.data = self.data[mask]
1215 major_index, val = self._minor_reduce(np.add, mask)
1216 self.indptr.fill(0)
1217 self.indptr[1:][major_index] = val
1218 np.cumsum(self.indptr, out=self.indptr)
1220 self._shape = shape
1222 resize.__doc__ = _spbase.resize.__doc__
1224 ###################
1225 # utility methods #
1226 ###################
1228 # needed by _data_matrix
1229 def _with_data(self, data, copy=True):
1230 """Returns a matrix with the same sparsity structure as self,
1231 but with different data. By default the structure arrays
1232 (i.e. .indptr and .indices) are copied.
1233 """
1234 if copy:
1235 return self.__class__((data, self.indices.copy(),
1236 self.indptr.copy()),
1237 shape=self.shape,
1238 dtype=data.dtype)
1239 else:
1240 return self.__class__((data, self.indices, self.indptr),
1241 shape=self.shape, dtype=data.dtype)
1243 def _binopt(self, other, op):
1244 """apply the binary operation fn to two sparse matrices."""
1245 other = self.__class__(other)
1247 # e.g. csr_plus_csr, csr_minus_csr, etc.
1248 fn = getattr(_sparsetools, self.format + op + self.format)
1250 maxnnz = self.nnz + other.nnz
1251 idx_dtype = self._get_index_dtype((self.indptr, self.indices,
1252 other.indptr, other.indices),
1253 maxval=maxnnz)
1254 indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
1255 indices = np.empty(maxnnz, dtype=idx_dtype)
1257 bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
1258 if op in bool_ops:
1259 data = np.empty(maxnnz, dtype=np.bool_)
1260 else:
1261 data = np.empty(maxnnz, dtype=upcast(self.dtype, other.dtype))
1263 fn(self.shape[0], self.shape[1],
1264 np.asarray(self.indptr, dtype=idx_dtype),
1265 np.asarray(self.indices, dtype=idx_dtype),
1266 self.data,
1267 np.asarray(other.indptr, dtype=idx_dtype),
1268 np.asarray(other.indices, dtype=idx_dtype),
1269 other.data,
1270 indptr, indices, data)
1272 A = self.__class__((data, indices, indptr), shape=self.shape)
1273 A.prune()
1275 return A
1277 def _divide_sparse(self, other):
1278 """
1279 Divide this matrix by a second sparse matrix.
1280 """
1281 if other.shape != self.shape:
1282 raise ValueError('inconsistent shapes')
1284 r = self._binopt(other, '_eldiv_')
1286 if np.issubdtype(r.dtype, np.inexact):
1287 # Eldiv leaves entries outside the combined sparsity
1288 # pattern empty, so they must be filled manually.
1289 # Everything outside of other's sparsity is NaN, and everything
1290 # inside it is either zero or defined by eldiv.
1291 out = np.empty(self.shape, dtype=self.dtype)
1292 out.fill(np.nan)
1293 row, col = other.nonzero()
1294 out[row, col] = 0
1295 r = r.tocoo()
1296 out[r.row, r.col] = r.data
1297 out = self._container(out)
1298 else:
1299 # integers types go with nan <-> 0
1300 out = r
1302 return out
1305def _process_slice(sl, num):
1306 if sl is None:
1307 i0, i1 = 0, num
1308 elif isinstance(sl, slice):
1309 i0, i1, stride = sl.indices(num)
1310 if stride != 1:
1311 raise ValueError('slicing with step != 1 not supported')
1312 i0 = min(i0, i1) # give an empty slice when i0 > i1
1313 elif isintlike(sl):
1314 if sl < 0:
1315 sl += num
1316 i0, i1 = sl, sl + 1
1317 if i0 < 0 or i1 > num:
1318 raise IndexError('index out of bounds: 0 <= %d < %d <= %d' %
1319 (i0, i1, num))
1320 else:
1321 raise TypeError('expected slice or scalar')
1323 return i0, i1