Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/scipy/sparse/_lil.py: 20%
290 statements
coverage.py v7.3.2, created at 2023-12-12 06:31 +0000
1"""List of Lists sparse matrix class
2"""
4__docformat__ = "restructuredtext en"
6__all__ = ['lil_matrix', 'isspmatrix_lil']
8from bisect import bisect_left
10import numpy as np
12from ._base import spmatrix, isspmatrix
13from ._index import IndexMixin, INT_TYPES, _broadcast_arrays
14from ._sputils import (getdtype, isshape, isscalarlike, upcast_scalar,
15 get_index_dtype, check_shape, check_reshape_kwargs)
16from . import _csparsetools


class lil_matrix(spmatrix, IndexMixin):
    """Row-based list of lists (LIL) sparse matrix.

    This is a structure for constructing sparse matrices incrementally.
    Note that inserting a single item can take linear time in the worst case;
    to construct a matrix efficiently, make sure the items are pre-sorted by
    index, per row.

    This can be instantiated in several ways:
        lil_matrix(D)
            with a dense matrix or rank-2 ndarray D

        lil_matrix(S)
            with another sparse matrix S (equivalent to S.tolil())

        lil_matrix((M, N), [dtype])
            to construct an empty matrix with shape (M, N)
            dtype is optional, defaulting to dtype='d'.

    Attributes
    ----------
    dtype : dtype
        Data type of the matrix
    shape : 2-tuple
        Shape of the matrix
    ndim : int
        Number of dimensions (this is always 2)
    nnz
        Number of stored values, including explicit zeros
    data
        LIL format data array of the matrix
    rows
        LIL format row index array of the matrix

    Notes
    -----
    Sparse matrices can be used in arithmetic operations: they support
    addition, subtraction, multiplication, division, and matrix power.

    Advantages of the LIL format
        - supports flexible slicing
        - changes to the matrix sparsity structure are efficient

    Disadvantages of the LIL format
        - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
        - slow column slicing (consider CSC)
        - slow matrix vector products (consider CSR or CSC)

    Intended Usage
        - LIL is a convenient format for constructing sparse matrices
        - once a matrix has been constructed, convert to CSR or
          CSC format for fast arithmetic and matrix vector operations
        - consider using the COO format when constructing large matrices

    Data Structure
        - An array (``self.rows``) of rows, each of which is a sorted
          list of column indices of non-zero elements.
        - The corresponding nonzero values are stored in similar
          fashion in ``self.data``.
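
    Examples
    --------
    A minimal construction sketch, following the intended usage above
    (build the matrix incrementally, then convert to CSR for arithmetic):

    >>> from scipy.sparse import lil_matrix
    >>> A = lil_matrix((3, 4))
    >>> A[0, 1] = 2
    >>> A[2, 3] = 5
    >>> A.toarray()
    array([[0., 2., 0., 0.],
           [0., 0., 0., 0.],
           [0., 0., 0., 5.]])
    >>> B = A.tocsr()  # fast arithmetic and matrix-vector products from here on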
80 """
81 format = 'lil'

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        spmatrix.__init__(self)
        self.dtype = getdtype(dtype, arg1, default=float)

        # First get the shape
        if isspmatrix(arg1):
            if isspmatrix_lil(arg1) and copy:
                A = arg1.copy()
            else:
                A = arg1.tolil()

            if dtype is not None:
                A = A.astype(dtype, copy=False)

            self._shape = check_shape(A.shape)
            self.dtype = A.dtype
            self.rows = A.rows
            self.data = A.data
        elif isinstance(arg1, tuple):
            if isshape(arg1):
                if shape is not None:
                    raise ValueError('invalid use of shape parameter')
                M, N = arg1
                self._shape = check_shape((M, N))
                self.rows = np.empty((M,), dtype=object)
                self.data = np.empty((M,), dtype=object)
                for i in range(M):
                    self.rows[i] = []
                    self.data[i] = []
            else:
                raise TypeError('unrecognized lil_matrix constructor usage')
        else:
            # assume A is dense
            try:
                A = self._ascontainer(arg1)
            except TypeError as e:
                raise TypeError('unsupported matrix type') from e
            else:
                A = self._csr_container(A, dtype=dtype).tolil()

                self._shape = check_shape(A.shape)
                self.dtype = A.dtype
                self.rows = A.rows
                self.data = A.data

    def __iadd__(self, other):
        self[:, :] = self + other
        return self

    def __isub__(self, other):
        self[:, :] = self - other
        return self

    def __imul__(self, other):
        if isscalarlike(other):
            self[:, :] = self * other
            return self
        else:
            return NotImplemented

    def __itruediv__(self, other):
        if isscalarlike(other):
            self[:, :] = self / other
            return self
        else:
            return NotImplemented

    # Whenever the dimensions change, empty lists should be created for each
    # row

    def getnnz(self, axis=None):
        if axis is None:
            return sum([len(rowvals) for rowvals in self.data])
        if axis < 0:
            axis += 2
        if axis == 0:
            out = np.zeros(self.shape[1], dtype=np.intp)
            for row in self.rows:
                out[row] += 1
            return out
        elif axis == 1:
            return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp)
        else:
            raise ValueError('axis out of bounds')

    def count_nonzero(self):
        return sum(np.count_nonzero(rowvals) for rowvals in self.data)

    getnnz.__doc__ = spmatrix.getnnz.__doc__
    count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
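
    # Illustrative sketch: ``getnnz`` counts stored entries, overall or per axis.
    #
    #   >>> A = lil_matrix([[1, 0, 2], [0, 0, 3]])
    #   >>> A.getnnz()
    #   3
    #   >>> A.getnnz(axis=0).tolist(), A.getnnz(axis=1).tolist()
    #   ([1, 0, 2], [2, 1])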

    def __str__(self):
        val = ''
        for i, row in enumerate(self.rows):
            for pos, j in enumerate(row):
                val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos]))
        return val[:-1]

    def getrowview(self, i):
        """Returns a view of the 'i'th row (without copying).
        """
        new = self._lil_container((1, self.shape[1]), dtype=self.dtype)
        new.rows[0] = self.rows[i]
        new.data[0] = self.data[i]
        return new

    def getrow(self, i):
        """Returns a copy of the 'i'th row.
        """
        M, N = self.shape
        if i < 0:
            i += M
        if i < 0 or i >= M:
            raise IndexError('row index out of bounds')
        new = self._lil_container((1, N), dtype=self.dtype)
        new.rows[0] = self.rows[i][:]
        new.data[0] = self.data[i][:]
        return new
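
    # Illustrative sketch: ``getrowview`` shares the row's index/data lists with
    # the parent matrix, so writes through the view are visible in it, while
    # ``getrow`` returns an independent copy.
    #
    #   >>> A = lil_matrix((2, 3))
    #   >>> v = A.getrowview(0)
    #   >>> v[0, 1] = 7
    #   >>> A[0, 1]
    #   7.0
    #   >>> r = A.getrow(1)
    #   >>> r[0, 2] = 9
    #   >>> A[1, 2]
    #   0.0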

    def __getitem__(self, key):
        # Fast path for simple (int, int) indexing.
        if (isinstance(key, tuple) and len(key) == 2 and
                isinstance(key[0], INT_TYPES) and
                isinstance(key[1], INT_TYPES)):
            # lil_get1 handles validation for us.
            return self._get_intXint(*key)
        # Everything else takes the normal path.
        return IndexMixin.__getitem__(self, key)
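
    # Illustrative sketch: the (int, int) fast path returns a numpy scalar of
    # the matrix dtype; entries that are not stored read as zero.
    #
    #   >>> A = lil_matrix((2, 2))
    #   >>> A[0, 0]
    #   0.0
    #   >>> A[0, 0] = 3
    #   >>> A[0, 0]
    #   3.0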

    def _asindices(self, idx, N):
        # LIL routines handle bounds-checking for us, so don't do it here.
        try:
            x = np.asarray(idx)
        except (ValueError, TypeError, MemoryError) as e:
            raise IndexError('invalid index') from e
        if x.ndim not in (1, 2):
            raise IndexError('Index dimension must be <= 2')
        return x

    def _get_intXint(self, row, col):
        v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows,
                                   self.data, row, col)
        return self.dtype.type(v)

    def _get_sliceXint(self, row, col):
        row = range(*row.indices(self.shape[0]))
        return self._get_row_ranges(row, slice(col, col+1))

    def _get_arrayXint(self, row, col):
        row = row.squeeze()
        return self._get_row_ranges(row, slice(col, col+1))

    def _get_intXslice(self, row, col):
        return self._get_row_ranges((row,), col)

    def _get_sliceXslice(self, row, col):
        row = range(*row.indices(self.shape[0]))
        return self._get_row_ranges(row, col)

    def _get_arrayXslice(self, row, col):
        return self._get_row_ranges(row, col)

    def _get_intXarray(self, row, col):
        row = np.array(row, dtype=col.dtype, ndmin=1)
        return self._get_columnXarray(row, col)

    def _get_sliceXarray(self, row, col):
        row = np.arange(*row.indices(self.shape[0]))
        return self._get_columnXarray(row, col)

    def _get_columnXarray(self, row, col):
        # outer indexing
        row, col = _broadcast_arrays(row[:, None], col)
        return self._get_arrayXarray(row, col)

    def _get_arrayXarray(self, row, col):
        # inner indexing
        i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col))
        new = self._lil_container(i.shape, dtype=self.dtype)
        _csparsetools.lil_fancy_get(self.shape[0], self.shape[1],
                                    self.rows, self.data,
                                    new.rows, new.data,
                                    i, j)
        return new

    def _get_row_ranges(self, rows, col_slice):
        """
        Fast path for indexing in the case where the column index is a slice.

        This is faster than the brute-force approach because it skips zeros
        more efficiently, accessing the elements column-wise in order.

        Parameters
        ----------
        rows : sequence or range
            Rows indexed. If range, must be within valid bounds.
        col_slice : slice
            Columns indexed

        """
        j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
        col_range = range(j_start, j_stop, j_stride)
        nj = len(col_range)
        new = self._lil_container((len(rows), nj), dtype=self.dtype)

        _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
                                         self.rows, self.data,
                                         new.rows, new.data,
                                         rows,
                                         j_start, j_stop, j_stride, nj)

        return new
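
    # Illustrative sketch: slice-based column indexing goes through this path.
    #
    #   >>> import numpy as np
    #   >>> A = lil_matrix(np.eye(4))
    #   >>> A[1:3, ::2].toarray()
    #   array([[0., 0.],
    #          [0., 1.]])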

    def _set_intXint(self, row, col, x):
        _csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows,
                                 self.data, row, col, x)

    def _set_arrayXarray(self, row, col, x):
        i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x))
        _csparsetools.lil_fancy_set(self.shape[0], self.shape[1],
                                    self.rows, self.data,
                                    i, j, x)

    def _set_arrayXarray_sparse(self, row, col, x):
        # Special case: full matrix assignment
        if (x.shape == self.shape and
                isinstance(row, slice) and row == slice(None) and
                isinstance(col, slice) and col == slice(None)):
            x = self._lil_container(x, dtype=self.dtype)
            self.rows = x.rows
            self.data = x.data
            return
        # Fall back to densifying x
        x = np.asarray(x.toarray(), dtype=self.dtype)
        x, _ = _broadcast_arrays(x, row)
        self._set_arrayXarray(row, col, x)

    def __setitem__(self, key, x):
        # Fast path for simple (int, int) indexing.
        if (isinstance(key, tuple) and len(key) == 2 and
                isinstance(key[0], INT_TYPES) and
                isinstance(key[1], INT_TYPES)):
            x = self.dtype.type(x)
            if x.size > 1:
                raise ValueError("Trying to assign a sequence to an item")
            return self._set_intXint(key[0], key[1], x)
        # Everything else takes the normal path.
        IndexMixin.__setitem__(self, key, x)

    def _mul_scalar(self, other):
        if other == 0:
            # Multiply by zero: return the zero matrix
            new = self._lil_container(self.shape, dtype=self.dtype)
        else:
            res_dtype = upcast_scalar(self.dtype, other)

            new = self.copy()
            new = new.astype(res_dtype)
            # Multiply every element by this scalar.
            for j, rowvals in enumerate(new.data):
                new.data[j] = [val*other for val in rowvals]
        return new

    def __truediv__(self, other):  # self / other
        if isscalarlike(other):
            new = self.copy()
            # Divide every element by this scalar
            for j, rowvals in enumerate(new.data):
                new.data[j] = [val/other for val in rowvals]
            return new
        else:
            return self.tocsr() / other

    def copy(self):
        M, N = self.shape
        new = self._lil_container(self.shape, dtype=self.dtype)
        # This is ~14x faster than calling deepcopy() on rows and data.
        _csparsetools.lil_get_row_ranges(M, N, self.rows, self.data,
                                         new.rows, new.data, range(M),
                                         0, N, 1, N)
        return new

    copy.__doc__ = spmatrix.copy.__doc__

    def reshape(self, *args, **kwargs):
        shape = check_shape(args, self.shape)
        order, copy = check_reshape_kwargs(kwargs)

        # Return early if reshape is not required
        if shape == self.shape:
            if copy:
                return self.copy()
            else:
                return self

        new = self._lil_container(shape, dtype=self.dtype)

        if order == 'C':
            ncols = self.shape[1]
            for i, row in enumerate(self.rows):
                for col, j in enumerate(row):
                    new_r, new_c = np.unravel_index(i * ncols + j, shape)
                    new[new_r, new_c] = self[i, j]
        elif order == 'F':
            nrows = self.shape[0]
            for i, row in enumerate(self.rows):
                for col, j in enumerate(row):
                    new_r, new_c = np.unravel_index(i + j * nrows, shape, order)
                    new[new_r, new_c] = self[i, j]
        else:
            raise ValueError("'order' must be 'C' or 'F'")

        return new

    reshape.__doc__ = spmatrix.reshape.__doc__
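
    # Illustrative sketch: with the default C order, entry (i, j) moves to the
    # position of flat index ``i * ncols + j`` in the new shape.
    #
    #   >>> A = lil_matrix([[1, 0, 2, 0]])
    #   >>> A.reshape((2, 2)).toarray()
    #   array([[1, 0],
    #          [2, 0]])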

    def resize(self, *shape):
        shape = check_shape(shape)
        new_M, new_N = shape
        M, N = self.shape

        if new_M < M:
            self.rows = self.rows[:new_M]
            self.data = self.data[:new_M]
        elif new_M > M:
            self.rows = np.resize(self.rows, new_M)
            self.data = np.resize(self.data, new_M)
            for i in range(M, new_M):
                self.rows[i] = []
                self.data[i] = []

        if new_N < N:
            for row, data in zip(self.rows, self.data):
                trunc = bisect_left(row, new_N)
                del row[trunc:]
                del data[trunc:]

        self._shape = shape

    resize.__doc__ = spmatrix.resize.__doc__
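
    # Illustrative sketch: resize works in place, dropping out-of-range entries
    # when shrinking and adding empty rows/columns when growing.
    #
    #   >>> A = lil_matrix([[1, 2], [3, 4]])
    #   >>> A.resize(1, 3)
    #   >>> A.toarray()
    #   array([[1, 2, 0]])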

    def toarray(self, order=None, out=None):
        d = self._process_toarray_args(order, out)
        for i, row in enumerate(self.rows):
            for pos, j in enumerate(row):
                d[i, j] = self.data[i][pos]
        return d

    toarray.__doc__ = spmatrix.toarray.__doc__

    def transpose(self, axes=None, copy=False):
        return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False)

    transpose.__doc__ = spmatrix.transpose.__doc__

    def tolil(self, copy=False):
        if copy:
            return self.copy()
        else:
            return self

    tolil.__doc__ = spmatrix.tolil.__doc__

    def tocsr(self, copy=False):
        M, N = self.shape
        if M == 0 or N == 0:
            return self._csr_container((M, N), dtype=self.dtype)

        # construct indptr array
        if M*N <= np.iinfo(np.int32).max:
            # fast path: it is known that 64-bit indexing will not be needed.
            idx_dtype = np.int32
            indptr = np.empty(M + 1, dtype=idx_dtype)
            indptr[0] = 0
            _csparsetools.lil_get_lengths(self.rows, indptr[1:])
            np.cumsum(indptr, out=indptr)
            nnz = indptr[-1]
        else:
            idx_dtype = get_index_dtype(maxval=N)
            lengths = np.empty(M, dtype=idx_dtype)
            _csparsetools.lil_get_lengths(self.rows, lengths)
            nnz = lengths.sum(dtype=np.int64)
            idx_dtype = get_index_dtype(maxval=max(N, nnz))
            indptr = np.empty(M + 1, dtype=idx_dtype)
            indptr[0] = 0
            np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:])

        indices = np.empty(nnz, dtype=idx_dtype)
        data = np.empty(nnz, dtype=self.dtype)
        _csparsetools.lil_flatten_to_array(self.rows, indices)
        _csparsetools.lil_flatten_to_array(self.data, data)

        # init csr matrix
        return self._csr_container((data, indices, indptr), shape=self.shape)

    tocsr.__doc__ = spmatrix.tocsr.__doc__
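
    # Illustrative sketch: tocsr() flattens the per-row lists into the usual
    # (data, indices, indptr) CSR triplet; values round-trip unchanged.
    #
    #   >>> A = lil_matrix([[0, 9], [8, 0]])
    #   >>> B = A.tocsr()
    #   >>> B.nnz
    #   2
    #   >>> (B.toarray() == A.toarray()).all()
    #   True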


def _prepare_index_for_memoryview(i, j, x=None):
    """
    Convert index and data arrays to a form suitable for passing to the
    Cython fancy getset routines.

    The conversions are necessary to (i) ensure the integer index arrays
    are in one of the accepted types, and (ii) ensure the arrays are
    writable so that Cython memoryview support doesn't choke on them.

    Parameters
    ----------
    i, j
        Index arrays
    x : optional
        Data arrays

    Returns
    -------
    i, j, x
        Re-formatted arrays (x is omitted if the input was None)

    """
    if i.dtype > j.dtype:
        j = j.astype(i.dtype)
    elif i.dtype < j.dtype:
        i = i.astype(j.dtype)

    if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
        i = i.astype(np.intp)
    if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
        j = j.astype(np.intp)

    if x is not None:
        if not x.flags.writeable:
            x = x.copy()
        return i, j, x
    else:
        return i, j
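
# Illustrative sketch: mismatched index dtypes are unified to the larger one,
# and read-only inputs are copied so they can back a writable memoryview.
#
#   >>> i = np.array([[0]], dtype=np.int32)
#   >>> j = np.array([[1]], dtype=np.int64)
#   >>> i2, j2 = _prepare_index_for_memoryview(i, j)
#   >>> i2.dtype == j2.dtype
#   True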


def isspmatrix_lil(x):
    """Is x of lil_matrix type?

    Parameters
    ----------
    x
        object to check for being a lil matrix

    Returns
    -------
    bool
        True if x is a lil matrix, False otherwise

    Examples
    --------
    >>> from scipy.sparse import lil_matrix, isspmatrix_lil
    >>> isspmatrix_lil(lil_matrix([[5]]))
    True

    >>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil
    >>> isspmatrix_lil(csr_matrix([[5]]))
    False
    """
    from ._arrays import lil_array
    return isinstance(x, lil_matrix) or isinstance(x, lil_array)